diff --git a/.gitignore b/.gitignore index ed98daa8..f214f65f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ prof .vscode doc/html doc/latex +*.d +*.in *.x *.xlsx *.log diff --git a/Makefile b/Makefile index b29206a8..b59bb75a 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,13 @@ ifneq (,$(findstring -DHAVE_METIS,$(CXXFLAGS))) LIB+= -L$(METIS_LIB) -lmetis -Wl,-rpath,$(METIS_LIB) endif +#----------------------------------------------------------------------------- +# LGF SPECIAL CASE +# the LGF kernel data (kernel/*.ker, read from the source tree and NOT from PREFIX) is installed in $(PREFIX)/include, the path handed to the compiler as KERNEL_PATH +LGF_PATH=$(abspath $(PREFIX)/include) +DEF += -DKERNEL_PATH=${LGF_PATH} +LGF_DATA := $(wildcard kernel/*.ker) + #----------------------------------------------------------------------------- ## add the wanted folders - common folders SRC := $(notdir $(wildcard $(SRC_DIR)/*.cpp)) @@ -129,6 +136,7 @@ install_dynamic: lib_dynamic @cp $(TARGET_LIB_A2A).so $(PREFIX)/lib @cp $(TARGET_LIB_NB).so $(PREFIX)/lib @cp $(API) $(PREFIX)/include + @cp $(LGF_DATA) $(PREFIX)/include install_static: lib_static @mkdir -p $(PREFIX)/lib @@ -136,6 +144,7 @@ install_static: lib_static @cp $(TARGET_LIB_A2A).a $(PREFIX)/lib @cp $(TARGET_LIB_NB).a $(PREFIX)/lib @cp $(API) $(PREFIX)/include + @cp $(LGF_DATA) $(PREFIX)/include # for a standard installation, do the dynamic link install: info install_static @@ -163,6 +172,7 @@ info: logo $(info compil. 
flags = $(CXXFLAGS) $(INC) $(DEF) -fPIC -MMD) $(info linker flags = -shared $(LDFLAGS)) $(info using arch file = $(ARCH_FILE) ) + $(info LGF path = $(LGF_PATH) ) $(info ------------) $(info FFTW:) $(info - include: -I$(FFTW_INC) ) @@ -177,6 +187,7 @@ info: logo $(info - OBJ A2A = $(OBJ_A2A)) $(info - OBJ NB = $(OBJ_NB)) $(info - DEP = $(DEP)) + $(info - LGF_DATA = $(LGF_DATA)) $(info ------------) .NOTPARALLEL: logo diff --git a/README.md b/README.md index 9731e216..dd9f05dc 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,21 @@ For the list of all the contributors to the development of FLUPS, description an If you use FLUPS, please cite it as follows in your publications: - Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review) +### Why should you use FLUPS? +- You can solve the Poisson equation on rectangular and uniformly distributed 2D/3D grids; +- You can use any boundary conditions, including truly unbounded boundary conditions and semi-unbounded conditions; +- You can solve the same Poisson problem many times at low cost using the precomputed Green's function and communication patterns; +- You can use threads and/or MPI to speed up the execution; +- You can use the built-in profiler to optimize the execution speed; +- You can use any part of the library on its own, especially the pre-computed communications and the FFTs; +- You can apply filters or do any computation you want while in the Fourier space. ### Installation FLUPS is a C++ library, with an API in C. -The compilation of FLUPS was tested with Intel compilers and GCC. +The compilation of FLUPS was tested with Intel compilers and GCC. -#### 1. 
Dependencies +#### Dependencies First, you need to install the dependencies, typically using the following configuration commands (for the intel compilers) - FFTW (> v3.3.8) in the `fftw_prefix` dir: ```shell @@ -31,9 +39,8 @@ CC=icc CXX=icpc FC=ifort ./configure --prefix=fftw_prefix --enable-mpi --enable- ```shell CC=mpiicc CXX=mpiicpc FC=mpif90 ./configure --prefix=hdf5_prefix --enable-build-mode=production --enable-parallel ``` -- METIS (> v5.1.0) - only if compiling with `REORDER_RANKS` -#### 2. The Library +#### Compilation You need now to create a architecture/compiler dependent file in `make_arch` to define `CXX`, `CXXFLAGS`, `FFTWDIR` and `HDF5DIR`. For example: ```makefile @@ -57,15 +64,21 @@ HDF5_LIB := ${HDF5_DIR}/lib HDF5_INC := ${HDF5_DIR}/include ``` By default, the Makefile is looking for `-lfftw3_openmp -lfftw3` and `-lhdf5`. You can overwrite this by changing the variable `FFTW_LIBNAME` and `HDF5_LIBNAME` in your arch file. - -Then you need to reference the created configuration file and the prefix you wish to : -```shell -export ARCH_FILE=make_arch/my_arch_dependent_file +For example: +```makefile +FFTW_LIBNAME := -lfftw3_omp -lfftw3 +HDF5_LIBNAME := -lhdf5_openmpi ``` +Then you need to reference the created configuration file (using `ARCH_FILE`) and the prefix in which you wish to install the library (using `PREFIX`). +You can either `export` the variables or reference them later while calling the Makefile. +If no prefix is given, `make install` uses the current working directory to install the library. + Finally, go to the main folder and type the compilation command. 
-- Check the compilation details before doing the installation +- Check the compilation details before doing the installation\ ```shell +export ARCH_FILE=make_arch/my_arch_dependent_file +export PREFIX=/my/lib/prefix make info ## or ARCH_FILE=make_arch/my_arch_dependent_file PREFIX=/my/lib/prefix make info @@ -77,12 +90,15 @@ make install ARCH_FILE=make_arch/my_arch_dependent_file PREFIX=/my/lib/prefix make install ``` -#### 3. Documentation +:warning: you must **install** the library. Indeed, we copy some data required by the solver. +If you wish to keep everything local, simply do not give a prefix and the current directory will be selected. + +#### Documentation -The documentation is built with Doxygen. -To build the documentation, please go to the `./doc` subfolder and type `doxygen`. +The documentation is built using Doxygen. +To build the documentation, go to the `./doc` subfolder and type `doxygen`. -#### 4. Compilation flags +#### Available compilation flags Here is an exhautstive list of the compilation flags that can be used to change the behavior of the code. To use `MY_FLAG`, simply add `-DMY_FLAG` to the variable `CXXFLAGS` in your `make_arch`. - `DUMP_DBG`: if specified, the solver will I/O fields using the HDF5 library. - `COMM_NONBLOCK`: if specified, the code will use the non-blocking communication pattern instead of the all to all version. @@ -90,18 +106,43 @@ Here is an exhautstive list of the compilation flags that can be used to change - `NDEBUG`: use this flag to bypass various checks inside the library - `PROF`: allow you to use the build-in profiler to have a detailed view of the timing in each part of the solve. Make sure you have created a folder ```./prof``` next to your executable. - `REORDER_RANKS`: try to reorder the MPI ranks based on the precomputed communication graph, using call to MPI_Dist_graph. 
We recommend the use of this feature when the number of processes > 128 and the nodes are allocated exclusive for your application, especially on fully unbounded domains. -- `HAVE_METIS`: in combination with REORDER_RANKS, use METIS instead of MPI_Dist_graph to partition the call graph based on the allocated ressources +- `HAVE_METIS`: in combination with REORDER_RANKS, use METIS instead of MPI_Dist_graph to partition the call graph based on the allocated resources. You must hence install METIS for this functionality. :warning: You may also change the memory alignement and the FFTW planner flag in the `flups.h` file. ### How to use a solver? #### Detailed reference +The scientific background of the library is explained in "Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review)". -The scientific background of the library is explained in "Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review)" +A detailed description of the API is provided in the documentation (@ref flups.h), as well as many implementation details. + +#### Memory layout +In this project we choose to handle the memory the **Fortran** way, even though we are in C/C++. +So, the memory is laid out as a single row of size `n[0] * n[1] * n[2]`. +The fastest rotating index is set to be `n[0]` then `n[1]` and finally `n[2]`. + +We have chosen this layout in order to reuse the 3D code in a 2D framework. +Indeed, having the last dimension as the slowest rotating index does not penalize the way the loops are written. -For the detailed specifications of the API, have a look at @ref flups.h . 
+As an example, here is how we access the memory + +```cpp +double* data =(double*) flups_malloc(n[0] * n[1] * n[2] * sizeof(double)); + +for(int iz=0; iz we put a value of 1 -Topology *topo = new Topology(axis, nglob, nproc, isComplex,NULL,1, MPI_COMM_WORLD); +FLUPS_Topology *topo = flups_topo_new(axis, nglob, nproc, isComplex, NULL, 1, MPI_COMM_WORLD); // define additional quantities double L = {1.0, 2.0, 1.0}; double h = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]}; ``` -Then, you can define a new solver and it's boundary condition +Then, you can define a new solver and its boundary condition ```cpp // define the solver -const BoundaryType mybc[3][2] = {{UNB, UNB}, {EVEN, ODD}, {UNB, EVEN}}; // BC in X,Y,Z -Solver * mysolver = new Solver(topo, mybc, h, L); +const FLUPS_BoundaryType mybc[3][2] = {{UNB, UNB}, {EVEN, ODD}, {UNB, EVEN}}; // BC in X,Y,Z +FLUPS_Solver *mysolver = flups_init(topo, mybc, h, L,prof); // setup the solver -mysolver->set_GreenType(HEJ2); -mysolver->setup(false); +flups_set_greenType(mysolver,typeGreen); +flups_setup(mysolver,false); ``` To solve a field `rhs` that has been defined on the topology, use ```cpp -mysolver->solve(rhs, rhs, SRHS); +flups_solve(mysolver,rhs, rhs, SRHS); ``` -Then, destroy the solver +Then, destroy the solver and the created topology ``` -delete (mysolver); +flups_cleanup(mysolver); +flups_topo_free(topo); ``` #### Advanced usage - Examples of usage of FLUPS in C programs are provided in the `./sample` subfolder. #### Memory footprint - For the recommanded configuration of 128^3 unknowns per processor in full unbounded, we have measured the memory usage of FLUPS on a 2000 cores run: - the all to all version uses ~530Mb (O.253kB/unknown) - the non-blocking version uses ~560Mb (O.267kB/unknown) @@ -157,8 +197,7 @@ For 1.5Go, max 168 21*8 7*24--> -**CAUTION** -FLUPS was nerver tested above 1024^3 unknowns per core. +:warning: FLUPS was never tested above 1024^3 unknowns per core. 
### Implementation details and developers guide #### C++ use @@ -166,8 +205,9 @@ We use the C++ language in a very limited way, on purpose. The features used are the object oriented layout and some usefull features of the standard library. #### Conventions - -- Put a ```BEGIN_FUNC;``` at the begining of each function +- Put a ```BEGIN_FUNC;``` at the beginning and a ```END_FUNC;``` at the end of each function +- Use ```FLUPS_INFO``` for verbosity (several levels available), ```FLUPS_CHECK``` for assertions and ```FLUPS_ERROR``` for error management +- Use the ```flups_malloc``` and ```flups_free``` functions to allocate/free memory - how to name an action? ```action_mySuperFunction``` where ```action``` = ```set```, ```get```, ```execute```, ```switch```, ```cmpt``` - how to name a function? ```mySuperFunction``` - how to name an class? ```MyClass``` @@ -183,31 +223,6 @@ Set then the value: Inspired from https://clang.llvm.org/docs/ClangFormatStyleOptions.html (*Configurable Format Style Options* section) -#### Memory layout -In this project we choose to handle the memory in a **Fortran** way of doing iven if we are in C/C++. -So, the memory is aligned as a single row of size `n[0] * n[1] * n[2]`. -The fastest rotating index is set to be `n[0]` then `n[1]` and finally `n[2]`. - -We have chosen this way of doing to reuse the 3D code in a 2D framework. -Indeed having the last dimension in the slower rotating index does not penalize the loops writting. 
- -As an example, we here is how we access the memory - -```cpp -double* data =(double*) flups_malloc(n[0] * n[1] * n[2] * sizeof(double)); - -for(int iz=0; iznglob(0),topoIn->nglob(1),topoIn->nglob(2)); - printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); - printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); - printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); + if(rank == 0) { + printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2)); + printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); + printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); + printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); #ifndef SKIP_P3D - printf("[P3DFFT++] topo IN glob : %d %d %d \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]); - printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); - printf("[P3DFFT++] topo OUT glob : %d %d %d \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]); - printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]); + printf("[P3DFFT++] topo IN glob : %d %d %d \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]); + printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) 
\n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); + printf("[P3DFFT++] topo OUT glob : %d %d %d \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]); + printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]); #endif - - printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT); + printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT); #ifndef SKIP_P3D - printf(" P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT); + printf(" P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT); #endif + } double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); diff --git a/samples/compareP3DFFT++/run/zenobe_kernel.sh b/samples/compareP3DFFT++/run/zenobe_kernel.sh index 46b313d8..e6be3100 100755 --- a/samples/compareP3DFFT++/run/zenobe_kernel.sh +++ b/samples/compareP3DFFT++/run/zenobe_kernel.sh @@ -36,12 +36,6 @@ MY_SIZE_Z=$((${MY_SIZE}*${LZ})) echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID} -echo "============================== FLUPS ONLY, WITHOUT METIS ==========================================" >> stdout_${PBS_JOBID} - -echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}" -mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE}_noP3D -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID} - - ################## echo "End time : " $(date) echo "----------------- Computation over, bye bye! 
----" diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh index 20411d77..5dc9913a 100755 --- a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh +++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh @@ -9,7 +9,7 @@ VER=a2a EXE=flups_vs_p3dfft++_${VER} ######### WEAK -> increase the number of CPU and the size -SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER +SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V4 # clean the validation dir # rm -rf ${SCRATCH} @@ -18,7 +18,6 @@ mkdir -p $SCRATCH/data mkdir -p $SCRATCH/prof # copy the needed info cp $HOME_FLUPS/$EXE $SCRATCH -cp $HOME_FLUPS/${EXE}_noP3D $SCRATCH cp $HOME_FLUPS/run/zenobe_kernel.sh $SCRATCH cd $SCRATCH @@ -38,18 +37,22 @@ cd $SCRATCH # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh ##################### size = 128^3/proc ################################# -# cpu = 64 +## CANNOT DO cpu=96,192,384... due to P3D ! 
+ +# cpu = 128 (-> actually allocating 144) # same on large -qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 256 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 256 (->264) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 512 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 512 (->528) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 1024 -qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 1024 (->1032) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh +# cpu = 2048 (->2064) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh #end of file diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh index a56f1f02..c78305f4 100755 --- a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh +++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh @@ -9,7 +9,7 @@ VER=nb EXE=flups_vs_p3dfft++_${VER} ######### WEAK -> 
increase the number of CPU and the size -SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER +SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V3 # clean the validation dir # rm -rf ${SCRATCH} @@ -38,18 +38,22 @@ cd $SCRATCH # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh ##################### size = 128^3/proc ################################# -# cpu = 64 +## CANNOT DO cpu=96,192,384... due to P3D ! + +# cpu = 128 (-> actually allocating 144) # same on large -qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 256 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 256 (->264) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 512 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 512 (->528) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 1024 -qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 1024 (->1032) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 
./zenobe_kernel.sh +# cpu = 2048 (->2064) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh #end of file diff --git a/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt index 228c963a..e19044b4 100644 --- a/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt @@ -1 +1 @@ -8 2.879671993812e-16 1.110223024625e-15 +8 3.066390565529e-16 1.110223024625e-15 diff --git a/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt index de54da40..492c1f29 100644 --- a/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt @@ -1 +1 @@ -8 3.412318533331e-16 1.110223024625e-15 +8 3.852295714867e-16 1.332267629550e-15 diff --git a/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt index 391b172f..13b8e85d 100644 --- a/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt @@ -1 +1 @@ -8 5.541747624352e-16 2.109423746788e-15 +8 5.419249018646e-16 2.220446049250e-15 diff --git a/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt index 1f0eae9c..c27ccb3b 100644 --- a/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt @@ -1 +1 @@ -8 7.581356855986e-17 1.665334536938e-16 +8 9.720644932128e-17 2.220446049250e-16 diff --git a/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt index 1f0eae9c..c27ccb3b 100644 --- a/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt @@ -1 +1 @@ -8 7.581356855986e-17 1.665334536938e-16 +8 9.720644932128e-17 2.220446049250e-16 diff --git a/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt new file mode 100644 index 00000000..ade1e653 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.360116630811e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt index e2160119..3b32bd6f 100644 --- a/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt @@ -1 +1 @@ -8 2.741897992369e-16 9.992007221626e-16 +8 2.580705344185e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt index ff702b7f..b2e87218 100644 --- a/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt @@ -1 +1 @@ -8 2.880255821380e-16 8.881784197001e-16 +8 2.904773986610e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt index 45bedcc0..e2e3ad7e 100644 --- a/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt @@ -1 +1 @@ -8 5.022257820411e-16 2.109423746788e-15 +8 5.227751426413e-16 2.109423746788e-15 diff --git a/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt index 6c92f22d..4ae4d18b 100644 --- a/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt @@ -1 +1 @@ -8 1.383034061608e-16 3.330669073875e-16 +8 1.400043829337e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt index 6c92f22d..4ae4d18b 100644 --- a/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt @@ -1 +1 @@ -8 1.383034061608e-16 3.330669073875e-16 +8 1.400043829337e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt new file mode 100644 index 00000000..8f9aa6b3 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.230079805695e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt new file mode 100644 index 00000000..0417ff66 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.978324264490e-02 1.215492793748e-01 diff --git a/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt index c02a8d47..2da51f9c 100644 --- a/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt @@ -1 +1 @@ -8 2.837726816918e-16 9.992007221626e-16 +8 2.319347045319e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt index 58f2d574..4a0b5d87 100644 
--- a/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt @@ -1 +1 @@ -8 2.824889171655e-16 9.992007221626e-16 +8 2.915468233191e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt index 323119eb..64da6deb 100644 --- a/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt @@ -1 +1 @@ -8 3.493854481640e-16 1.443289932013e-15 +8 3.326123904568e-16 1.443289932013e-15 diff --git a/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt index 6fd2d107..a50ebf6b 100644 --- a/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt @@ -1 +1 @@ -8 1.184869787392e-16 3.053113317719e-16 +8 1.171869586735e-16 3.053113317719e-16 diff --git a/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt index 6fd2d107..a50ebf6b 100644 --- a/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt @@ -1 +1 @@ -8 1.184869787392e-16 3.053113317719e-16 +8 1.171869586735e-16 3.053113317719e-16 diff --git a/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt new file mode 100644 index 00000000..9d4c7391 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.559180168130e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt index 55386e5e..5d1f87c1 100644 --- 
a/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt @@ -1 +1 @@ -8 2.117464329924e-16 7.216449660064e-16 +8 2.506862993999e-16 9.436895709314e-16 diff --git a/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt index e8c9e7fd..431cfec5 100644 --- a/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt @@ -1 +1 @@ -8 2.346660992591e-16 8.881784197001e-16 +8 2.006729208194e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt index 68ef1c69..96423adf 100644 --- a/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt @@ -1 +1 @@ -8 2.606439386068e-16 9.992007221626e-16 +8 2.623438039626e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt index 753eac20..9b9d370d 100644 --- a/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt @@ -1 +1 @@ -8 1.291175088092e-16 3.330669073875e-16 +8 1.284632718366e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt index 753eac20..9b9d370d 100644 --- a/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt @@ -1 +1 @@ -8 1.291175088092e-16 3.330669073875e-16 +8 1.284632718366e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt new file mode 100644 index 00000000..6e9f1441 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.666418386409e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt new file mode 100644 index 00000000..cc71831b --- /dev/null +++ b/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.860935708545e-02 1.337508016338e-01 diff --git a/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt index 3b80641e..5e6c393d 100644 --- a/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt @@ -1 +1 @@ -8 2.324124869550e-16 8.881784197001e-16 +8 4.249776009696e-16 1.276756478319e-15 diff --git a/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt index 3abb847f..be4b0054 100644 --- a/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt @@ -1 +1 @@ -8 2.647136944320e-16 7.771561172376e-16 +8 2.731931856942e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt index 64e046e4..8cf094f1 100644 --- a/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt @@ -1 +1 @@ -8 2.471325589817e-16 8.881784197001e-16 +8 2.635294617138e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt index ae7ed52a..59a46df3 100644 
--- a/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt @@ -1 +1 @@ -8 9.880312447293e-17 2.220446049250e-16 +8 9.745379437590e-17 2.220446049250e-16 diff --git a/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt index ae7ed52a..59a46df3 100644 --- a/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt @@ -1 +1 @@ -8 9.880312447293e-17 2.220446049250e-16 +8 9.745379437590e-17 2.220446049250e-16 diff --git a/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt new file mode 100644 index 00000000..962bfe94 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.015351299880e-15 2.109423746788e-15 diff --git a/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt new file mode 100644 index 00000000..0417ff66 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.978324264490e-02 1.215492793748e-01 diff --git a/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt new file mode 100644 index 00000000..cc71831b --- /dev/null +++ b/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.860935708545e-02 1.337508016338e-01 diff --git a/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt new file mode 100644 index 00000000..23e8aa48 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.223380168681e-02 9.249743986706e-02 diff --git 
a/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt index 28873f1d..4fa1f9b9 100644 --- a/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt @@ -1 +1 @@ -8 2.460575272916e-16 8.881784197001e-16 +8 2.746147771692e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt index dbdf8e6d..5cf70871 100644 --- a/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt @@ -1 +1 @@ -8 3.786553299301e-16 1.221245327088e-15 +8 2.284799278593e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt index e97b4ee5..41b89059 100644 --- a/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt @@ -1 +1 @@ -8 3.651152377896e-16 1.554312234475e-15 +8 4.130595809974e-16 1.554312234475e-15 diff --git a/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt index e07c5669..6b52819e 100644 --- a/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt @@ -1 +1 @@ -8 1.251858722303e-16 3.330669073875e-16 +8 1.485975713201e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt index e07c5669..6b52819e 100644 --- a/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt @@ -1 +1 @@ -8 1.251858722303e-16 
3.330669073875e-16 +8 1.485975713201e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt new file mode 100644 index 00000000..eea94cb4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.729847094858e-16 7.216449660064e-16 diff --git a/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt index 957d351c..afaec869 100644 --- a/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt @@ -1 +1 @@ -8 1.714256197463e-16 6.106226635438e-16 +8 2.065930418964e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt index 8eac4509..dab5cdbc 100644 --- a/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt @@ -1 +1 @@ -8 3.375444239021e-16 1.221245327088e-15 +8 2.810036259783e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt index 5ef9a0fc..09ddfd1e 100644 --- a/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt @@ -1 +1 @@ -8 3.781711920843e-16 1.665334536938e-15 +8 3.807108354803e-16 1.665334536938e-15 diff --git a/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt index c5baf099..19b11e9a 100644 --- a/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt @@ -1 +1 @@ -8 1.380618125209e-16 
4.440892098501e-16 +8 1.270840351296e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt index c5baf099..19b11e9a 100644 --- a/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt @@ -1 +1 @@ -8 1.380618125209e-16 4.440892098501e-16 +8 1.270840351296e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt new file mode 100644 index 00000000..6b3f7dc4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.740249526309e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt new file mode 100644 index 00000000..f3839c98 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.239982674130e-02 1.460032340803e-01 diff --git a/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt index 1bc27be1..d325efaa 100644 --- a/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt @@ -1 +1 @@ -8 2.463321950145e-16 8.881784197001e-16 +8 1.594821179868e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt index 1168b450..23dacd93 100644 --- a/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt @@ -1 +1 @@ -8 2.070035810573e-16 9.992007221626e-16 +8 2.415845289031e-16 9.992007221626e-16 diff --git 
a/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt index a729477a..895ac10c 100644 --- a/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt @@ -1 +1 @@ -8 3.464774054186e-16 1.554312234475e-15 +8 3.633428620793e-16 1.443289932013e-15 diff --git a/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt index 9b25beee..6d41e571 100644 --- a/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt @@ -1 +1 @@ -8 1.883444858262e-16 4.440892098501e-16 +8 1.517128559027e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt index 9b25beee..6d41e571 100644 --- a/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt @@ -1 +1 @@ -8 1.883444858262e-16 4.440892098501e-16 +8 1.517128559027e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt new file mode 100644 index 00000000..d9d5094d --- /dev/null +++ b/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.494344651719e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt index a460571a..b6fbf4d6 100644 --- a/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt @@ -1 +1 @@ -8 1.738708762151e-16 6.106226635438e-16 +8 1.936485848748e-16 7.216449660064e-16 diff --git 
a/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt index 9480fbfb..87b3ea7b 100644 --- a/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt @@ -1 +1 @@ -8 2.241018778753e-16 9.992007221626e-16 +8 1.648270320622e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt index 552c0225..946c4094 100644 --- a/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt @@ -1 +1 @@ -8 2.285105617638e-16 9.992007221626e-16 +8 2.224172249971e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt index ab57c1a4..86bda4bb 100644 --- a/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt @@ -1 +1 @@ -8 1.981233446090e-16 5.551115123126e-16 +8 1.984875422410e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt index ab57c1a4..86bda4bb 100644 --- a/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt @@ -1 +1 @@ -8 1.981233446090e-16 5.551115123126e-16 +8 1.984875422410e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt new file mode 100644 index 00000000..f084ef42 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.670798201198e-16 8.881784197001e-16 diff --git 
a/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt new file mode 100644 index 00000000..c828ab08 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.200258829336e-02 1.509764206207e-01 diff --git a/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt index bfc94ffb..bd1538be 100644 --- a/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt @@ -1 +1 @@ -8 2.016219626873e-16 7.216449660064e-16 +8 4.180582353684e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt index 19ac8a2f..343db07c 100644 --- a/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt @@ -1 +1 @@ -8 2.826868462581e-16 8.881784197001e-16 +8 2.362957789240e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt index 78e3ffc1..da942b45 100644 --- a/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt @@ -1 +1 @@ -8 2.158074840743e-16 8.881784197001e-16 +8 2.800810052076e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt index 70b6177a..4ca2b300 100644 --- a/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt @@ -1 +1 @@ -8 1.322036846889e-16 3.330669073875e-16 +8 1.270499915246e-16 3.330669073875e-16 diff --git 
a/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt index 70b6177a..4ca2b300 100644 --- a/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt @@ -1 +1 @@ -8 1.322036846889e-16 3.330669073875e-16 +8 1.270499915246e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt new file mode 100644 index 00000000..880342c3 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.301219577348e-16 1.332267629550e-15 diff --git a/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt new file mode 100644 index 00000000..f3839c98 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.239982674130e-02 1.460032340803e-01 diff --git a/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt new file mode 100644 index 00000000..c828ab08 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.200258829336e-02 1.509764206207e-01 diff --git a/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt new file mode 100644 index 00000000..afab620e --- /dev/null +++ b/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.506048052626e-02 1.040899031895e-01 diff --git a/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt new file mode 100644 index 00000000..5256d4db --- /dev/null +++ b/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.453121113062e-02 1.997372939511e-01 diff --git a/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt index ecbb818b..106b1622 100644 --- a/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt @@ -1 +1 @@ -8 9.233949105056e-03 5.785860761320e-02 +8 9.233949105056e-03 5.785860761319e-02 diff --git a/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt new file mode 100644 index 00000000..506ba9d2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.051484886687e-01 2.005866243539e+00 diff --git a/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt new file mode 100644 index 00000000..5256d4db --- /dev/null +++ b/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.453121113062e-02 1.997372939511e-01 diff --git a/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt new file mode 100644 index 00000000..3b67f047 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.796571614602e-02 2.268632740773e-01 diff --git a/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt new file mode 100644 index 00000000..3b67f047 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.796571614602e-02 2.268632740773e-01 diff --git a/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt new file mode 100644 index 00000000..506ba9d2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.051484886687e-01 2.005866243539e+00 diff --git a/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt new file mode 100644 index 00000000..1d71a7b0 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt @@ -0,0 +1 @@ +17 5.516870695410e-03 3.245510815373e-02 diff --git a/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt new file mode 100644 index 00000000..b52c1f22 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.102639526126e-01 1.513172916610e+00 diff --git a/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt index 087e6489..c9e547e8 100644 --- a/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt @@ -1 +1 @@ -8 2.248521341384e-16 8.881784197001e-16 +8 2.295284422793e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt index c9422d2c..fbdec53e 100644 --- a/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt @@ -1 +1 @@ -8 3.085055053406e-16 1.110223024625e-15 +8 3.487725898558e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt index e085c996..84c902bd 100644 --- a/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt @@ -1 +1 @@ -8 4.112667330478e-16 1.776356839400e-15 +8 4.037770508257e-16 1.665334536938e-15 diff --git a/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt index 1faf7787..575d4d26 100644 --- a/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt @@ -1 +1 @@ -8 1.185940757741e-16 4.440892098501e-16 +8 1.042242595377e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt index 1faf7787..575d4d26 100644 --- a/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt @@ -1 +1 @@ -8 1.185940757741e-16 4.440892098501e-16 +8 1.042242595377e-16 3.330669073875e-16 diff --git 
a/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt new file mode 100644 index 00000000..0f01fcec --- /dev/null +++ b/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.665402298195e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt index 76679cd7..1a97b27f 100644 --- a/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt @@ -1 +1 @@ -8 2.370175092121e-16 9.992007221626e-16 +8 2.487098902905e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt index 625143fb..d0c9990c 100644 --- a/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt @@ -1 +1 @@ -8 2.857673038043e-16 1.110223024625e-15 +8 3.446501259485e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt index af1fd558..041eaee7 100644 --- a/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt @@ -1 +1 @@ -8 3.027825802011e-16 1.443289932013e-15 +8 2.981402106445e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt index 4e2e4283..f5d3807b 100644 --- a/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt @@ -1 +1 @@ -8 1.143744394216e-16 3.330669073875e-16 +8 1.575522964835e-16 4.440892098501e-16 diff --git 
a/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt index 4e2e4283..f5d3807b 100644 --- a/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt @@ -1 +1 @@ -8 1.143744394216e-16 3.330669073875e-16 +8 1.575522964835e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt new file mode 100644 index 00000000..2d9578e7 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.005183188409e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt index 452bd3f7..a9039708 100644 --- a/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt @@ -1 +1 @@ -8 6.901116042656e-03 5.203433555700e-02 +8 6.901116042656e-03 5.203433555701e-02 diff --git a/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt new file mode 100644 index 00000000..f3839c98 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.239982674130e-02 1.460032340803e-01 diff --git a/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt index b83ad742..3f4b8e9c 100644 --- a/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt @@ -1 +1 @@ -8 2.115439972084e-16 8.881784197001e-16 +8 2.407414558747e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt index 1f417e05..09f54508 100644 --- a/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt @@ -1 +1 @@ -8 2.390377305867e-16 1.110223024625e-15 +8 2.517897300738e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt index a9963574..067e1f83 100644 --- a/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt @@ -1 +1 @@ -8 3.272278205610e-16 1.443289932013e-15 +8 3.055679946251e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt index 0fa97df7..83518696 100644 --- a/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt @@ -1 +1 @@ -8 1.247458350191e-16 4.440892098501e-16 +8 1.469559156822e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt index 0fa97df7..83518696 100644 --- a/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt @@ -1 +1 @@ -8 1.247458350191e-16 4.440892098501e-16 +8 1.469559156822e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt new file mode 100644 index 00000000..f01700dc --- /dev/null +++ b/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.523039791531e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt index 25a84091..13a78458 100644 --- a/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt @@ -1 +1 @@ -8 1.724051917864e-16 6.106226635438e-16 +8 1.543809479869e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt index 718acb43..75a95090 100644 --- a/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt @@ -1 +1 @@ -8 2.086964319043e-16 6.661338147751e-16 +8 1.879473652148e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt index 3c0a0538..148ac719 100644 --- a/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt @@ -1 +1 @@ -8 2.837394475002e-16 1.110223024625e-15 +8 2.411452824717e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt index c0d94a16..de85982b 100644 --- a/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt @@ -1 +1 @@ -8 1.276289714672e-16 3.330669073875e-16 +8 1.650541757014e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt index c0d94a16..de85982b 100644 --- a/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt @@ -1 +1 @@ -8 1.276289714672e-16 3.330669073875e-16 +8 1.650541757014e-16 5.551115123126e-16 diff --git 
a/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt new file mode 100644 index 00000000..d0d7b55e --- /dev/null +++ b/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.114442198303e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt new file mode 100644 index 00000000..c828ab08 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.200258829336e-02 1.509764206207e-01 diff --git a/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt index 81b07c5f..dfaa3afe 100644 --- a/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt @@ -1 +1 @@ -8 1.824851077850e-16 6.106226635438e-16 +8 3.082139455477e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt index 9851958c..0e00d15c 100644 --- a/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt @@ -1 +1 @@ -8 4.190767293333e-16 1.332267629550e-15 +8 2.483565619348e-16 1.110223024625e-15 diff --git a/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt index f89a838f..b315b3da 100644 --- a/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt @@ -1 +1 @@ -8 3.320754177571e-16 1.221245327088e-15 +8 3.008633483925e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt index 3e6c23ea..fd6aa38c 100644 --- a/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt @@ -1 +1 @@ -8 1.102743943057e-16 2.220446049250e-16 +8 1.238475936291e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt index 3e6c23ea..fd6aa38c 100644 --- a/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt @@ -1 +1 @@ -8 1.102743943057e-16 2.220446049250e-16 +8 1.238475936291e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt new file mode 100644 index 00000000..ae3755c2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.251687730649e-16 1.887379141863e-15 diff --git a/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt index 452bd3f7..a9039708 100644 --- a/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt @@ -1 +1 @@ -8 6.901116042656e-03 5.203433555700e-02 +8 6.901116042656e-03 5.203433555701e-02 diff --git a/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt index 452bd3f7..a9039708 100644 --- a/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt @@ -1 +1 @@ -8 6.901116042656e-03 5.203433555700e-02 +8 6.901116042656e-03 5.203433555701e-02 diff --git a/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt new file mode 100644 index 00000000..f3839c98 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.239982674130e-02 1.460032340803e-01 diff --git a/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt new file mode 100644 index 00000000..fe4993d8 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt @@ -0,0 +1,2 @@ +8 4.200258829336e-02 1.509764206207e-01 +17 8.486914331463e-03 2.887335804383e-02 diff --git a/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt new file mode 100644 index 00000000..afab620e --- /dev/null +++ b/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.506048052626e-02 1.040899031895e-01 diff --git a/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt index 939dd6b9..71b0bf34 100644 --- a/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt @@ -1 +1 @@ -8 2.408596549030e-16 8.881784197001e-16 +8 2.290475563610e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt index 6c8f6319..c00cf188 100644 --- a/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt @@ -1 +1 @@ -8 2.507388587558e-16 9.992007221626e-16 +8 2.352712468881e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt index 9100b9c4..0ce490e0 100644 --- 
a/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt @@ -1 +1 @@ -8 3.787315981161e-16 1.554312234475e-15 +8 3.338366326547e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt index 007b8a2d..a42248f9 100644 --- a/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt @@ -1 +1 @@ -8 7.957068990635e-17 2.220446049250e-16 +8 9.231597913641e-17 2.775557561563e-16 diff --git a/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt index 007b8a2d..a42248f9 100644 --- a/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt @@ -1 +1 @@ -8 7.957068990635e-17 2.220446049250e-16 +8 9.231597913641e-17 2.775557561563e-16 diff --git a/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt new file mode 100644 index 00000000..94786cf6 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.325306069933e-16 6.106226635438e-16 diff --git a/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt index 7e4bc989..d3250967 100644 --- a/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt @@ -1 +1 @@ -8 2.285706986988e-16 8.881784197001e-16 +8 1.790517133086e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt index 37674592..e5550db5 100644 --- 
a/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt @@ -1 +1 @@ -8 2.289010105550e-16 9.992007221626e-16 +8 1.950118864312e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt index f826b1e0..84d7cccd 100644 --- a/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt @@ -1 +1 @@ -8 2.623586732967e-16 1.110223024625e-15 +8 3.606486436078e-16 1.443289932013e-15 diff --git a/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt index 782d4470..ab0f95e8 100644 --- a/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt @@ -1 +1 @@ -8 2.099284199178e-16 4.440892098501e-16 +8 2.156418977007e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt index 782d4470..ab0f95e8 100644 --- a/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt @@ -1 +1 @@ -8 2.099284199178e-16 4.440892098501e-16 +8 2.156418977007e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt new file mode 100644 index 00000000..08264a6c --- /dev/null +++ b/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.856944304086e-16 4.996003610813e-16 diff --git a/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- 
/dev/null +++ b/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt index 4df24d16..2ff77cf7 100644 --- a/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt @@ -1 +1 @@ -8 2.072251583388e-16 6.661338147751e-16 +8 2.353091410498e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt index 12f4ed3d..0ff34348 100644 --- a/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt @@ -1 +1 @@ -8 2.515435231594e-16 9.992007221626e-16 +8 2.243585394794e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt index 17ffef2c..9337689c 100644 --- a/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt @@ -1 +1 @@ -8 2.654382863482e-16 9.992007221626e-16 +8 3.357334522952e-16 1.221245327088e-15 diff --git a/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt index 1abccb91..61a6627e 100644 --- a/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt @@ -1 +1 @@ -8 1.247644913917e-16 3.330669073875e-16 +8 1.264743095170e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt index 1abccb91..61a6627e 100644 --- 
a/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt @@ -1 +1 @@ -8 1.247644913917e-16 3.330669073875e-16 +8 1.264743095170e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt new file mode 100644 index 00000000..b22def0e --- /dev/null +++ b/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.451459512795e-16 6.106226635438e-16 diff --git a/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt index d6099504..46a89ff8 100644 --- a/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt @@ -1 +1 @@ -8 1.506065702201e-16 4.440892098501e-16 +8 1.361454041896e-16 4.996003610813e-16 diff --git a/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt index 25065ea3..ef28fbe7 100644 --- a/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt @@ -1 +1 @@ -8 1.430564401166e-16 6.661338147751e-16 +8 1.310056339657e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt index 0309783e..7f38bae6 100644 --- a/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt @@ -1 +1 @@ -8 2.036428399477e-16 7.771561172376e-16 +8 1.946209037840e-16 6.661338147751e-16 diff --git a/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt index b5399251..3da8440c 100644 --- 
a/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt @@ -1 +1 @@ -8 1.577740299644e-16 3.330669073875e-16 +8 1.582311261321e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt index b5399251..3da8440c 100644 --- a/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt @@ -1 +1 @@ -8 1.577740299644e-16 3.330669073875e-16 +8 1.582311261321e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt new file mode 100644 index 00000000..574f9a99 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.231261729659e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt index b6db15e0..17a86932 100644 --- a/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt @@ -1 +1 @@ -8 1.711628015844e-16 5.551115123126e-16 +8 1.857410152917e-16 7.216449660064e-16 diff --git a/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt index cb99cdb7..06afceca 100644 --- a/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt +++ 
b/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt @@ -1 +1 @@ -8 1.961000758343e-16 7.771561172376e-16 +8 1.893172848568e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt index ee58c910..f3e59879 100644 --- a/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt @@ -1 +1 @@ -8 2.383249998992e-16 8.326672684689e-16 +8 2.364862414871e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt index bded1cd8..3523a0cc 100644 --- a/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt @@ -1 +1 @@ -8 1.116709290277e-16 3.330669073875e-16 +8 1.152775633689e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt index bded1cd8..3523a0cc 100644 --- a/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt @@ -1 +1 @@ -8 1.116709290277e-16 3.330669073875e-16 +8 1.152775633689e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt new file mode 100644 index 00000000..42029051 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.681568408891e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt new file mode 100644 index 00000000..95f1fab1 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.992446487147e-02 1.135798757980e-01 diff --git a/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt new file mode 100644 index 00000000..dba36c18 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.443019074090e-02 2.006714310501e-01 diff --git a/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt new file mode 100644 index 00000000..dba36c18 --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.443019074090e-02 2.006714310501e-01 diff --git a/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt new file mode 100644 index 00000000..530f3ce4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.795591596726e-02 2.268934764004e-01 diff --git a/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt index 35e11ec4..b962f696 100644 --- a/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt @@ -1 +1 @@ -8 9.257074536541e-03 5.787589076140e-02 +8 9.257074536541e-03 5.787589076139e-02 diff --git a/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt new file mode 100644 index 00000000..20066e2a --- /dev/null +++ b/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.043648693180e-01 1.982452423276e+00 diff --git a/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt new file mode 100644 index 00000000..530f3ce4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.795591596726e-02 2.268934764004e-01 diff --git a/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt index 35e11ec4..b962f696 100644 --- a/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt @@ -1 +1 @@ -8 9.257074536541e-03 5.787589076140e-02 +8 9.257074536541e-03 5.787589076139e-02 diff --git a/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt new file mode 100644 index 00000000..20066e2a --- /dev/null +++ b/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.043648693180e-01 1.982452423276e+00 diff --git a/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt new file mode 100644 index 00000000..be85b941 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.100653287328e-01 1.507869829275e+00 diff --git a/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt index 9068183c..3bd4ba14 100644 --- a/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt @@ -1 +1 @@ -8 2.417264182289e-16 8.881784197001e-16 +8 2.224152959320e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt index a9c20297..3d4ec07e 100644 --- a/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt @@ -1 +1 @@ -8 5.004057813534e-16 1.332267629550e-15 +8 2.073204635671e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt index 82501401..cec42b28 100644 
--- a/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt @@ -1 +1 @@ -8 4.222797590796e-16 1.665334536938e-15 +8 5.131031483671e-16 1.776356839400e-15 diff --git a/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt index fb079dcb..f9511d88 100644 --- a/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt @@ -1,2 +1,2 @@ -8 1.276643339166e-16 3.330669073875e-16 -16 1.706826512311e-16 7.771561172376e-16 +8 9.910722837601e-17 2.220446049250e-16 +16 1.629108020274e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt index c3ff9285..5bfcf01d 100644 --- a/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt @@ -1 +1 @@ -8 1.276643339166e-16 3.330669073875e-16 +8 9.910722837601e-17 2.220446049250e-16 diff --git a/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt new file mode 100644 index 00000000..19338cce --- /dev/null +++ b/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt @@ -0,0 +1 @@ +8 1.837169624052e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt index 24624883..2aa404ad 100644 --- a/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt @@ -1 +1 @@ -8 2.926288778510e-16 1.221245327088e-15 +8 1.730680789872e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt index 38d63282..b1f14f10 100644 --- a/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt @@ -1 +1 @@ -8 2.115034829798e-16 7.771561172376e-16 +8 2.285725259941e-16 1.110223024625e-15 diff --git a/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt index eb39266d..0e4ac1c0 100644 --- a/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt @@ -1 +1 @@ -8 2.994365074576e-16 1.221245327088e-15 +8 3.055033613695e-16 1.443289932013e-15 diff --git a/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt index 2849122a..dd109706 100644 --- a/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt @@ -1 +1 @@ -8 1.242508934651e-16 4.440892098501e-16 +8 1.391703464091e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt index 2849122a..dd109706 100644 --- a/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt @@ -1 +1 @@ -8 1.242508934651e-16 4.440892098501e-16 +8 1.391703464091e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt new file mode 100644 index 00000000..d3679278 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.613943634531e-16 8.326672684689e-16 diff --git a/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- /dev/null +++ b/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt index c285a991..0ca3a20e 100644 --- a/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt @@ -1 +1 @@ -8 3.072060405836e-16 9.992007221626e-16 +8 3.302831470286e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt index 03148b9a..3dde6644 100644 --- a/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt @@ -1 +1 @@ -8 2.666179212385e-16 8.881784197001e-16 +8 2.035206327463e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt index 511646d2..0aefd803 100644 --- a/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt @@ -1 +1 @@ -8 3.910393537349e-16 1.554312234475e-15 +8 3.954947985543e-16 1.554312234475e-15 diff --git a/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt index 80de8639..d7c1927f 100644 --- a/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt @@ -1 +1 @@ -8 1.344802348586e-16 3.330669073875e-16 +8 1.241933593859e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt index 80de8639..d7c1927f 100644 --- a/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt @@ -1 +1 @@ -8 1.344802348586e-16 3.330669073875e-16 +8 1.241933593859e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt new file mode 100644 index 00000000..45bfc424 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.932939822763e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt index 61f42caa..18e8b4f8 100644 --- a/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt @@ -1 +1 @@ -8 1.616134902723e-16 4.996003610813e-16 +8 1.640437657289e-16 4.996003610813e-16 diff --git a/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt index da3ef3ea..ce78b3e7 100644 --- a/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt @@ -1 +1 @@ -8 1.790463298367e-16 7.771561172376e-16 +8 1.655274575850e-16 7.771561172376e-16 diff --git a/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt index cce39885..2c07a6b4 100644 --- a/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt @@ -1 +1 @@ -8 1.435599953163e-16 6.661338147751e-16 +8 1.500012415547e-16 5.551115123126e-16 diff --git a/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt index 2464e12f..8cfb5b71 100644 --- a/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt @@ -1 +1 @@ -8 1.203853098659e-16 3.330669073875e-16 +8 1.380822452361e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt index 2464e12f..8cfb5b71 100644 --- a/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt @@ -1 +1 @@ -8 1.203853098659e-16 3.330669073875e-16 +8 1.380822452361e-16 4.440892098501e-16 diff --git a/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt new file mode 100644 index 00000000..bacdbbfc --- /dev/null +++ b/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt @@ -0,0 +1 @@ +8 2.600002712460e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt index 93a545bc..f5bb3df8 100644 --- a/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt @@ -1 +1 @@ -8 1.547554192428e-16 5.551115123126e-16 +8 1.826697591811e-16 6.106226635438e-16 diff --git a/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt index 0d7fe36d..3ce4e810 100644 
--- a/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt @@ -1 +1 @@ -8 6.083466602000e-16 1.332267629550e-15 +8 3.648200634147e-16 1.110223024625e-15 diff --git a/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt index 1e5227ec..422f8ac0 100644 --- a/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt @@ -1 +1 @@ -8 6.819607188466e-16 1.887379141863e-15 +8 2.232551035605e-16 8.881784197001e-16 diff --git a/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt index 2a3c4e36..a557424f 100644 --- a/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt @@ -1 +1 @@ -8 1.231531498259e-16 3.330669073875e-16 +8 1.279468816630e-16 3.330669073875e-16 diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt index 80aaf1d6..4e03e797 100644 --- a/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt @@ -1,2 +1,3 @@ -8 1.183686628522e-16 3.330669073875e-16 -16 2.023798911980e-16 9.992007221626e-16 +16 2.506968496639e-16 9.992007221626e-16 +8 1.181651052418e-16 3.330669073875e-16 +16 2.506968496639e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt new file mode 100644 index 00000000..59c84cd2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt @@ -0,0 +1 @@ +16 6.461989693825e-02 1.171077243925e-01 diff --git 
a/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt new file mode 100644 index 00000000..07a61244 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt @@ -0,0 +1 @@ +16 3.530094381085e-01 6.397430813834e-01 diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt new file mode 100644 index 00000000..6af2669d --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt @@ -0,0 +1 @@ +16 3.494862970899e-01 6.333582518347e-01 diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt new file mode 100644 index 00000000..d30864c0 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt @@ -0,0 +1 @@ +16 3.380767319311e-01 6.126812115521e-01 diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt new file mode 100644 index 00000000..1ed2d0b8 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt @@ -0,0 +1,2 @@ +16 2.355266459011e-16 6.661338147751e-16 +8 4.191000011073e-16 9.992007221626e-16 diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt new file mode 100644 index 00000000..14d7dc72 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt @@ -0,0 +1,2 @@ +16 2.238092546229e-02 4.055994167232e-02 +17 1.977047785451e-02 3.920432575338e-02 diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt new file mode 100644 index 00000000..4fc1f137 --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt @@ -0,0 +1 @@ +16 3.930370609172e-01 7.122833366619e-01 diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt new file mode 100644 index 00000000..4c1b4b4c --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt @@ -0,0 +1 @@ +16 2.280867649289e-01 4.133513557039e-01 diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt new file mode 100644 index 00000000..a4b43cc3 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt @@ -0,0 +1 @@ +16 1.008997206127e-01 1.828560123531e-01 diff --git a/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- /dev/null +++ b/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt index f0088cce..edb5f8a9 100644 --- a/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt @@ -1,2 +1,3 @@ 8 2.556243887413e-01 8.031750419599e-01 17 4.413576773139e-02 2.044117431074e-01 +17 4.413576773139e-02 2.044117431074e-01 diff --git a/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt 
b/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt new file mode 100644 index 00000000..95f1fab1 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.992446487147e-02 1.135798757980e-01 diff --git a/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt new file mode 100644 index 00000000..3f75abfd --- /dev/null +++ b/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.741303848805e-02 1.583004445949e-01 diff --git a/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt new file mode 100644 index 00000000..5256d4db --- /dev/null +++ b/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.453121113062e-02 1.997372939511e-01 diff --git a/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt index ecbb818b..106b1622 100644 --- a/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt @@ -1 +1 @@ -8 9.233949105056e-03 5.785860761320e-02 +8 9.233949105056e-03 5.785860761319e-02 diff --git a/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt new file mode 100644 index 00000000..506ba9d2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.051484886687e-01 2.005866243539e+00 diff --git a/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt new file mode 100644 index 00000000..5256d4db --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.453121113062e-02 1.997372939511e-01 diff 
--git a/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt new file mode 100644 index 00000000..3b67f047 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.796571614602e-02 2.268632740773e-01 diff --git a/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt new file mode 100644 index 00000000..df2c8feb --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt @@ -0,0 +1 @@ +16 3.000341400202e-03 4.344975347735e-02 diff --git a/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt new file mode 100644 index 00000000..61ec2be0 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt @@ -0,0 +1 @@ +16 3.015468995435e-03 4.222496564571e-02 diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt new file mode 100644 index 00000000..55fc664a --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt @@ -0,0 +1,2 @@ +16 9.439111015658e-02 5.894889259406e-01 +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt new file mode 100644 index 00000000..fcad07a2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt @@ -0,0 +1 @@ +16 6.256321865795e-03 3.781128583175e-02 diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt new file mode 100644 index 00000000..dc8a911d --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt @@ -0,0 +1 @@ +16 
9.579721195958e-02 5.537345525604e-01 diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt new file mode 100644 index 00000000..3f6d95a7 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt @@ -0,0 +1 @@ +16 5.156406574464e-02 3.080377026025e-01 diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt new file mode 100644 index 00000000..1f9d9051 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt @@ -0,0 +1 @@ +16 2.777470852546e-02 1.665997142499e-01 diff --git a/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt new file mode 100644 index 00000000..3b67f047 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.796571614602e-02 2.268632740773e-01 diff --git a/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt new file mode 100644 index 00000000..506ba9d2 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.051484886687e-01 2.005866243539e+00 diff --git a/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt new file mode 100644 index 00000000..b52c1f22 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt @@ -0,0 +1 @@ +8 
3.102639526126e-01 1.513172916610e+00 diff --git a/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt new file mode 100644 index 00000000..d8a97445 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.722214533751e-02 1.602958913385e-01 diff --git a/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt new file mode 100644 index 00000000..dba36c18 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.443019074090e-02 2.006714310501e-01 diff --git a/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt new file mode 100644 index 00000000..dba36c18 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.443019074090e-02 2.006714310501e-01 diff --git a/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt new file mode 100644 index 00000000..530f3ce4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.795591596726e-02 2.268934764004e-01 diff --git a/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt new file mode 100644 index 00000000..0c995230 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt @@ -0,0 +1 @@ +16 
2.989390717874e-03 4.161211731720e-02 diff --git a/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt new file mode 100644 index 00000000..20066e2a --- /dev/null +++ b/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.043648693180e-01 1.982452423276e+00 diff --git a/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt new file mode 100644 index 00000000..530f3ce4 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.795591596726e-02 2.268934764004e-01 diff --git a/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt new file mode 100644 index 00000000..7c6fcb39 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.046385573823e-01 1.989076837382e+00 diff --git a/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt index 35e11ec4..b962f696 100644 --- a/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt @@ -1 +1 @@ -8 9.257074536541e-03 5.787589076140e-02 +8 9.257074536541e-03 5.787589076139e-02 diff --git a/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt new file mode 100644 index 00000000..20066e2a --- /dev/null +++ b/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.043648693180e-01 1.982452423276e+00 diff --git a/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt new file mode 100644 index 00000000..6e6ffeef --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt @@ -0,0 +1 @@ +16 3.001897712375e-03 4.042950163304e-02 diff --git a/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt new file mode 100644 index 00000000..be85b941 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.100653287328e-01 1.507869829275e+00 diff --git a/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt new file mode 100644 index 00000000..95f1fab1 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt @@ -0,0 +1 @@ +8 4.992446487147e-02 1.135798757980e-01 diff --git a/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt new file mode 100644 index 00000000..43d8083f --- /dev/null +++ b/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.680462636025e-02 1.465168649220e-01 diff --git a/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt new file mode 100644 index 00000000..b3e5f03d --- /dev/null +++ b/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt @@ -0,0 +1 @@ +16 3.033581271861e-03 4.102111411487e-02 diff --git a/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt new file mode 100644 index 00000000..b52c1f22 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.102639526126e-01 1.513172916610e+00 diff --git a/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt new file mode 100644 index 00000000..43d8083f --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt @@ -0,0 +1 @@ +8 5.680462636025e-02 1.465168649220e-01 diff --git a/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt new file mode 100644 index 00000000..f77e0f83 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.954437578025e-02 1.767108271899e-01 diff --git a/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt new file mode 100644 index 00000000..be85b941 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.100653287328e-01 1.507869829275e+00 diff --git a/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt new file mode 100644 index 00000000..f77e0f83 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt @@ -0,0 +1 @@ +8 8.954437578025e-02 1.767108271899e-01 diff --git a/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt new file mode 100644 index 00000000..76ccd048 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt @@ -0,0 +1 @@ +17 1.299759222394e-01 4.610254128717e-01 diff --git a/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt new file mode 100644 index 00000000..b52c1f22 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.102639526126e-01 1.513172916610e+00 diff --git a/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt new file mode 100644 index 00000000..be85b941 --- /dev/null +++ 
b/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.100653287328e-01 1.507869829275e+00 diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt index 2b2a9d88..2e40d19d 100644 --- a/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt +++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt @@ -1,3 +1,2 @@ 16 1.271883888163e-02 1.739957177053e-01 -32 3.225108970826e-03 5.041207253504e-02 -64 8.090864625062e-04 1.307047437423e-02 +8 4.884531403115e-02 3.680868204613e-01 diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt new file mode 100644 index 00000000..7b571f13 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt @@ -0,0 +1 @@ +16 3.017234157142e-03 3.927536957671e-02 diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt index 84ad044e..66222a90 100644 --- a/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt +++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt @@ -1,3 +1 @@ 16 5.171085302138e-02 6.277588695484e-01 -32 2.213405736435e-02 3.301075527807e-01 -64 6.683903540655e-03 1.066411041943e-01 diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt index 316b601f..0564224e 100644 --- a/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt +++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt @@ -1,3 +1 @@ 16 3.098524288986e-02 4.030347242545e-01 -32 5.612919251869e-03 9.135896990668e-02 -64 5.099579703039e-04 8.679550799164e-03 diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt 
b/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt index d2ba42ef..07a63c24 100644 --- a/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt +++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt @@ -1,3 +1 @@ 16 1.814399512337e-02 2.443536829148e-01 -32 1.368220040893e-03 2.194920130034e-02 -64 3.781077087410e-05 5.800217852716e-04 diff --git a/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt new file mode 100644 index 00000000..8969b7db --- /dev/null +++ b/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt @@ -0,0 +1 @@ +8 3.157704605151e-01 1.123512937953e+00 diff --git a/samples/validation/run/juwels_kernel_valid.sh b/samples/validation/run/juwels_kernel_valid.sh new file mode 100755 index 00000000..dd29e816 --- /dev/null +++ b/samples/validation/run/juwels_kernel_valid.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Submission script for JUWELS +#SBATCH --account=prpa79 +#SBATCH --job-name=scaling +# +#SBATCH --output=flups_%j.out +#SBATCH --error=flups_%j.err + +export OMP_NUM_THREADS=${MY_NTHREADS} + +export I_MPI_DEBUG=+5 + +echo "----------------- Load modules -----------------" +module purge +#module load Intel/2019.3.199-GCC-8.3.0 +#module load IntelMPI/2018.5.288 +#module load IntelMPI/2019.3.199 + +module load intel-para/2019a + +# module use /p/software/juwels/otherstages/ +# module load Stages/Devel-2019a +# module load Intel +# module load IntelMPI/2019.6.RC20191024 + + +module load FFTW/3.3.8 +module load HDF5/1.10.5 +module load METIS/5.1.0 +module list + +#CHANGING COMMUNICATION METHOD, OTHERWISE MPI USE TOO MUCH MEMORY ! 
+export PSP_UCP=1 +export UCX_TLS=ud_mlx5,self,sm +#export PSI_LOGGERDEBUG=1 +#export PSI_FORWARDERDEBUG=1 + +echo "----------------- launching job -----------------" +echo "launch command: srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0" + +srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0 + +scontrol show job ${SLURM_JOB_ID} + +sacct --format="JobID,NCPUS,NNodes,Elapsed,MaxRSS,MaxVMSize,ExitCode" | grep "$SLURM_JOB_ID" \ No newline at end of file diff --git a/samples/validation/run/juwels_strongscaling.sh b/samples/validation/run/juwels_strongscaling.sh new file mode 100755 index 00000000..1fce77d8 --- /dev/null +++ b/samples/validation/run/juwels_strongscaling.sh @@ -0,0 +1,534 @@ +#!/bin/bash + +HOME_FLUPS=/p/project/prpa79/flups/samples/validation +KERNEL=juwels_kernel_valid.sh + +## fixed parameters +export INITIAL_SIZE_X=1152 +export INITIAL_SIZE_Y=1152 +export INITIAL_SIZE_Z=1152 + +export ver=$1 + +if [[ -z $1 ]]; +then + echo "you must specify a version (small/large/Xlarge) as an argument" + exit 1 +else + echo "starting as $1" +fi + +export nPerSwitch=1 #number of process per switch, unknown +export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches + + +if [ "$ver" = "small" ]; then +############################################################ +############################################################ +############################################################ +# SMALL (<=4k) +############################################################ +############################################################ +############################################################ + +export PARTITION=batch + +############################################################ +# ALL TO ALL +#----------------------------------------------------------- +export
EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + +#================== 1152 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=8 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=16 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch 
--ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + 
+#================== 1152 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=8 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=16 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch 
--ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +elif [ "$ver" = "large" ]; then + +############################################################ +############################################################ +############################################################ +# LARGE (>4k, <=18k) +############################################################ +############################################################ +############################################################ +export PARTITION=batch + + +# ############################################################ +# # ALL TO ALL +# #----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed 
info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} 
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export
SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +elif [ "$ver" = "Xlarge" ]; then + +############################################################ +############################################################ +############################################################ +# EXTRA LARGE (>18k) +############################################################ +############################################################ +############################################################ +export PARTITION=large + +# ############################################################ +# # ALL TO ALL +# #----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 36,864 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size 
+export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 73,728 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=48 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export 
EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 36,864 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 73,728 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=48 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=1.0 +export L_Z=1.0 +#-- global size +export SIZE_X=$INITIAL_SIZE_X +export SIZE_Y=$INITIAL_SIZE_Y +export SIZE_Z=$INITIAL_SIZE_Z +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch 
--ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +fi diff --git a/samples/validation/run/juwels_weakscaling.sh b/samples/validation/run/juwels_weakscaling.sh new file mode 100755 index 00000000..16b2d373 --- /dev/null +++ b/samples/validation/run/juwels_weakscaling.sh @@ -0,0 +1,531 @@ +#!/bin/bash +# bash (not plain sh) is required: this script uses [[ ]] tests and <<< here-strings +HOME_FLUPS=/p/project/prpa79/flups/samples/validation +KERNEL=juwels_kernel_valid.sh + +## fixed parameters +export SIZE_PER_PROC=64 + +export ver=$1 + +if [[ -z $1 ]]; +then + echo "you must specify a version (small/large/Xlarge) as an argument" + exit 1 +else + echo "starting as $1" +fi + +export nPerSwitch=1152 #number of processes per switch +export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches + + +if [ "$ver" = "small" ]; then +############################################################ +############################################################ +############################################################ +# SMALL (<=4k) +############################################################ +############################################################ +############################################################ +export PARTITION=batch + +############################################################ +# ALL TO ALL +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp
$HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + +#================== 1152 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=8 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=16 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} 
--time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp 
$HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 1152 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=8 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=16 +export MY_NY=12 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} 
--switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +elif [ "$ver" = "large" ]; then + +############################################################ +############################################################ +############################################################ +# LARGE (>4k, <=18k) +############################################################ +############################################################ +############################################################ +export PARTITION=batch + + +# 
############################################################ +# # ALL TO ALL +# #----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export 
SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:10:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 
24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +elif [ "$ver" = "Xlarge" ]; then + +############################################################ +############################################################ +############################################################ +# EXTRA LARGE (>18k) +############################################################ +############################################################ 
+############################################################ +export PARTITION=large + +# ############################################################ +# # ALL TO ALL +# #----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 36,864 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 73,728 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=32 +export MY_NY=48 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") 
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/${KERNEL} $SCRATCH +# go to it +cd $SCRATCH + + +#================== 36,864 CPU's ================ +#-- requested walltime +export WT='00:15:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / 
$nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +#================== 73,728 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=32 +export MY_NY=48 +export MY_NZ=48 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./${KERNEL}" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL} +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL} + + +fi diff --git a/samples/validation/run/marenostrum_kernel_valid.sh b/samples/validation/run/marenostrum_kernel_valid.sh index 2bb21796..6d88bf10 100755 --- a/samples/validation/run/marenostrum_kernel_valid.sh +++ b/samples/validation/run/marenostrum_kernel_valid.sh @@ -1,11 +1,14 @@ 
#!/bin/bash # Submission script for Marenostrum #SBATCH --job-name=scaling -#SBATCH --time=00:10:00 # #SBATCH --output=flups_%j.out #SBATCH --error=flups_%j.err #SBATCH --qos=prace +#SBATCH --exclude=s07r2b[01-24],s05r1b[01-24] +#--> both node ranges are kept in ONE --exclude list (a repeated --exclude is expected to keep only its last value) +#--> s07r2b[01-24]: one of these failed at FFTW plans alloc +#--> s05r1b[01-24]: proc s05r1b16 gave invalid address or slot during writev export OMP_NUM_THREADS=${MY_NTHREADS} @@ -22,3 +25,5 @@ echo "----------------- launching job -----------------" echo "launch command: srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0" srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0 + +scontrol show job ${SLURM_JOB_ID} \ No newline at end of file diff --git a/samples/validation/run/marenostrum_weakscaling.sh b/samples/validation/run/marenostrum_weakscaling.sh index f3c1501e..bc2dd256 100755 --- a/samples/validation/run/marenostrum_weakscaling.sh +++ b/samples/validation/run/marenostrum_weakscaling.sh @@ -1,21 +1,45 @@ -#!/bin/sh +#!/bin/bash ## RM the previous validation dir -HOME_FLUPS=/home/pr1ekp00/pr1ekp02/flups/samples/validation +HOME_FLUPS=/home/pr1ekp00/$(whoami)/flups/samples/validation ## fixed parameters export SIZE_PER_PROC=128 +export ver=$1 + +if [[ -z $1 ]]; +then + echo "you must specify a version (small/large) as an argument" + exit 1 +else + echo "starting as $1" +fi + +export nPerSwitch=1152 #number of process per switch +export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches + + +if [ "$ver" = "small" ]; then +############################################################ +############################################################ +############################################################ +# SMALL (<=4k) +############################################################ +############################################################
+############################################################ + + ############################################################ # ALL TO ALL #----------------------------------------------------------- export EXEC_FLUPS=flups_validation_a2a -SCRATCH=/gpfs/scratch/pr1ekp00/pr1ekp02/flups_weak_a2a_align16 +SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_a2a_${ver} # clean the validation dir -rm -rf ${SCRATCH} +# rm -rf ${SCRATCH} mkdir -p $SCRATCH mkdir -p $SCRATCH/data mkdir -p $SCRATCH/prof @@ -26,8 +50,9 @@ cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH # go to it cd $SCRATCH - #================== 1152 CPU's ================ +#-- requested walltime +export WT='00:10:00' #-- proc domain export MY_NX=8 export MY_NY=12 @@ -42,11 +67,24 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) #-- 1 thread export MY_NTHREADS=1 -export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ)) -sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh +# #-- 4 thread +# export WT='00:20:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} ") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} / 2") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh 
#================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' #-- proc domain export MY_NX=16 export MY_NY=12 @@ -61,8 +99,57 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) #-- 1 thread export MY_NTHREADS=1 -export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ)) -sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + +# #-- 4 thread +# export WT='00:30:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh + + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / 
$nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + +# #-- 4 thread +# export WT='00:40:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh + + + + ############################################################ @@ -70,10 +157,10 @@ sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel #----------------------------------------------------------- export EXEC_FLUPS=flups_validation_nb -SCRATCH=/gpfs/scratch/pr1ekp00/pr1ekp02/flups_weak_nb_align16 +SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_nb_${ver} # clean the validation dir -rm -rf ${SCRATCH} +# rm -rf ${SCRATCH} mkdir -p $SCRATCH mkdir -p $SCRATCH/data mkdir -p $SCRATCH/prof @@ -86,6 +173,8 @@ cd $SCRATCH #================== 1152 CPU's ================ +#-- requested walltime +export WT='00:10:00' #-- proc domain export MY_NX=8 export MY_NY=12 @@ -100,11 +189,24 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) #-- 1 thread export MY_NTHREADS=1 -export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ)) -sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS 
ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh +# #-- 4 thread +# export WT='00:20:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} ") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} / 2") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh #================== 2304 CPU's ================ +#-- requested walltime +export WT='00:15:00' #-- proc domain export MY_NX=16 export MY_NY=12 @@ -119,5 +221,210 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) #-- 1 thread export MY_NTHREADS=1 -export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ)) -sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + +# #-- 4 thread +# export WT='00:30:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< 
"scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh + +#================== 4608 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=12 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + +# #-- 4 thread +# export WT='00:40:00' +# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2") +# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2") +# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ") +# export MY_NTHREADS=4 +# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh" +# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh + + +elif [ "$ver" = "large" ]; then + +############################################################ +############################################################ +############################################################ +# LARGE (>4k, <=18k) 
+############################################################ +############################################################ +############################################################ + + + +# ############################################################ +# # ALL TO ALL +# #----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_a2a + +SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_a2a_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size 
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + + + + +############################################################ +# NON-BLOCKING +#----------------------------------------------------------- +export EXEC_FLUPS=flups_validation_nb + +SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_nb_${ver} + +# clean the validation dir +# rm -rf ${SCRATCH} +mkdir -p $SCRATCH +mkdir -p $SCRATCH/data +mkdir -p $SCRATCH/prof + +# copy the needed info +cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH +cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH +# go to it +cd $SCRATCH + + +#================== 9216 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=16 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- 
./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + + +#================== 18432 CPU's ================ +#-- requested walltime +export WT='00:20:00' +#-- proc domain +export MY_NX=32 +export MY_NY=24 +export MY_NZ=24 +#-- domain length +export L_X=1.0 +export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX") +export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX") +#-- global size +export SIZE_X=$(($SIZE_PER_PROC*$MY_NX)) +export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY)) +export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ)) +#-- 1 thread +export MY_NTHREADS=1 +export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1") +export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES +echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} -- ./marenostrum_kernel_valid.sh" +sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh + + +elif [ "$ver" = "Xlarge" ]; then + +############################################################ +############################################################ +############################################################ +# EXTRA LARGE (>18k) +############################################################ +############################################################ +############################################################ + +echo "must be done" + +fi \ No newline at end of file diff --git a/samples/validation/run/zenobe_convergence_a2a.sh b/samples/validation/run/zenobe_convergence_a2a.sh index 1e183d41..d83e98c5 100755 --- a/samples/validation/run/zenobe_convergence_a2a.sh +++ b/samples/validation/run/zenobe_convergence_a2a.sh @@ -1,10 +1,12 @@ #!/bin/sh ## RM the previous validation dir 
-HOME_FLUPS=/home/acad/ucl-tfl/dcaprace/FLUPS/flups_green/samples/validation +#HOME_FLUPS=/home/acad/ucl-tfl/dcaprace/FLUPS/flups_green/samples/validation +HOME_FLUPS=/home/acad/ucl-tfl/tgillis/flups/samples/validation EXEC_FLUPS=flups_validation_a2a -SCRATCH=/SCRATCH/acad/examples/dcaprace/flups_convergence_a2a +#SCRATCH=/SCRATCH/acad/examples/dcaprace/flups_convergence_a2a +SCRATCH=/SCRATCH/acad/examples/tgillis/flups_convergence_a2a # clean the validation dir rm -rf ${SCRATCH} @@ -21,18 +23,22 @@ cd $SCRATCH ## 256 export MY_RES=256 -qsub -q main -v MY_NX=1,MY_NY=2,MY_NZ=2,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh +#qsub -q main -v MY_NX=1,MY_NY=2,MY_NZ=2,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh +qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh ## 512 export MY_RES=512 -qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=4,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh +#qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=4,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh +qsub -q main -v MY_NX=4,MY_NY=4,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh ## 1024 export MY_RES=1024 -qsub -q large -v MY_NX=4,MY_NY=6,MY_NZ=8,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh +#qsub -q large -v MY_NX=4,MY_NY=6,MY_NZ=8,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh +qsub -q large -v 
MY_NX=16,MY_NY=12,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh ## 2048 export MY_RES=2048 -qsub -q large -v MY_NX=8,MY_NY=12,MY_NZ=16,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh +#qsub -q large -v MY_NX=8,MY_NY=12,MY_NZ=16,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh +qsub -q large -v MY_NX=32,MY_NY=48,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh #end of file diff --git a/samples/validation/run/zenobe_kernelConv_a2a.sh b/samples/validation/run/zenobe_kernelConv_a2a.sh index 8b08fd8b..5eb2206e 100755 --- a/samples/validation/run/zenobe_kernelConv_a2a.sh +++ b/samples/validation/run/zenobe_kernelConv_a2a.sh @@ -3,7 +3,7 @@ #PBS -N convergence #PBS -r y #PBS -W group_list=examples -#PBS -l walltime=00:10:00 +#PBS -l walltime=00:20:00 exec > ${PBS_O_WORKDIR}/${PBS_JOBNAME}_${PBS_JOBID}.log echo "------------------ Work dir --------------------" @@ -40,6 +40,8 @@ MY_SIZE_Z=$((${MY_SIZE} * ${L_Z}/${L_X})) ######################### -bc 4 4 4 4 4 4 ########################### echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 >> stdout_${PBS_JOBID}" +mpirun -n 
${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 >> stdout_${PBS_JOBID}" @@ -51,6 +53,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ######################### -bc 0 0 1 0 3 3 ########################### echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID} +#echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}" +#mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np 
${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}" @@ -62,6 +66,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ######################### -bc 4 0 4 4 1 4 ########################### echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID} +# echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}" +# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} 
${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}" @@ -73,6 +79,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ######################### -bc 3 3 4 4 4 4 ########################### echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID} +# echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}" +# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np 
${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}" @@ -80,6 +88,60 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}" mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID} +############################################################################################################################################################################################################################################### +# 2D +############################################################################################################################################################################################################################################### + +######################### -bc 4 4 4 4 9 9 ########################### +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 4 4 4 9 9>> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} 
./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID} + + +######################### -bc 0 0 1 0 9 9 ########################### +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID} +# echo "launching
mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}" +# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID} + + +######################### -bc 4 0 4 4 1 4 ########################### +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} 
-nres 1 -k 0 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 4 0 4 4 9 9 >> 
stdout_${PBS_JOBID} + + +######################### -bc 3 3 4 4 4 4 ########################### +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID} +# echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}" +# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID} +echo "launching mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} 
${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}" +mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID} ################## diff --git a/samples/validation/scripts/test_3D_bcs.py b/samples/validation/scripts/test_3D_bcs.py index eecfc710..a3cc5ff9 100644 --- a/samples/validation/scripts/test_3D_bcs.py +++ b/samples/validation/scripts/test_3D_bcs.py @@ -15,15 +15,21 @@ n_success = 0 n_failure = 0 +BC2 = BC1.copy() +BC2.append(["9","9"]) + i = 0 for bcx in BC1 : for bcy in BC1 : - for bcz in BC1: + for bcz in BC2: i+=1 code = bcx[0] + bcx[1] + bcy[0] + bcy[1] + bcz[0] + bcz[1] #Launching test - r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["8"] + ["-bc"] + bcx + bcy + bcz, capture_output=True) + if(bcz == ["9","9"]): + r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["1"] + ["-bc"] + bcx + bcy + bcz, capture_output=True) + else: + r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["8"] + ["-bc"] + bcx + bcy + bcz, capture_output=True) if r.returncode != 0 : print("test %i (BCs : "%i + code + ") failed with error code ",r.returncode) diff --git a/samples/validation/scripts/test_3D_kerns.py b/samples/validation/scripts/test_3D_kerns.py index aaf9bc29..9a50aebd 100644 --- a/samples/validation/scripts/test_3D_kerns.py +++ b/samples/validation/scripts/test_3D_kerns.py @@ -8,9 +8,12 @@ ["4","1","1","4","4","4"], ["4","1","4","4","4","4"], ["4","0","1","4","4","4"], - ["4","0","1","4","4","1"]] + ["4","0","1","4","4","1"], + ["3","3","3","3","3","3"], + ["4","0","1","4","9","9"], + ["3","3","3","3","9","9"]] -Kernels = ['0','2','3','4'] +Kernels = ['0','1','2','3','4'] #Running all combinations of bcs and all kernels n_success = 0 @@ -27,9 +30,15 @@ code = ''.join(bcs) - # Launching test - #+ 
["-oversubscribe"] - r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["1"] + ["2"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["16"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True) + # if kernel = LGF, we only do the unbounded, if not, we do everything + # if ((kern=='1' and (bcs==["4","4","4","4","4","4"] or bcs==["3","3","3","3","9","9"])) or (kern != '1') ): + # Launching test + #+ ["-oversubscribe"] + if(bcs[4:6] == ["9","9"]): + # print("kikouuu from "%i + code) + r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["2"] + ["1"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["1"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True) + else: + r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["2"] + ["1"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["16"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True) if r.returncode != 0 : print("test %i (BCs : "%i + code + "with kernel "+kern+") failed with error code ",r.returncode) @@ -51,7 +60,8 @@ print("test %i (BCs : "%i + code + " and k="+ kern+ ") failed with wrong values.") print("/!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ \n") n_failure += 1 - + # else: + # print("test %i (BCs : "%i + code + " and k="+ kern+ ") does not apply") print("%i test succeed out of %i" % (n_success,n_success+n_failure)) exit(n_failure) diff --git a/samples/validation/src/main.cpp b/samples/validation/src/main.cpp index d6d6eea0..c7ab630a 100644 --- a/samples/validation/src/main.cpp +++ b/samples/validation/src/main.cpp @@ -51,7 +51,7 @@ static void print_help(){ printf(" --nresolution, -nres Nr : Nr is the number of higher resolutions that will be tested, with a resolution (R * 2^[0:Nr-1])\n"); printf(" --nsolve, -ns Ns : Ns is the number of times each validation case will be run (for statistics on the profiler) \n"); 
printf(" --length, -L Lx Ly Lz : Lx,Ly,Lz is the dimension of the physical domain \n"); - printf(" --kernel, -k [0,2-4]: the Green kernel 0=CHAT2, 2=HEJ2, 3=HEJ4, 4=HEJ6 \n"); + printf(" --kernel, -k [0-4]: the Green kernel 0=CHAT2, 1=LGF2, 2=HEJ2, 3=HEJ4, 4=HEJ6 \n"); printf(" --boundary-conditions, -bc \n "); printf(" Bxl Bxr Byl Byr Bzl Bzr : the boundary conditions in x/y/z on each side l/r. 0=EVEN, 1=ODD, 3=PERiodic, 4=UNBounded \n"); printf(" --predefined-test, -pt : runs a predefined validation test with several combination of UNB BCs and all the Green Kernels (excludes -L, -k and -bc) \n "); @@ -101,8 +101,8 @@ int static parse_args(int argc, char *argv[], int nprocs[3], double L[3], FLUPS_ for (int j = 0; j<3;j++){ if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! L[j] = atof(argv[i+j+1]); - if(L[j]<=0.0){ - fprintf(stderr, "L must be >0\n"); + if(L[j]<0.0){ + fprintf(stderr, "L must be >=0\n"); return 1; } } else { //Missing argument diff --git a/samples/validation/src/validation_3d.cpp b/samples/validation/src/validation_3d.cpp index 99046516..4288a411 100644 --- a/samples/validation/src/validation_3d.cpp +++ b/samples/validation/src/validation_3d.cpp @@ -248,6 +248,8 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const manuRHS[dir] = &d2dx2_fUnbSpietz; manuSol[dir] = &fUnbSpietz; } else { + manuRHS[dir] = &fZero; + manuSol[dir] = &fCst; // FLUPS_ERROR("I don''t know how to generate an analytical solution for this combination of BC.", LOCATION); } } @@ -367,6 +369,14 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const double lerr2 = 0.0; double lerri = 0.0; + //determine the volume associated to a mesh + double vol = 1.0; + for (int id = 0; id < 3; id++) { + if (mybc[id][0] != NONE && mybc[id][1] != NONE) { + vol *= h[id]; + } + } + { const int ax0 = flups_topo_get_axis(topo); const int ax1 = (ax0 + 1) % 3; @@ -379,7 +389,7 @@ void validation_3d(const DomainDescr myCase, const 
FLUPS_SolverType type, const const double err = sol[id] - field[id]; lerri = max(lerri, fabs(err)); - lerr2 += (err * err) * h[0] * h[1] * h[2]; + lerr2 += (err * err) * vol; } } } @@ -407,7 +417,8 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const fprintf(myfile, "%d %12.12e %12.12e\n", nglob[0], err2, erri); fclose(myfile); } else { - // FLUPS_CHECK(false, "unable to open file %s", filename, LOCATION); + printf("unable to open file %s ! Here is what I would have written:", filename); + printf("%d %12.12e %12.12e\n", nglob[0], err2, erri); } } diff --git a/src/FFTW_plan_dim.cpp b/src/FFTW_plan_dim.cpp index 440b0c70..eadd5fa6 100644 --- a/src/FFTW_plan_dim.cpp +++ b/src/FFTW_plan_dim.cpp @@ -80,6 +80,14 @@ FFTW_plan_dim::FFTW_plan_dim(const int dimID, const double h[3], const double L[ _volfact = h[_dimID]; _kfact = c_2pi / (2.0 * L[_dimID]); _koffset = 0.0; + } else if (mytype == EMPTY) { + _type = EMPTY; + // chosen to have no influence + _normfact = 1.0; + _volfact = 1.0; + _kfact = 0.0; + _koffset = 0.0; + _isSpectral = false; } else { FLUPS_ERROR("Invalid combination of BCs", LOCATION); } @@ -128,9 +136,11 @@ void FFTW_plan_dim::init(const int size[3], const bool isComplex) { _init_mixunbounded(size, isComplex); } else if (_type == PERPER) { //this is the only transform that could give a R2C on data and being spectral for green - _init_periodic(size, isComplex); + _init_periodic(size, isComplex); } else if (_type == UNBUNB) { _init_unbounded(size, isComplex); + } else if (_type == EMPTY) { + FLUPS_INFO_1("No plan required for this direction"); } END_FUNC; } @@ -473,6 +483,8 @@ void FFTW_plan_dim::_allocate_plan_real(const Topology *topo, double* data) { _plan = fftw_plan_r2r_1d(_n_in, data, data, _kind, FFTW_FLAG); } else if (topo->nf() == 2) { + // if the topology is complex and I will do a R2R + // I do the FFT only on the real part _fftw_stride = memsize[_dimID] * topo->nf(); _plan = fftw_plan_many_r2r(1, (int*)(&_n_in), 
1, data, NULL, topo->nf(), memsize[_dimID] * topo->nf(), @@ -565,6 +577,23 @@ void FFTW_plan_dim::_allocate_plan_complex(const Topology *topo, double* data) { } else { FLUPS_CHECK(topo->nf() == 2, "the nf of the input topology has to be 1 = real topo",LOCATION); + FLUPS_INFO("------------------------------------------"); + if (_type == PERPER) { + FLUPS_INFO("## C2C plan created for plan periodic-periodic (=%d)", _type); + } else if (_type == UNBUNB) { + FLUPS_INFO("## C2C plan created for plan unbounded (=%d)", _type); + } + if (_sign == FLUPS_FORWARD) { + FLUPS_INFO("FORWARD transfrom"); + } else if (_sign == FLUPS_BACKWARD) { + FLUPS_INFO("BACKWARD transfrom"); + } + FLUPS_INFO("memsize = %d x %d x %d", memsize[0], memsize[1], memsize[2]); + FLUPS_INFO("dimID = %d", _dimID); + FLUPS_INFO("howmany = %d", _howmany); + FLUPS_INFO("fftw stride = %d", _fftw_stride); + FLUPS_INFO("size n = %d", _n_in); + FLUPS_INFO("------------------------------------------"); _plan = fftw_plan_dft_1d(_n_in, (fftw_complex*)data, (fftw_complex*)data, _sign, FFTW_FLAG); } END_FUNC; @@ -591,6 +620,9 @@ void FFTW_plan_dim::execute_plan(const Topology *topo, double* data) const { FLUPS_INFO(">> Doing plan periodic-periodic for dim %d", _dimID); } else if (_type == UNBUNB) { FLUPS_INFO(">> Doing plan unbounded for dim %d", _dimID); + } else if (_type == EMPTY) { + FLUPS_INFO(">> Doing no plan for dim %d", _dimID); + return; } const int howmany = _howmany; @@ -604,7 +636,16 @@ void FFTW_plan_dim::execute_plan(const Topology *topo, double* data) const { #ifndef NDEBUG for (int id = 0; id < howmany; id++) { // get the memory - double* mydata = (double*)data + id * fftw_stride; + double* mydata; + if (_type == SYMSYM || _type == MIXUNB) { + mydata = (double*)data + id * fftw_stride; + } else if (_type == PERPER || _type == UNBUNB) { + if (_isr2c) { + mydata = (double*)data + id * fftw_stride; + } else { + mydata = (double*)data + id * fftw_stride * 2; + } + } // check the alignment 
FLUPS_CHECK(fftw_alignment_of(mydata) == 0, "data for FFTW have to be aligned on the FFTW alignement! Alignment is %d with id = %d and fftw_stride = %d", fftw_alignment_of(mydata), id, _fftw_stride, LOCATION); } diff --git a/src/FFTW_plan_dim.hpp b/src/FFTW_plan_dim.hpp index aa112ffc..892a0264 100644 --- a/src/FFTW_plan_dim.hpp +++ b/src/FFTW_plan_dim.hpp @@ -35,6 +35,7 @@ * */ class FFTW_plan_dim { + public: /** * @brief PlanType is the type of plan considered and is computed as the sum of both BoundaryType variables * @@ -47,7 +48,8 @@ class FFTW_plan_dim { SYMSYM = 2, /**< type real 2 real (DCT / DST) : EE (0) , EO/OE (1) , OO (2) */ MIXUNB = 5, /**< type unbounded and a symetry condition: UE/EU (4) , UO/OU (5) */ PERPER = 6, /**< type periodic - periodic: PERPER (6) */ - UNBUNB = 8 /**< type fully unbounded UU (8) */ + UNBUNB = 8, /**< type fully unbounded UU (8) */ + EMPTY = 18 /**< type empty, i.e. this direction is not used */ }; protected: @@ -55,21 +57,21 @@ class FFTW_plan_dim { const int _dimID; /**< @brief the dimension of the plan in the field reference */ const int _sign; /**< @brief FFT_FORWARD (-1) or FFT_BACKWARD(+1) */ - bool _ignoreMode = false; /**< @brief do we have to ignore a mode in the output? k=0 if _shiftgreen=1 or k=end if _shiftgreen = 0*/ - bool _isr2c = false; /**< @brief is this plan the one that changes to complex?*/ - bool _imult = false; /**< @brief boolean to determine if we have to multiply by (i=sqrt(-1)) or not*/ - bool _isSpectral = false; /**< @brief indicate if the Green's function has to be done spectrally (leading to a helmolz problem) */ + bool _ignoreMode = false; /**< @brief do we have to ignore a mode in the output? 
k=0 if _shiftgreen=1 or k=end if _shiftgreen = 0*/ + bool _isr2c = false; /**< @brief is this plan the one that changes to complex?*/ + bool _imult = false; /**< @brief boolean to determine if we have to multiply by (i=sqrt(-1)) or not*/ + bool _isSpectral = false; /**< @brief indicate if the Green's function has to be done spectrally (leading to a helmolz problem) */ int _fftw_stride = 0; - int _howmany = 0; - int _fieldstart = 0; /**< @brief the starting index for the field copy in the direction of the plan*/ - int _n_in = 0; /**< @brief the number of element in the transform*/ - int _n_out = 0; /**< @brief the number of element coming out of the transform*/ - int _shiftgreen = 0; /**< @brief the shift to set in the Green's function when doing the convolution*/ - double _symstart = 0.0; /**< @brief the first index to be copied for the symmetry done on the Green's function, set to 0 if no symmetry is needed*/ - double _normfact = 0.0; /**< @brief factor you need to multiply to get the transform on the right scaling*/ - double _volfact = 0.0; /**< @brief volume factor*/ - double _kfact = 0.0; /**< @brief multiplication factor to have the correct k numbers*/ - double _koffset = 0.0; /**< @brief additive factor to have the correct k numbers*/ + int _howmany = 0; + int _fieldstart = 0; /**< @brief the starting index for the field copy in the direction of the plan*/ + int _n_in = 1; /**< @brief the number of element in the transform*/ + int _n_out = 1; /**< @brief the number of element coming out of the transform*/ + int _shiftgreen = 0; /**< @brief the shift to set in the Green's function when doing the convolution*/ + double _symstart = 0.0; /**< @brief the first index to be copied for the symmetry done on the Green's function, set to 0 if no symmetry is needed*/ + double _normfact = 1.0; /**< @brief factor you need to multiply to get the transform on the right scaling*/ + double _volfact = 1.0; /**< @brief volume factor*/ + double _kfact = 0.0; /**< @brief 
multiplication factor to have the correct k numbers*/ + double _koffset = 0.0; /**< @brief additive factor to have the correct k numbers*/ PlanType _type; /**< @brief type of this plan, see #PlanType*/ BoundaryType _bc[2]; /**< @brief boundary condition [0]=LEFT/MIN - [1]=RIGHT/MAX*/ diff --git a/src/Profiler.cpp b/src/Profiler.cpp index ad27a0e7..b5e668ae 100644 --- a/src/Profiler.cpp +++ b/src/Profiler.cpp @@ -258,7 +258,9 @@ void TimerAgent::disp(FILE* file,const int level, const double totalTime){ // printf the important information if (rank == 0) { printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", myname.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", _name.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth); + if (file != NULL) { + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", _name.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth); + } } } // recursive call to the childrens @@ -437,8 +439,13 @@ void Profiler::disp(const std::string ref) { string filename = folder + "/" + _name + "_parent.csv"; file = fopen(filename.c_str(), "w+"); - _timeMap["root"]->writeParentality(file,0); - fclose(file); + + if (file != NULL) { + _timeMap["root"]->writeParentality(file,0); + fclose(file); + } else { + printf("unable to open file %s !", filename.c_str()); + } } @@ -447,7 +454,7 @@ void Profiler::disp(const std::string ref) { //------------------------------------------------------------------------- if (rank == 0) { - string filename = "prof/" + _name + "_time.csv"; + string filename = "./prof/" + _name + "_time.csv"; file = fopen(filename.c_str(), "w+"); } // display 
the header @@ -475,6 +482,11 @@ void Profiler::disp(const std::string ref) { printf("Max time - the max time / call spend in that timer among the processors\n"); printf("Mean cnt - the total number of time the timer has been called (averaged among the processors)\n"); printf("===================================================================================================================================================\n"); - fclose(file); + + if (file != NULL) { + fclose(file); + } else { + printf("unable to open file for profiling !"); + } } } diff --git a/src/Solver.cpp b/src/Solver.cpp index 8ac6eb86..b9c176b0 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -97,9 +97,12 @@ Solver::Solver(Topology *topo, const BoundaryType mybc[3][2], const double h[3], //------------------------------------------------------------------------- /** - For each dim, create the plans given the BC and sort them by type */ //------------------------------------------------------------------------- - for (int id = 0; id < 3; id++) + for (int id = 0; id < 3; id++){ _hgrid[id] = h[id]; + } + // we allocate 3 plans + // it might be empty ones but we keep them since we need some information inside... for (int id = 0; id < 3; id++) { _plan_forward[id] = new FFTW_plan_dim(id, h, L, mybc[id], FLUPS_FORWARD, false); _plan_backward[id] = new FFTW_plan_dim(id, h, L, mybc[id], FLUPS_BACKWARD, false); @@ -111,6 +114,16 @@ Solver::Solver(Topology *topo, const BoundaryType mybc[3][2], const double h[3], _sort_plans(_plan_green); FLUPS_INFO("I will proceed with forward transforms in the following direction order: %d, %d, %d", _plan_forward[0]->dimID(), _plan_forward[1]->dimID(), _plan_forward[2]->dimID()); + //------------------------------------------------------------------------- + /** - compute the real problem size using forward plans, i.e. are we 2D or 3D? 
*/ + //------------------------------------------------------------------------- + _ndim = 3; + for(int id=0; id<3; id++){ + if(_plan_forward[id]->type() == FFTW_plan_dim::EMPTY){ + _ndim --; + } + } + //------------------------------------------------------------------------- /** - Initialise the topos, the plans and the SwitchTopos */ //------------------------------------------------------------------------- @@ -170,8 +183,9 @@ double* Solver::setup(const bool changeTopoComm) { /** - Precompute the communication graph */ //------------------------------------------------------------------------- // get the communication size - int worldsize; + int worldsize, rank; MPI_Comm_size(_topo_phys->get_comm(), &worldsize); + MPI_Comm_rank(_topo_phys->get_comm(), &rank); // initialize the sources, sources weights, destination and destination weights int* sources = (int*)flups_malloc(worldsize * sizeof(int)); @@ -200,11 +214,11 @@ double* Solver::setup(const bool changeTopoComm) { // ranks not having a self block) ! 
// if we can change the topology, do it for every swithTopo if (changeTopoComm) { - for (int i = 0; i < 3; i++) { + for (int i = 0; i < _ndim; i++) { _switchtopo[i]->add_toGraph(sourcesW, destsW); } } else { - for (int i = 1; i < 3; i++) { + for (int i = 1; i < _ndim; i++) { _switchtopo[i]->add_toGraph(sourcesW, destsW); } } @@ -214,37 +228,30 @@ double* Solver::setup(const bool changeTopoComm) { //------------------------------------------------------------------------- MPI_Comm graph_comm; #ifndef HAVE_METIS - MPI_Dist_graph_create_adjacent(_topo_phys->get_comm(), worldsize, sources, sourcesW, \ - worldsize, dests, destsW, \ - MPI_INFO_NULL, 1, &graph_comm); - - flups_free(sources); - flups_free(sourcesW); - flups_free(dests); - flups_free(destsW); + MPI_Dist_graph_create_adjacent(_topo_phys->get_comm(), worldsize, sources, sourcesW, + worldsize, dests, destsW, + MPI_INFO_NULL, 1, &graph_comm); - #if defined(VERBOSE) && VERBOSE==2 +#if defined(VERBOSE) && VERBOSE == 2 int inD, outD, wei; MPI_Dist_graph_neighbors_count(graph_comm, &inD, &outD, &wei); - printf("[FGRAPH] inD:%d outD:%d wei:%d\n",inD,outD,wei); + printf("[FGRAPH] inD:%d outD:%d wei:%d\n", inD, outD, wei); - int* Sour = (int*) malloc(sizeof(int)*inD); - int* SourW = (int*) malloc(sizeof(int)*inD); - int* Dest = (int*) malloc(sizeof(int)*outD); - int* DestW = (int*) malloc(sizeof(int)*outD); + int *Sour = (int *)malloc(sizeof(int) * inD); + int *SourW = (int *)malloc(sizeof(int) * inD); + int *Dest = (int *)malloc(sizeof(int) * outD); + int *DestW = (int *)malloc(sizeof(int) * outD); - MPI_Dist_graph_neighbors(graph_comm, inD, Sour, SourW, - outD, Dest, DestW); + MPI_Dist_graph_neighbors(graph_comm, inD, Sour, SourW, + outD, Dest, DestW); printf("[FGRAPH] INedges: "); - for (int i=0; iget_comm(), &rank); - //switch indices by a random number: -#ifdef DEV_REORDER_SHIFT - int shift = DEV_REORDER_SHIFT; -#else - int shift = worldsize/2; -#endif + #ifdef DEV_REORDER_SHIFT + int shift = DEV_REORDER_SHIFT; + 
#else + int shift = worldsize/2; + #endif int* outRanks = (int*) flups_malloc(sizeof(int)*worldsize); if(rank == 0){ @@ -286,18 +290,26 @@ double* Solver::setup(const bool changeTopoComm) { flups_free(outRanks); #endif +//end simulate_graph + + #ifdef PROF + //writing reordering to console + int newrank; + MPI_Comm_rank(graph_comm, &newrank); + printf("[MPI ORDER] %i : %i \n", rank, newrank); + #endif #else //Use METIS to find a smart partition of the graph int *order = (int *)flups_malloc(sizeof(int) * worldsize); - reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order); + _reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order); // create a new comm based on the order given by metis MPI_Group group_in, group_out; MPI_Comm_group(_topo_phys->get_comm(), &group_in); //get the group of the current comm MPI_Group_incl(group_in, worldsize, order, &group_out); //manually reorder the ranks MPI_Comm_create(_topo_phys->get_comm(), group_out, &graph_comm); // create the new comm flups_free(order); -#endif +#endif // METIS flups_free(sources); flups_free(sourcesW); @@ -312,7 +324,7 @@ double* Solver::setup(const bool changeTopoComm) { // The first switch topo will serve to redistribute // data following the optimized topology on the cluster, with reordered // ranks - for(int i=0;i<3;i++){ + for(int i=0;i<_ndim;i++){ _topo_hat[i]->change_comm(graph_comm); _topo_green[i]->change_comm(graph_comm); } @@ -320,9 +332,9 @@ double* Solver::setup(const bool changeTopoComm) { _topo_phys->change_comm(graph_comm); } -#ifdef PERF_VERBOSE + #ifdef PERF_VERBOSE _topo_hat[0]->disp_rank(); -#endif + #endif #endif //REORDER_RANKS @@ -352,7 +364,7 @@ double* Solver::setup(const bool changeTopoComm) { // finalize green by replacing some data in full spectral if needed by the kernel, // and by doing a last switch to the field topo if (_prof != NULL) _prof->start("green_final"); - _finalizeGreenFunction(_topo_hat[2], _green, _topo_green[2], 
_plan_green); + _finalizeGreenFunction(_topo_hat[_ndim-1], _green, _topo_green[_ndim-1], _plan_green); if (_prof != NULL) _prof->stop("green_final"); //------------------------------------------------------------------------- @@ -384,7 +396,7 @@ double* Solver::setup(const bool changeTopoComm) { //------------------------------------------------------------------------- /** - Setup the SwitchTopo, this will take the latest comm into account */ //------------------------------------------------------------------------- - _allocate_switchTopo(3, _switchtopo, &_sendBuf, &_recvBuf); + _allocate_switchTopo(_ndim, _switchtopo, &_sendBuf, &_recvBuf); if (_prof != NULL) _prof->stop("setup"); @@ -413,7 +425,7 @@ Solver::~Solver() { // cleanup the communicator if any #ifdef REORDER_RANKS - MPI_Comm mycomm = _topo_hat[2]->get_comm(); + MPI_Comm mycomm = _topo_hat[_ndim-1]->get_comm(); MPI_Comm_free(&mycomm); #endif _delete_topologies(_topo_hat); @@ -440,7 +452,7 @@ const Topology* Solver::get_innerTopo_physical() { * */ const Topology* Solver::get_innerTopo_spectral() { - return _topo_hat[2]; + return _topo_hat[_ndim-1]; } /** @@ -578,7 +590,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi //------------------------------------------------------------------------- bool isComplex = false; //this refers to the "current state" of the data during dry run int nproc[3]; - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { // initialize the plan (for Green only, using info from _plan_forward) planmap[ip]->init(size_tmp, isComplex); // update the size_tmp variable and get the complex information @@ -597,15 +609,12 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi if (!isGreen && topomap != NULL && switchtopo != NULL) { // determines the proc repartition using the previous one if available if (ip == 0) { - //This was to keep an aspect ratio of the pencils in ax0 close to 1: - // 
pencil_nproc(dimID, nproc, comm_size, size_tmp); - //--------- - //Finally, we opt for the following, which will maximize the total number of subcoms that we will be able to do - // over the 3 switchtopos: + // for the first switchTopo, we keep the number of proc constant in the 3rd direction const int nproc_hint[3] = {topo->nproc(0), topo->nproc(1), topo->nproc(2)}; pencil_nproc_hint(dimID, nproc, comm_size, dimOrder[1], nproc_hint); } else { const int nproc_hint[3] = {current_topo->nproc(0), current_topo->nproc(1), current_topo->nproc(2)}; + // for the other switchtopos, we keep constant the id that is not mine, neither the old topo id pencil_nproc_hint(dimID, nproc, comm_size, planmap[ip - 1]->dimID(), nproc_hint); } // create the new topology corresponding to planmap[ip] in the output layout (size and isComplex) @@ -660,7 +669,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi current_topo = NULL; // isComplex = false; //Change this for Helmolz: we will always need to fill Green in complex if (isGreen && topomap != NULL && switchtopo != NULL) { - for (int ip = 2; ip >= 0; ip--) { + for (int ip = _ndim-1; ip >= 0; ip--) { // get the fastest rotating index int dimID = planmap[ip]->dimID(); // store the correspondance of the transposition @@ -670,10 +679,10 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi } // get the proc repartition - if(ip>1){ + if(ip>_ndim-2){ //it has to be the same as the field in full spectral for(int i = 0;i<3;i++){ - nproc[i]=_topo_hat[2]->nproc(i); + nproc[i]=_topo_hat[_ndim-1]->nproc(i); } }else{ const int nproc_hint[3] = {current_topo->nproc(0), current_topo->nproc(1), current_topo->nproc(2)}; @@ -683,7 +692,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi // create the new topology in the output layout (size and isComplex) topomap[ip] = new Topology(dimID, size_tmp, nproc, isComplex, dimOrder, _fftwalignment, 
_topo_phys->get_comm()); //switchmap only to be done for topo0->topo1 and topo1->topo2 - if (ip < 2) { + if (ip < _ndim-1) { // get the fieldstart = the point where the old topo has to begin in the new int fieldstart[3] = {0}; // it shouldn't be different from 0 for the moment @@ -736,7 +745,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi //------------------------------------------------------------------------- /** - reset the topologies to real if needed, in order to prepare them for their execution */ //------------------------------------------------------------------------- - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { if (!isGreen && planmap[ip]->isr2c() && topomap != NULL) { topomap[ip]->switch2real(); } @@ -801,7 +810,7 @@ void Solver::_deallocate_switchTopo(SwitchTopo **switchtopo, opt_double_ptr *sen */ void Solver::_allocate_plans(const Topology *const topo[3], FFTW_plan_dim *planmap[3], double *data) { BEGIN_FUNC; - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { planmap[ip]->allocate_plan(topo[ip], data); } END_FUNC; @@ -826,7 +835,7 @@ void Solver::_allocate_data(const Topology *const topo[3], const Topology *topo_ //------------------------------------------------------------------------- // the biggest size will be along the pencils size_t size_tot = 1; - for (int id = 0; id < 3; id++) { + for (int id = 0; id < _ndim; id++) { size_tot = std::max(topo[id]->memsize(), size_tot); } if (topo_phys != NULL) { @@ -867,17 +876,18 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim //------------------------------------------------------------------------- bool isSpectral[3] = {false}; - double hfact[3]; // multiply the index by this factor to obtain the position (1/2/3 corresponds to x/y/z ) - double kfact[3]; // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z ) - double koffset[3]; // add this to 
the index to obtain the wave number (1/2/3 corresponds to x/y/z ) - double symstart[3]; - double epsilon = _alphaGreen * _hgrid[0]; //the smoothing length scale of the HEJ kernels + double hfact[3] = {0.0, 0.0, 0.0}; // multiply the index by this factor to obtain the position (1/2/3 corresponds to x/y/z ) + double kfact[3] = {0.0, 0.0, 0.0}; // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z ) + double koffset[3] = {0.0, 0.0, 0.0}; // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z ) + double symstart[3] = {0.0, 0.0, 0.0}; + double epsilon = _alphaGreen * _hgrid[0]; //the smoothing length scale of the HEJ kernels - if ((_typeGreen == HEJ_2 || _typeGreen == HEJ_4 || _typeGreen == HEJ_6) && (_hgrid[0] != _hgrid[1] || _hgrid[1] != _hgrid[2])) { - FLUPS_ERROR("You are trying to use a regularized kernel while not having dx=dy=dz.",LOCATION); + if ((_typeGreen == HEJ_2 || _typeGreen == HEJ_4 || _typeGreen == HEJ_6 || _typeGreen == LGF_2) && ((_ndim == 3 && (_hgrid[0] != _hgrid[1] || _hgrid[1] != _hgrid[2])) || (_ndim == 2 && _hgrid[0] != _hgrid[1]))) { + FLUPS_ERROR("You are trying to use a regularized kernel or a LGF while not having dx=dy=dz.", LOCATION); } - for (int ip = 0; ip < 3; ip++) { + // get the info + determine which green function to use: + for (int ip = 0; ip < _ndim; ip++) { const int dimID = planmap[ip]->dimID(); // get usefull datas isSpectral[dimID] = planmap[ip]->isSpectral(); @@ -891,11 +901,15 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim kfact[dimID] = planmap[ip]->kfact(); koffset[dimID] = planmap[ip]->koffset(); } + if (planmap[ip]->type() == FFTW_plan_dim::EMPTY) { + // kill the hfact to have no influence in the green's functions + hfact[dimID] = 0.0; + } } - // count the number of spectral dimensions + // count the number of spectral dimensions and the green dimension int nbr_spectral = 0; - for (int id = 0; id < 3; id++) { + for (int id = 
0; id < _ndim; id++) { if (isSpectral[id]) { nbr_spectral++; } @@ -904,22 +918,21 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim //------------------------------------------------------------------------- /** - get the expression of Green in the full domain*/ //------------------------------------------------------------------------- - if (GREEN_DIM == 3) { - if (nbr_spectral == 0) { - FLUPS_INFO(">> using Green function type %d on 3 dir unbounded",_typeGreen); - cmpt_Green_3D_3dirunbounded_0dirspectral(topo[0], hfact, symstart, green, _typeGreen, epsilon); - } else if (nbr_spectral == 1) { - FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded - 1 dir spectral",_typeGreen); - cmpt_Green_3D_2dirunbounded_1dirspectral(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon); - } else if (nbr_spectral == 2) { - FLUPS_INFO(">> using Green function of type %d on 1 dir unbounded - 2 dir spectral",_typeGreen); - cmpt_Green_3D_1dirunbounded_2dirspectral(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon); - } else if (nbr_spectral == 3) { - FLUPS_INFO(">> using Green function of type %d on 3 dir spectral",_typeGreen); - cmpt_Green_3D_0dirunbounded_3dirspectral(topo[0], kfact, koffset, symstart, green, _typeGreen, epsilon); - } + int n_unbounded = _ndim - nbr_spectral; + if ((n_unbounded) == 3) { + FLUPS_INFO(">> using Green function type %d on 3 dir unbounded", _typeGreen); + cmpt_Green_3dirunbounded(topo[0], hfact, symstart, green, _typeGreen, epsilon); + } else if ((n_unbounded) == 2) { + FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded", _typeGreen); + cmpt_Green_2dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon); + } else if ((n_unbounded) == 1) { + FLUPS_INFO(">> using Green function of type %d on 1 dir unbounded", _typeGreen); + cmpt_Green_1dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon); + } else if 
((n_unbounded) == 0) { + FLUPS_INFO(">> using Green function of type %d on 3 dir spectral", _typeGreen); + cmpt_Green_0dirunbounded(topo[0], _hgrid[0], kfact, koffset, symstart, green, _typeGreen, epsilon); } else { - FLUPS_ERROR("Sorry, the Green's function for 2D problems are not provided in this version.", LOCATION); + FLUPS_ERROR("Sorry, the number of unbounded directions does not match: %d = %d - %d", n_unbounded, _ndim, nbr_spectral, LOCATION); } // dump the green func @@ -932,7 +945,7 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim //------------------------------------------------------------------------- /** - compute a symmetry and do the forward transform*/ //------------------------------------------------------------------------- - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { const int dimID = planmap[ip]->dimID(); // go to the topology for the plan, if we are not already on it @@ -955,13 +968,13 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim //------------------------------------------------------------------------- // - Explixitely destroying mode 0 ? no need to do that: we impose Green[0] is 0 // in full spectral. - _scaleGreenFunction(topo[2], green, false); + _scaleGreenFunction(topo[_ndim-1], green, false); //------------------------------------------------------------------------- /** - Complete the Green function in 2dirunbounded regularized case: we rewrite on the whole domain * except the plane where k=0 in the spectral direction, as this was correctly computed. 
*/ // No need to scale this as that part of the Green function has a volfact = 1 - if (GREEN_DIM == 3 && nbr_spectral == 1 && (_typeGreen==HEJ_2||_typeGreen==HEJ_4||_typeGreen==HEJ_6)) { + if (_ndim == 3 && nbr_spectral == 1 && (_typeGreen==HEJ_2||_typeGreen==HEJ_4||_typeGreen==HEJ_6)) { int istart_cstm[3] = {0, 0, 0}; //global for (int ip = 0; ip < 3; ip++) { @@ -971,22 +984,9 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim kfact[dimID] = planmap[ip]->kfact(); koffset[dimID] += planmap[ip]->shiftgreen(); //accounts for shifted modes which affect the value of k } - cmpt_Green_3D_0dirunbounded_3dirspectral(topo[2], kfact, koffset, symstart, green, _typeGreen, epsilon, istart_cstm, NULL); + cmpt_Green_0dirunbounded(topo[2], _hgrid[0], kfact, koffset, symstart, green, _typeGreen, epsilon, istart_cstm, NULL); } - //------------------------------------------------------------------------- - // This is what you would fo if you had to fill only the first plan: - // { - // int iend_cstm[3] = {topo->nloc(0), topo->nloc(1), topo->nloc(2)}; //global - // for (int ip = 0; ip < 3; ip++) { - // const int dimID = planmap[ip]->dimID(); - // iend_cstm[ip] = isSpectral[ip] ? 
1 - planmap[ip]->shiftgreen() : topo->nloc(ip); //selecting only mode 0 = cte (in per and even-even) - // kfact[dimID] = planmap[ip]->kfact(); - // koffset[dimID] += planmap[ip]->shiftgreen(); //accounts for shifted modes which affect the value of k - // } - // cmpt_Green_3D_0dirunbounded_3dirspectral(topo, kfact, koffset, symstart, green, _typeGreen, epsilon, NULL, iend_cstm); - // } - #ifdef DUMP_DBG hdf5_dump(topo[2], "green_h", green); #endif @@ -1056,15 +1056,19 @@ void Solver::_finalizeGreenFunction(Topology *topo_field, double *green, const T /** - If needed, we create a new switchTopo from the current Green topo to the field one */ //simulate that we have done the transforms - if(planmap[0]->isr2c() || planmap[1]->isr2c() || planmap[2]->isr2c()){ + bool isr2c = false; + for(int id=0; id<_ndim; id++){ + isr2c = isr2c || planmap[id]->isr2c(); + } + if(isr2c){ topo_field->switch2complex(); } - if (planmap[2]->ignoreMode()) { - const int dimID = planmap[2]->dimID(); + if (planmap[_ndim-1]->ignoreMode()) { + const int dimID = planmap[_ndim-1]->dimID(); // get the shift int fieldstart[3] = {0}; - fieldstart[dimID] = -planmap[2]->shiftgreen(); + fieldstart[dimID] = -planmap[_ndim-1]->shiftgreen(); // we do the link between topo of Green and the field topo #if defined(COMM_NONBLOCK) SwitchTopo *switchtopo = new SwitchTopo_nb(topo, topo_field, fieldstart, NULL); @@ -1092,7 +1096,7 @@ void Solver::_finalizeGreenFunction(Topology *topo_field, double *green, const T FLUPS_CHECK(topo->nglob(2) == topo_field->nglob(2), "Topo of Green has to be the same as Topo of field", LOCATION); } //coming back (only if the last plan was r2c. No need it if was c2c or r2r...) 
- if(planmap[2]->isr2c()){ + if(planmap[_ndim-1]->isr2c()){ topo_field->switch2real(); } END_FUNC; @@ -1144,18 +1148,16 @@ void Solver::solve(double *field, double *rhs, const SolverType type) { do_FFT(mydata, FLUPS_FORWARD); #ifdef DUMP_DBG - hdf5_dump(_topo_hat[2], "rhs_h", mydata); + hdf5_dump(_topo_hat[_ndim-1], "rhs_h", mydata); #endif //------------------------------------------------------------------------- /** - Perform the magic */ //------------------------------------------------------------------------- do_mult(mydata, type); - if (_prof != NULL) _prof->stop("domagic"); - #ifdef DUMP_DBG // io if needed - hdf5_dump(_topo_hat[2], "sol_h", mydata); + hdf5_dump(_topo_hat[_ndim-1], "sol_h", mydata); #endif //------------------------------------------------------------------------- /** - go back to reals */ @@ -1169,7 +1171,7 @@ void Solver::solve(double *field, double *rhs, const SolverType type) { #ifdef DUMP_DBG // io if needed - hdf5_dump(_topo_phys, "sol", myfield); + hdf5_dump(_topo_phys, "sol", field); #endif // stop the whole timer if (_prof != NULL) _prof->stop("solve"); @@ -1281,7 +1283,7 @@ void Solver::do_FFT(double *data, const int sign){ opt_double_ptr mydata = data; if (sign == FLUPS_FORWARD) { - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { // go to the correct topo _switchtopo[ip]->execute(mydata, FLUPS_FORWARD); // run the FFT @@ -1294,7 +1296,7 @@ void Solver::do_FFT(double *data, const int sign){ } } } else { //FLUPS_BACKWARD - for (int ip = 2; ip >= 0; ip--) { + for (int ip = _ndim-1; ip >= 0; ip--) { if (_prof != NULL) _prof->start("fftw"); _plan_backward[ip]->execute_plan(_topo_hat[ip], mydata); if (_prof != NULL) _prof->stop("fftw"); @@ -1321,7 +1323,7 @@ void Solver::do_mult(double *data, const SolverType type){ if (_prof != NULL) _prof->start("domagic"); if (type == SRHS) { - if (!_topo_hat[2]->isComplex()) { + if (!_topo_hat[_ndim-1]->isComplex()) { //-> there is only the case of 3dirSYM in which we could 
stay real for the whole process if (_nbr_imult == 0) dothemagic_rhs_real(data); @@ -1339,19 +1341,22 @@ void Solver::do_mult(double *data, const SolverType type){ } else { FLUPS_CHECK(false, "type of solver %d not implemented", type, LOCATION); - // - Obtain what's needed to compute k - double kfact[3]; // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z ) - double koffset[3]; // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z ) + // - Obtain what's needed to compute k + double kfact[3] = {0.0, 0.0, 0.0}; // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z ) + double koffset[3] = {0.0, 0.0, 0.0}; // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z ) - for (int ip = 0; ip < 3; ip++) { + for (int ip = 0; ip < _ndim; ip++) { const int dimID = _plan_forward[ip]->dimID(); kfact[dimID] = _plan_forward[ip]->kfact(); koffset[dimID] = _plan_forward[ip]->koffset() + _plan_forward[ip]->shiftgreen(); } // todo: if topo is not complex, need to handle the fact that we will multiply by i* + // WARNING: need to adapt the LDA of the topology WARNING + //dothemagic... 
} + if (_prof != NULL) _prof->stop("domagic"); END_FUNC; } @@ -1362,10 +1367,11 @@ void Solver::do_mult(double *data, const SolverType type){ */ void Solver::dothemagic_rhs_real(double *data) { BEGIN_FUNC; - FLUPS_CHECK(_topo_hat[2]->nf() == 1, "The topo_hat[2] has to be real", LOCATION); + int cdim = _ndim-1; // get current dim + FLUPS_CHECK(_topo_hat[cdim]->nf() == 1, "The topo_hat[2] has to be real", LOCATION); // get the axis - const int ax0 = _topo_hat[2]->axis(); + const int ax0 = _topo_hat[cdim]->axis(); const int ax1 = (ax0 + 1) % 3; const int ax2 = (ax0 + 2) % 3; // get the factors @@ -1375,12 +1381,12 @@ void Solver::dothemagic_rhs_real(double *data) { FLUPS_ASSUME_ALIGNED(mydata,FLUPS_ALIGNMENT); FLUPS_ASSUME_ALIGNED(mygreen,FLUPS_ALIGNMENT); { - const size_t onmax = _topo_hat[2]->nloc(ax1) * _topo_hat[2]->nloc(ax2); - const size_t inmax = _topo_hat[2]->nloc(ax0); - const int nmem[3] = {_topo_hat[2]->nmem(0), _topo_hat[2]->nmem(1), _topo_hat[2]->nmem(2)}; + const size_t onmax = _topo_hat[cdim]->nloc(ax1) * _topo_hat[cdim]->nloc(ax2); + const size_t inmax = _topo_hat[cdim]->nloc(ax0); + const int nmem[3] = {_topo_hat[cdim]->nmem(0), _topo_hat[cdim]->nmem(1), _topo_hat[cdim]->nmem(2)}; - FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); - FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); + FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); + FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); // do the loop #pragma omp parallel for default(none) proc_bind(close) 
schedule(static) firstprivate(onmax, inmax, nmem, mydata, mygreen, normfact, ax0) @@ -1403,9 +1409,10 @@ void Solver::dothemagic_rhs_real(double *data) { */ void Solver::dothemagic_rhs_complex_nmult0(double *data) { BEGIN_FUNC; - FLUPS_CHECK(_topo_hat[2]->nf() == 2, "The topo_hat[2] (field) has to be complex", LOCATION); + int cdim = _ndim-1; // get current dim + FLUPS_CHECK(_topo_hat[cdim]->nf() == 2, "The topo_hat[2] (field) has to be complex", LOCATION); // get the axis - const int ax0 = _topo_hat[2]->axis(); + const int ax0 = _topo_hat[cdim]->axis(); const int ax1 = (ax0 + 1) % 3; const int ax2 = (ax0 + 2) % 3; // get the factors @@ -1415,12 +1422,12 @@ void Solver::dothemagic_rhs_complex_nmult0(double *data) { FLUPS_ASSUME_ALIGNED(mydata,FLUPS_ALIGNMENT); FLUPS_ASSUME_ALIGNED(mygreen,FLUPS_ALIGNMENT); { - const size_t onmax = _topo_hat[2]->nloc(ax1) * _topo_hat[2]->nloc(ax2); - const size_t inmax = _topo_hat[2]->nloc(ax0); - const int nmem[3] = {_topo_hat[2]->nmem(0), _topo_hat[2]->nmem(1), _topo_hat[2]->nmem(2)}; + const size_t onmax = _topo_hat[cdim]->nloc(ax1) * _topo_hat[cdim]->nloc(ax2); + const size_t inmax = _topo_hat[cdim]->nloc(ax0); + const int nmem[3] = {_topo_hat[cdim]->nmem(0), _topo_hat[cdim]->nmem(1), _topo_hat[cdim]->nmem(2)}; - FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); - FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); + FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); + FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION); // do the loop 
#pragma omp parallel for default(none) proc_bind(close) schedule(static) firstprivate(onmax, inmax, nmem, mydata, mygreen, normfact, ax0) @@ -1471,3 +1478,288 @@ void Solver::dothemagic_rhs_complex_nmult3(double *data) { FLUPS_CHECK(false, "not implemented yet", LOCATION); END_FUNC; } + + +/** + * @brief reorder the MPI-ranks using metis + * + * @warning this functions assume an evenly distributed amount of procs on the nodes + * + * @param comm + * @param sources + * @param sourcesW + * @param dests + * @param destsW + * @param n_nodes + * @param order + */ +void Solver::_reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order) { + int comm_size; + int comm_rank; + MPI_Comm_rank(comm, &comm_rank); + MPI_Comm_size(comm, &comm_size); + +#ifdef HAVE_METIS + + //------------------------------------------------------------------------- + /** - get the total number of nodes */ + //------------------------------------------------------------------------- + // create a group where everybody can create a shared memory region + MPI_Comm nodecomm; + MPI_Info mpinfo; + MPI_Info_create(&mpinfo); + MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, comm_rank, mpinfo, &nodecomm); + // we store the comm size + int local_nodesize; + MPI_Comm_size(nodecomm, &local_nodesize); + + // gather on proc 1 the number of proc per node + int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size); + MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm); + + // count the number of partitions we'll need: + int n_nodes = 0; + int id = 0; + while( id < comm_size){ + id += vec_nodesize[id]; + n_nodes++; + } + +#ifdef DEV_SIMULATE_GRAPHCOMM + //CHEATING: imposing that there will be 2 groups (there needs to be at least 4 procs) + n_nodes = 2; + for (int ip = 0; ip 0 && i != comm_rank) n_neighbours++; + } + // allocate the number of neighbours and their weights + int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours); + int 
*weights = (int *)flups_malloc(sizeof(int) * n_neighbours); + n_neighbours = 0; + for (int i = 0; i < comm_size; ++i) { + if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) { + neighbours[n_neighbours] = i; + weights[n_neighbours] = sourcesW[i] + destsW[i]; + n_neighbours++; + } + } + + //------------------------------------------------------------------------- + /** - build the graph on proc 0 and ask for partioning + * The graph structure follows metis rules: + * the edges (= id of the destination of the edges) starting from proc k are located + * from adj[xadj[k]] to adj[xadj[k+1]-1] + * Same structure is used for the weights with the ajdw + * */ + //------------------------------------------------------------------------- + if (comm_rank == 0) { + int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int)); + int *nadj = (int *)flups_malloc((comm_size) * sizeof(int)); + + // get the number of neighbours from everybody + MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm); + // get the starting indexes of the neighbour description for everybody + xadj[0] = 0; + for (int i = 0; i < comm_size; ++i) { + xadj[i + 1] = xadj[i] + nadj[i]; + } + + // allocate the adjency list + weights and fill it with the neighbour list from everybody + int *adj = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); + int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); + MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm); + MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm); +#ifdef PROF + { + //writing graph to file, CSR format + string filename = "prof/graph.csr"; + FILE* file = fopen(filename.c_str(), "w+"); + if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. 
Did you create the folder?",LOCATION);} + for(int i=0; i<=comm_size; i++){ + fprintf(file, "%d ",xadj[i]); + } + fprintf(file,"\n"); + for(int i=0; i 0; --ip) { + rids[ip] = rids[ip-1]; //offset by 1 + } + rids[0] = 0; + if(!succeed){ + FLUPS_INFO("METIS: attempt failed."); + }else{ + // assign the rank value and redistribute + for (int i = 0; i < comm_size; ++i) { + order[i] = rids[part[i]]++ ; + } + break; + } + } + // check that we did not reach max_iter + if(iter>=max_iter){ + FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION); + for (int i = 0; i < comm_size; ++i) { + order[i] = i; + } + } + + // result of the partitioning + #ifdef PART_OF_EQUAL_SIZE + FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes); + #else + FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes); + #endif +#ifdef PROF + //writing graph to file, CSR format + string filename = "prof/partitions.txt"; + FILE* file = fopen(filename.c_str(), "w+"); + #ifdef PART_OF_EQUAL_SIZE + fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes); + #else + fprintf(file,"%d partitions of size:\n",n_nodes); + for(int i=0; i #include "FFTW_plan_dim.hpp" #include "defines.hpp" -#include "green_functions_3d.hpp" +#include "green_functions.hpp" #include "hdf5_io.hpp" #include "SwitchTopo.hpp" @@ -70,6 +70,7 @@ class Solver { // even is the dimension is 2, we allocate arrays of dimension 3 protected: + int _ndim = 3; /**@brief the dimension of the problem, i.e. 
2D or 3D */ int _fftwalignment = 0; /**< @brief alignement assumed by the FFTW Solver */ int _orderdiff = 0; /**< @brief the order of derivative (spectral = 0) */ int _nbr_imult = 0; /**< @brief the number of time we have applied a DST transform */ @@ -141,6 +142,7 @@ class Solver { */ void _allocate_switchTopo(const int ntopo, SwitchTopo** switchtopo, opt_double_ptr* send_buff, opt_double_ptr* recv_buff); void _deallocate_switchTopo(SwitchTopo** switchtopo, opt_double_ptr* send_buff, opt_double_ptr* recv_buff); + void _reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order); /**@} */ /** @@ -182,7 +184,7 @@ class Solver { */ size_t get_allocSize() { size_t size_tot = 1; - for (int id = 0; id < 3; id++) { + for (int id = 0; id < _ndim; id++) { size_tot = std::max(_topo_hat[id]->memsize(), size_tot); } return size_tot; @@ -193,6 +195,7 @@ class Solver { * * @param kfact multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z ) * @param koffset add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z ) + * @param symstart returns the first index of the symmetry */ void get_spectralInfo(double kfact[3], double koffset[3], double symstart[3]) { for (int ip = 0; ip < 3; ip++) { @@ -231,61 +234,61 @@ class Solver { /**@} */ }; -/** - * @brief compute the pencil layout given the pencil direction - * - * The pencil layout is computed so as to obtain pencils with an aspect - * ratio close to 1, i.e. the same number points per proc in the the 2 other directions than id. 
- * - * @param id the pencil direction - * @param nproc the number of proc in each direction - * @param comm_size the total communicator size - * @param nglob the domain size in each direction - */ -static inline void pencil_nproc(const int id, int nproc[3], const int comm_size, const int nglob[3]) { - int id1 = (id + 1) % 3; - int id2 = (id + 2) % 3; - - nproc[id] = 1; - - double n1 = 1; - double n2 = (double) comm_size; - //invert indexes so that id1 is the dimension where nglob is the smallest - if( nglob[id1] > nglob[id2]){ - const int tmp = id2; - id2 = id1; - id1 = tmp; - } - double np1 = (double) nglob[id1]; - double np2 = (double) nglob[id2]/ comm_size; - const double npsquare = sqrt((double)(nglob[id1] * nglob[id2]) / comm_size); //target number of points per dimension - - //keep on deviding as long as ncurr/2>nsquare - //we want to leave n1=1, and we do not want to reach n2=1 - while ( (np1 > npsquare) && std::floor(n2*.5) == n2*.5) { - n1 *= 2.0; - np1 *= 0.5; - n2 *= 0.5; - np2 *= 2.0; - } - nproc[id1] = (int)n1; - nproc[id2] = (int)n2; - - FLUPS_INFO("my proc repartition is %d %d %d",nproc[0],nproc[1],nproc[2]); - if(nproc[0] * nproc[1] * nproc[2] != comm_size){ - FLUPS_ERROR("the number of proc %d %d %d does not match the comm size %d", nproc[0], nproc[1], nproc[2], comm_size, LOCATION); - } - if(comm_size>8 && (n1==1||n2==1)){ - FLUPS_WARNING("A slab decomposition was used instead of a pencil decomposition in direction %d. This may increase communication time.",id, LOCATION); - //Loss of performance may originate in slab decompositions, as an actual All2All communication is required, whereas with the pencils, - // we manage to do All2All communications in subcoms of size sqrt(comm_size). 
- //We could prevent this to happen by doing something like: - // if(n2==1){ - // n2*=2; - // n1*=0.5; - // } - } -} +// /** +// * @brief compute the pencil layout given the pencil direction +// * +// * The pencil layout is computed so as to obtain pencils with an aspect +// * ratio close to 1, i.e. the same number points per proc in the the 2 other directions than id. +// * +// * @param id the pencil direction +// * @param nproc the number of proc in each direction +// * @param comm_size the total communicator size +// * @param nglob the domain size in each direction +// */ +// static inline void pencil_nproc(const int id, int nproc[3], const int comm_size, const int nglob[3]) { +// int id1 = (id + 1) % 3; +// int id2 = (id + 2) % 3; + +// nproc[id] = 1; + +// double n1 = 1; +// double n2 = (double) comm_size; +// //invert indexes so that id1 is the dimension where nglob is the smallest +// if( nglob[id1] > nglob[id2]){ +// const int tmp = id2; +// id2 = id1; +// id1 = tmp; +// } +// double np1 = (double) nglob[id1]; +// double np2 = (double) nglob[id2]/ comm_size; +// const double npsquare = sqrt((double)(nglob[id1] * nglob[id2]) / comm_size); //target number of points per dimension + +// //keep on deviding as long as ncurr/2>nsquare +// //we want to leave n1=1, and we do not want to reach n2=1 +// while ( (np1 > npsquare) && std::floor(n2*.5) == n2*.5) { +// n1 *= 2.0; +// np1 *= 0.5; +// n2 *= 0.5; +// np2 *= 2.0; +// } +// nproc[id1] = (int)n1; +// nproc[id2] = (int)n2; + +// FLUPS_INFO("my proc repartition is %d %d %d",nproc[0],nproc[1],nproc[2]); +// if(nproc[0] * nproc[1] * nproc[2] != comm_size){ +// FLUPS_ERROR("the number of proc %d %d %d does not match the comm size %d", nproc[0], nproc[1], nproc[2], comm_size, LOCATION); +// } +// if(comm_size>8 && (n1==1||n2==1)){ +// FLUPS_WARNING("A slab decomposition was used instead of a pencil decomposition in direction %d. 
This may increase communication time.",id, LOCATION); +// //Loss of performance may originate in slab decompositions, as an actual All2All communication is required, whereas with the pencils, +// // we manage to do All2All communications in subcoms of size sqrt(comm_size). +// //We could prevent this to happen by doing something like: +// // if(n2==1){ +// // n2*=2; +// // n1*=0.5; +// // } +// } +// } /** * @brief compute the pencil layout given the pencil direction, compatible with another pencil decoposition given as a hint @@ -293,8 +296,9 @@ static inline void pencil_nproc(const int id, int nproc[3], const int comm_size, * @param id the pencil direction * @param nproc the number of proc in each direction * @param comm_size the total communicator size - * @param id_hint the axis of the pencils in another decomposition, which we want this decomposition to be compatible with + * @param id_hint the axis where we allow the proc decomposition to change * @param nproc_hint the number of procs in the other decomposition we want to be compatible with + * */ static inline void pencil_nproc_hint(const int id, int nproc[3], const int comm_size, const int id_hint, const int nproc_hint[3]) { // get the id shared between the hint topo @@ -317,306 +321,5 @@ static inline void pencil_nproc_hint(const int id, int nproc[3], const int comm_ } } -/** - * @brief reorder the MPI-ranks using metis - * - * @warning this functions assume an evenly distributed amount of procs on the nodes - * - * @param comm - * @param sources - * @param sourcesW - * @param dests - * @param destsW - * @param n_nodes - * @param order - */ -static void reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order) { - int comm_size; - int comm_rank; - MPI_Comm_rank(comm, &comm_rank); - MPI_Comm_size(comm, &comm_size); - -#ifdef HAVE_METIS - - //------------------------------------------------------------------------- - /** - get the total number of nodes */ - 
//------------------------------------------------------------------------- - // create a group where everybody can create a shared memory region - MPI_Comm nodecomm; - MPI_Info mpinfo; - MPI_Info_create(&mpinfo); - MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, comm_rank, mpinfo, &nodecomm); - // we store the comm size - int local_nodesize; - MPI_Comm_size(nodecomm, &local_nodesize); - -// #define PART_OF_EQUAL_SIZE -#ifdef PART_OF_EQUAL_SIZE - //_______ OPTION 1 with gcd (suboptimal)________ - // gather on each proc the gcd - int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size); - MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm); - // get the Greatest Common Divider among every process - int nodesize = comm_size; - for (int ip = 0; ip < comm_size; ip++) { - nodesize = gcd(nodesize, vec_nodesize[ip]); - } - // store the number of nodes - int n_nodes = comm_size / nodesize; - double* tpwgts = NULL; -#else - //_______ OPTION 2 with various size partitions________ - // gather on proc 1 the number of proc per node - int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size); - MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm); - - // count the number of partitions we'll need: - int n_nodes = 0; - int id = 0; - while( id < comm_size){ - id += vec_nodesize[id]; - n_nodes++; - } - -#ifdef DEV_SIMULATE_GRAPHCOMM - //CHEATING: imposing that there will be 2 groups (there needs to be at least 4 procs) - n_nodes = 2; - for (int ip = 0; ip 0 && i != comm_rank) n_neighbours++; - } - // allocate the number of neighbours and their weights - int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours); - int *weights = (int *)flups_malloc(sizeof(int) * n_neighbours); - n_neighbours = 0; - for (int i = 0; i < comm_size; ++i) { - if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) { - neighbours[n_neighbours] = i; - weights[n_neighbours] = sourcesW[i] + destsW[i]; - n_neighbours++; - } - } - - 
//------------------------------------------------------------------------- - /** - build the graph on proc 0 and ask for partioning - * The graph structure follows metis rules: - * the edges (= id of the destination of the edges) starting from proc k are located - * from adj[xadj[k]] to adj[xadj[k+1]-1] - * Same structure is used for the weights with the ajdw - * */ - //------------------------------------------------------------------------- - if (comm_rank == 0) { - int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int)); - int *nadj = (int *)flups_malloc((comm_size) * sizeof(int)); - - // get the number of neighbours from everybody - MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm); - // get the starting indexes of the neighbour description for everybody - xadj[0] = 0; - for (int i = 0; i < comm_size; ++i) { - xadj[i + 1] = xadj[i] + nadj[i]; - } - - // allocate the adjency list + weights and fill it with the neighbour list from everybody - int *adj = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); - int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); - MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm); - MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm); -#ifdef PROF - { - //writing graph to file, CSR format - string filename = "prof/graph.csr"; - FILE* file = fopen(filename.c_str(), "w+"); - if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. 
Did you create the folder?",LOCATION);} - for(int i=0; i<=comm_size; i++){ - fprintf(file, "%d ",xadj[i]); - } - fprintf(file,"\n"); - for(int i=0; i 0; --ip) { - rids[ip] = rids[ip-1]; //offset by 1 - } - rids[0] = 0; - if(!succeed){ - FLUPS_INFO("METIS: attempt failed."); - }else{ - // assign the rank value and redistribute - for (int i = 0; i < comm_size; ++i) { - order[i] = rids[part[i]]++ ; - } - break; - } - } - // check that we did not reach max_iter - if(iter>=max_iter){ - FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION); - for (int i = 0; i < comm_size; ++i) { - order[i] = i; - } - } - - // result of the partitioning - #ifdef PART_OF_EQUAL_SIZE - FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes); - #else - FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes); - #endif -#ifdef PROF - //writing graph to file, CSR format - string filename = "prof/partitions.txt"; - FILE* file = fopen(filename.c_str(), "w+"); - #ifdef PART_OF_EQUAL_SIZE - fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes); - #else - fprintf(file,"%d partitions of size:\n",n_nodes); - for(int i=0; i -void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3],int nByBlock[3]){ +/** + * @brief computes nByBlock, the unit block size + * + * @param istart the starting indexes on this rank in the input topology + * @param iend the end indexes on this rank in the input topology + * @param ostart the starting indexes on this rank in the output topology + * @param oend the end indexes on this rank in the output topology + * @param nByBlock + */ +void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3], int nByBlock[3]) { BEGIN_FUNC; int comm_size; - MPI_Comm_size(_inComm,&comm_size); + MPI_Comm_size(_inComm, &comm_size); int* onProc = (int*)flups_malloc(comm_size * 
sizeof(int)); for (int id = 0; id < 3; id++) { // get the gcd between send and receive - int isend = (iend[id] - istart[id]); - int osend = (oend[id] - ostart[id]); - // // compute the exchanged size same if from the input or output - // MPI_Allreduce(&isend, &_exSize[id], 1, MPI_INT, MPI_SUM, _inComm); - // // we have summed the size nproc(id+1)*size nproc(id+2) * size, so we divide - // _exSize[id] /= _topo_in->nproc((id+1)%3) * _topo_in->nproc((id+2)%3); - - // // if I am the last one, I decrease the blocksize by one if needed - // if (_topo_in->rankd(id) == (_topo_in->nproc(id) - 1)) { - // isend = isend - _exSize[id] % 2; - // } - // if (_topo_out->rankd(id) == (_topo_out->nproc(id) - 1)) { - // osend = osend - _exSize[id] % 2; - // } - int npoints = gcd(isend,osend); + int isend = (iend[id] - istart[id]); + int osend = (oend[id] - ostart[id]); + int npoints = gcd(isend, osend); // gather on each proc the gcd MPI_Allgather(&npoints, 1, MPI_INT, onProc, 1, MPI_INT, _inComm); // get the Greatest Common Divider among every process @@ -72,75 +69,7 @@ void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int o } /** - * @brief compute the destination rank for every block on the current processor - * - * @param nBlock the number of block on the current proc (012-indexing) - * @param blockIDStart the global starting id of the block (0,0,0) in the current topo - * @param topo the destination topology - * @param nBlockOnProc the number of block on each proc in the destination topology - * @param destRank the computed destination rank for each block - */ -void SwitchTopo::_cmpt_blockDestRankAndTag(const int nBlock[3], const int blockIDStart[3], const Topology *topo, const int *startBlockEachProc, const int *nBlockEachProc, int *destRank, int *destTag) { - BEGIN_FUNC; - int comm_size; - MPI_Comm_size(_inComm, &comm_size); - // go through each block - for (int ib = 0; ib < nBlock[0] * nBlock[1] * nBlock[2]; ib++) { - // get the split index - int 
bidv[3]; - localSplit(ib, nBlock, 0, bidv, 1); - // initialize the destrank - int global_bid[3] = {0, 0, 0}; - int destrankd[3] = {0, 0, 0}; - // determine the dest rank for each dimension - for (int id = 0; id < 3; id++) { - // we go trough every rank on the given dim - global_bid[id] = bidv[id] + blockIDStart[id]; - for (int ir = 0; ir < topo->nproc(id); ir++) { - // update the destination rank - destrankd[id] = ir; - - // update the number of block already visited - int minBlockLocal = startBlockEachProc[id * comm_size + rankindex(destrankd, topo)]; - int maxBlockLocal = minBlockLocal + nBlockEachProc[id * comm_size + rankindex(destrankd, topo)]; - - // if we have already visited more block than my block id then we have found the destination rank - if (global_bid[id] >= minBlockLocal && global_bid[id] < maxBlockLocal) { - break; - } - } - } - - // get the global destination rank - const int destrank = rankindex(destrankd, topo); - // get the global destination tag - destRank[ib] = destrank; - - FLUPS_CHECK(destrank < comm_size, "the destination rank is > than the commsize: %d = %d %d %d vs %d", destrank, destrankd[0], destrankd[1], destrankd[2], comm_size, LOCATION); - if (destTag != NULL) { - // get the number of block in the destination rank - int dest_nBlock[3] = {nBlockEachProc[0 * comm_size + destrank], - nBlockEachProc[1 * comm_size + destrank], - nBlockEachProc[2 * comm_size + destrank]}; - // store the destination tag = local block index in the destination rank - // get the number of block in the destination rank - int dest_iBlock[3] = {global_bid[0]-startBlockEachProc[0 * comm_size + destrank], - global_bid[1]-startBlockEachProc[1 * comm_size + destrank], - global_bid[2]-startBlockEachProc[2 * comm_size + destrank]}; - // create the tag - destTag[ib] = localIndex(0, dest_iBlock[0], dest_iBlock[1], dest_iBlock[2], 0, dest_nBlock, 1); - } - } - - //if the communicator of topo is not the same as the reference communicator, we need to adapt the destrank - 
//for now, it has been computed in the comm of topo. We thus change for the reference _inComm. - translate_ranks(nBlock[0] * nBlock[1] * nBlock[2], destRank, topo->get_comm(), _inComm); - - END_FUNC; -} - -/** - * @brief compute the destination rank for every block on the current processor + * @brief compute the destination rank for every unit block on the current processor * * @param nBlock the number of block on the current proc (012-indexing) * @param blockIDStart the global starting id of the block (0,0,0) in the current topo @@ -164,12 +93,13 @@ void SwitchTopo::_cmpt_blockDestRank(const int nBlock[3],const int nByBlock[3],c // determine the dest rank for each dimension for (int id = 0; id < 3; id++) { // get the global starting index in my current topo = topo_in - global_id[id] = bidv[id] * nByBlock[id] + topo_in->nbyproc(id) * topo_in->rankd(id) + istart[id]; + global_id[id] = bidv[id] * nByBlock[id] + topo_in->cmpt_start_id(id) + istart[id]; // the (0,0,0) in topo in is located in shift in topo_out - FLUPS_INFO_4("block %d starts at %d / %d ",ib,(global_id[id] + shift[id]),topo_out->nbyproc(id)); - destrankd[id] = (global_id[id] + shift[id]) / topo_out->nbyproc(id); + FLUPS_INFO_4("block %d starts at %d ",ib,(global_id[id] + shift[id])); + // destrankd[id] = (global_id[id] + shift[id]) / topo_out->nbyproc(id); + destrankd[id] = topo_out->cmpt_rank_fromid(global_id[id] + shift[id],id); // if the last proc has more data than the other ones, we need to max the destrank - destrankd[id] = std::min(destrankd[id],topo_out->nproc(id)-1); + // destrankd[id] = std::min(destrankd[id],topo_out->nproc(id)-1); } destRank[ib] = rankindex(destrankd, topo_out); @@ -183,40 +113,7 @@ void SwitchTopo::_cmpt_blockDestRank(const int nBlock[3],const int nByBlock[3],c } /** - * @brief compute the size of the blocks inside the given topology - * - * @param nBlock - * @param blockIDStart - * @param nByBlock - * @param topo - * @param nBlockSize - */ -void 
SwitchTopo::_cmpt_blockSize(const int nBlock[3], const int blockIDStart[3], const int nByBlock[3], const int istart[3], const int iend[3], int *nBlockSize[3]) { - BEGIN_FUNC; - // go through each block - for (int ib2 = 0; ib2 < nBlock[2]; ib2++) { - for (int ib1 = 0; ib1 < nBlock[1]; ib1++) { - for (int ib0 = 0; ib0 < nBlock[0]; ib0++) { - // get the global block index - const int bidv[3] = {ib0, ib1, ib2}; - const int bid = localIndex(0, ib0, ib1, ib2, 0, nBlock, 1); - // determine the size in each direction - for (int id = 0; id < 3; id++) { - //if I am the last block, I forgive a small difference between the blocksizes - if (bidv[id] == (nBlock[id] - 1)) { - nBlockSize[id][bid] = (iend[id] - istart[id]) - bidv[id] * nByBlock[id]; - } else { - nBlockSize[id][bid] = nByBlock[id]; - } - } - } - } - } - END_FUNC; -} - -/** - * @brief given a topology, try to merge the blocks that go to the same destination + * @brief given a topology, merges the unit blocks that go to the same destination in order to create one big block for each proc * * @param [in] topo the topology * @param [in] nByBlock the number of unknowns by blocks @@ -238,6 +135,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar int* nblockToEachProc = (int*)flups_malloc(sizeof(int) * commsize); std::memset(nblockToEachProc, 0, sizeof(int) * commsize); + //------------------------------------------------------------------------- + /** - count the number of block going to each proc */ + //------------------------------------------------------------------------- const int old_nBlock = nBlockv[0] * nBlockv[1] * nBlockv[2]; for (int ib = 0; ib < old_nBlock; ib++) { @@ -251,6 +151,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar } } + //------------------------------------------------------------------------- + /** - initialize destination rank, block sizes and blockiStart arrays */ + 
//------------------------------------------------------------------------- // allocate the new arrays: rank, tag, blocksize, block istart int* newBlockSize[3] = {NULL, NULL, NULL}; int* newblockiStart[3] = {NULL, NULL, NULL}; @@ -284,8 +187,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar // free the temp array flups_free(nblockToEachProc); - // recompute the number of blocks on each proc and the starting index - + //------------------------------------------------------------------------- + /** - Gathering blocks: recompute the blocksize, the blockiStart and the destination rank */ + //------------------------------------------------------------------------- // loop over the blocks and store the information for (int nib = 0; nib < newNBlock; nib++) { // FLUPS_INFO(">>> looking for new block %d", nib); @@ -311,9 +215,6 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar // get the last index of the block int nib_start[3] = {newblockiStart[0][nib], newblockiStart[1][nib], newblockiStart[2][nib]}; int nib_end[3] = {nib_start[0] + newBlockSize[0][nib], nib_start[1] + newBlockSize[1][nib], nib_start[2] + newBlockSize[2][nib]}; - - // FLUPS_INFO(">>> old block lim = %d %d %d -> %d %d %d",ib_start[0],ib_start[1],ib_start[2],ib_end[0],ib_end[1],ib_end[2]); - // FLUPS_INFO(">>> new block lim = %d %d %d -> %d %d %d",nib_start[0],nib_start[1],nib_start[2],nib_end[0],nib_end[1],nib_end[2]); // get the new starting index (and overwrittes the INT_MAX if any!!) 
newblockiStart[0][nib] = std::min(nib_start[0], ib_start[0]); @@ -331,6 +232,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar } } + //------------------------------------------------------------------------- + /** - free old arrays and store the new ones */ + //------------------------------------------------------------------------- // store the new block number (*nBlock) = newNBlock; @@ -427,25 +331,14 @@ void SwitchTopo::_gather_tags(MPI_Comm comm, const int inBlock, const int onBloc } /** - * @brief compute the number of blocks, the starting indexes of the block (0,0,0) and the number of block in each proc - * - * This function computes several usefull indexes for the block: - * - the number of blocks on the current procs - * - the starting index in the topo of the block (0,0,0) - * - the number of block on each proc. + * @brief compute the number of blocks on each rank * - * For a given proc, nBlockEachProc[comm_size * id + ip] is the number of proc in the dimension id on the proc ip - * - * @param istart the starting indexes on this proc - * @param iend the end indexes on this proc + * @param istart the starting local indexes on this proc + * @param iend the end local indexes on this proc * @param nByBlock the number of unkowns in one block (012-indexing) * @param topo the current topology * @param nBlock the number of block in this proc - * @param blockIDStart the starting id of the block (0,0,0) - * @param nBlockEachProc the number of blocks on each proc */ -// void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo, -// int nBlock[3], int blockIDStart[3], int *startBlockEachProc, int *nBlockEachProc) { void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo,int nBlock[3]) { BEGIN_FUNC; int comm_size; @@ -453,24 +346,8 @@ void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int 
iend[3], cons for (int id = 0; id < 3; id++) { // send/recv number of block on my proc nBlock[id] = (iend[id] - istart[id]) / nByBlock[id]; - // // get the list of number of procs - // MPI_Allgather(&(nBlock[id]), 1, MPI_INT, &(nBlockEachProc[comm_size * id]), 1, MPI_INT, topo->get_comm()); - // // set the starting indexes to 0 - // blockIDStart[id] = 0; - // // compute the starting index - // const int myrankd = topo->rankd(id); - // int rankd[3] = {topo->rankd(0), topo->rankd(1), topo->rankd(2)}; - // for (int ir = 0; ir < myrankd; ir++) { - // // update the rankd - // rankd[id] = ir; - // // increment the block counter - // blockIDStart[id] += nBlockEachProc[comm_size * id + rankindex(rankd, topo)]; - // } // do some checks FLUPS_CHECK(nBlock[id] > 0, "The number of proc in one direction cannot be 0: istart = %d %d %d to iend = %d %d %d ", istart[0], istart[1], istart[2], iend[0], iend[1], iend[2], LOCATION); - - //everybody needs to know the startID of the first block in each proc - // MPI_Allgather(&(blockIDStart[id]), 1, MPI_INT, &(startBlockEachProc[comm_size * id]), 1, MPI_INT, topo->get_comm()); } END_FUNC; } @@ -478,8 +355,8 @@ void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], cons /** * @brief split the _inComm communicator into subcomms * - * We here find the colors of the call graph, i.e. ranks communicating together have the same color. - * Once the color are known, we divide the graph into subcomms. + * We here find the colors of the comm, i.e. ranks communicating together have the same color. + * Once the color are known, we divide the current communicator into subcomms. 
* * */ @@ -490,7 +367,6 @@ void SwitchTopo::_cmpt_commSplit(){ MPI_Comm_rank(_inComm,&rank); MPI_Comm_size(_inComm,&comm_size); - //------------------------------------------------------------------------- /** - Set the starting color and determine who I wish to get in my group */ //------------------------------------------------------------------------- @@ -589,7 +465,7 @@ void SwitchTopo::_cmpt_commSplit(){ } /** - * @brief setup the lists according to the master and sub communicators + * @brief setup the subcommunicator form the destRank and the _inComm communicator * * We setup the following lists: * - destRank: transformed from the values in the world comm to the values in the new comm. @@ -616,10 +492,6 @@ void SwitchTopo::_setup_subComm(const int nBlock,int* blockSize[3], int* destRan int* subRanks = (int*)flups_malloc(worldsize * sizeof(int)); MPI_Allgather(&subrank, 1, MPI_INT, subRanks, 1, MPI_INT, _inComm); - - // int* destRank_cpy = (int*) flups_malloc(nBlock[0] * nBlock[1] * nBlock[2] * sizeof(int)); - // memcpy(destRank,destRank_cpy,nBlock[0] * nBlock[1] * nBlock[2] * sizeof(int)); - // replace the old ranks by the newest ones for (int ib = 0; ib < nBlock; ib++) { destRank[ib] = subRanks[destRank[ib]]; @@ -637,6 +509,16 @@ void SwitchTopo::_setup_subComm(const int nBlock,int* blockSize[3], int* destRan END_FUNC; } +/** + * @brief compute the start and count arrays needed for the all to all communication + * + * @param comm the communicator to use + * @param nBlock the number of block + * @param blockSize the block sizes + * @param destRank the destination rank of each block + * @param count the count array + * @param start the start array + */ void SwitchTopo::_cmpt_start_and_count(MPI_Comm comm, const int nBlock,int* blockSize[3], int* destRank, int** count, int** start) { BEGIN_FUNC; const int nf = std::max(_topo_in->nf(),_topo_out->nf()); diff --git a/src/SwitchTopo.hpp b/src/SwitchTopo.hpp index 17decdc6..c9413e62 100644 --- a/src/SwitchTopo.hpp 
+++ b/src/SwitchTopo.hpp @@ -51,15 +51,8 @@ class SwitchTopo { MPI_Comm _inComm = NULL; /**<@brief the reference input communicator */ MPI_Comm _outComm = NULL; /**<@brief the reference output communicator */ MPI_Comm _subcomm = NULL; /**<@brief the subcomm for this switchTopo */ - // int _exSize[3]; /**<@brief exchanged size in each dimension (012-indexing) */ int _shift[3]; /**<@brief the shift in memory */ - // int _nByBlock[3]; /**<@brief The number of data per blocks in each dim (!same on each process! and 012-indexing) */ - // int _istart[3]; /**<@brief the starting index for #_topo_in to be inside #_topo_out */ - // int _ostart[3]; /**<@brief the starting index for #_topo_out to be inside #_topo_in */ - // int _iend[3]; /**<@brief the ending index for #_topo_in to be inside #_topo_out */ - // int _oend[3]; /**<@brief the ending index for #_topo_out to be inside #_topo_in */ - int _inBlock; /**<@brief the local number of block in each dim in the input topology */ int _onBlock; /**<@brief the local number of block in each dim in the output topology */ @@ -93,31 +86,12 @@ class SwitchTopo { virtual void execute(opt_double_ptr v, const int sign) const = 0; virtual void disp() const = 0; - // /** - // * @brief return the memory size of a block (including the padding for odd numbers if needed) - // * - // * @return size_t - // */ - // inline size_t get_blockMemSize() const { - // // get the max block size - // size_t total = 1; - // for (int id = 0; id < 3; id++) { - // // if the block size is 1, no need to pad :) - // total *= (_nByBlock[id] == 1) ? 1 : (size_t)(_nByBlock[id] + _exSize[id] % 2); - // } - // // the nf at the moment of the switchTopo is ALWAYS the one from the output topo!! - // total *= (size_t)_topo_out->nf(); - // // add the difference with the alignement to be always aligned - // size_t alignDelta = ((total*sizeof(double))%FLUPS_ALIGNMENT == 0) ? 
0 : (FLUPS_ALIGNMENT - (total*sizeof(double))%FLUPS_ALIGNMENT )/sizeof(double); - // // FLUPS_INFO("alignDelta = %d for a total of %d = %d %d %d",alignDelta,total,_nByBlock[0] + _exSize[0] % 2,_nByBlock[1] + _exSize[1] % 2,_nByBlock[2] + _exSize[2] % 2); - // total = total + alignDelta; - // FLUPS_CHECK((total*sizeof(double))%FLUPS_ALIGNMENT == 0 , "The total size of one block HAS to match the alignement size",LOCATION); - // // return the total size - // return total; - // }; - /** * @brief Get the memory size of a block padded to ensure alignment + * + * @warning + * Since we use gathered blocks, it is NOT STRAIGHTFORWARD to impose a common size for every block on every proc. + * Therefore, we chose not to do it!! * * @param ib the block id * @param nf the number of fields inside an element @@ -174,9 +148,6 @@ class SwitchTopo { protected: void _cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3],int nByBlock[3]); void _cmpt_blockDestRank(const int nBlock[3], const int nByBlock[3], const int shift[3], const int istart[3], const Topology* topo_in, const Topology* topo_out, int* destRank); - void _cmpt_blockDestRankAndTag(const int nBlock[3], const int blockIDStart[3], const Topology* topo, const int* startBlockEachProc, const int* nBlockEachProc, int* destRank, int* destTag); - void _cmpt_blockSize(const int nBlock[3], const int blockIDStart[3], const int nByBlock[3], const int istart[3], const int iend[3], int* nBlockSize[3]); - // void _cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology* topo, int nBlock[3], int blockIDStart[3], int* startBlockEachProc, int* nBlockEachProc); void _cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo,int nBlock[3]); void _cmpt_commSplit(); @@ -201,7 +172,7 @@ static inline int gcd(int a, int b) { * @param inComm input communicator * @param outComm output communicator */ -inline static void translate_ranks(int size, int* 
ranks, MPI_Comm inComm, MPI_Comm outComm) { +inline static void translate_ranks(int size, int* ranks, MPI_Comm inComm, MPI_Comm outComm) { BEGIN_FUNC; int comp; diff --git a/src/SwitchTopo_a2a.cpp b/src/SwitchTopo_a2a.cpp index 9701f0e5..267b827e 100644 --- a/src/SwitchTopo_a2a.cpp +++ b/src/SwitchTopo_a2a.cpp @@ -130,7 +130,15 @@ SwitchTopo_a2a::SwitchTopo_a2a(const Topology* topo_input, const Topology* topo_ } /** - * @brief initialize the blocks: compute their index, their number, their size and their source/destination + * @brief initialize the communication blocks + * + * First, we compute nByBlock[3], the smallest size of unknowns that goes from one proc to another. + * This small nByBlock is the same across all ranks. + * + * Then, for each of these unit blocks (of size nByBlock[3]), we compute its destination rank. + * + * Afterwards, using the rank of those unit blocks, we try to gather them by destination ranks. + * All the unit blocks that have the same destination will be packed together for the communication. 
* */ void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* topo_out){ @@ -153,14 +161,6 @@ void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* to int oend[3]; int nByBlock[3]; - // int iblockIDStart[3]; - // int oblockIDStart[3]; - // int* inBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* onBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* istartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* ostartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - - //------------------------------------------------------------------------- /** - Compute intersection ids */ //------------------------------------------------------------------------- @@ -174,41 +174,22 @@ void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* to //------------------------------------------------------------------------- _cmpt_nByBlock(istart,iend,ostart,oend,nByBlock); - // _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv, iblockIDStart, istartBlockEachProc, inBlockEachProc); - // _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv, oblockIDStart, ostartBlockEachProc, onBlockEachProc); _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv); _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv); - // // allocte the block size - // for (int id = 0; id < 3; id++) { - // _iBlockSize[id] = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int)); - // _oBlockSize[id] = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int)); - // } - // allocate the destination ranks _i2o_destRank = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int)); _o2i_destRank = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int)); - // // get the size of the blocks - // _cmpt_blockSize(inBlockv, iblockIDStart, nByBlock, istart, iend, _iBlockSize); - // 
_cmpt_blockSize(onBlockv, oblockIDStart, nByBlock, ostart, oend, _oBlockSize); - // get the ranks // shift if the root position of the topo_in in the topo_out _cmpt_blockDestRank(inBlockv,nByBlock,_shift,istart,topo_in,topo_out,_i2o_destRank); _cmpt_blockDestRank(onBlockv,nByBlock,mshift,ostart,topo_out,topo_in,_o2i_destRank); - // _cmpt_blockDestRankAndTag(inBlockv, iblockIDStart, topo_out, ostartBlockEachProc, onBlockEachProc, _i2o_destRank, NULL); - // _cmpt_blockDestRankAndTag(onBlockv, oblockIDStart, topo_in, istartBlockEachProc, inBlockEachProc, _o2i_destRank,NULL); // try to gather blocks together if possible, rewrittes the sizes, the blockistart, the number of blocks, the ranks and the tags _gather_blocks(topo_in, nByBlock, istart,iend, inBlockv, _iBlockSize, _iBlockiStart, &_inBlock, &_i2o_destRank); _gather_blocks(topo_out, nByBlock, ostart,oend, onBlockv, _oBlockSize, _oBlockiStart, &_onBlock, &_o2i_destRank); - // free the temp arrays - // flups_free(inBlockEachProc); - // flups_free(onBlockEachProc); - // flups_free(istartBlockEachProc); - // flups_free(ostartBlockEachProc); END_FUNC; } @@ -259,7 +240,8 @@ void SwitchTopo_a2a::setup() { int compIn, compOut; MPI_Comm_compare(inComm, _inComm, &compIn); MPI_Comm_compare(outComm, _outComm, &compOut); - if( compIn != MPI_IDENT || compOut != MPI_IDENT){ + //if the graph communicator has the same numbering as the old commn we will skip the following + if( compIn != MPI_CONGRUENT || compOut != MPI_CONGRUENT){ if (rank == 0){ FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. 
I will recompute the communication scheme.",LOCATION); } @@ -312,6 +294,29 @@ void SwitchTopo_a2a::setup() { _is_all2all = _is_all2all && (tmp_size == _o2i_count[ir]); } + //------------------------------------------------------------------------- + /** - Check that everybody is in the same communication mode*/ + //------------------------------------------------------------------------- + // determine if every proc is in the all_to_all mode + bool global_is_alltoall; + MPI_Allreduce(&_is_all2all, &global_is_alltoall, 1, MPI_CXX_BOOL, MPI_LAND, _subcomm); + // determine if at least one proc is in the all to all mode + bool any_is_alltoall; + MPI_Allreduce(&_is_all2all,&any_is_alltoall,1,MPI_CXX_BOOL,MPI_LOR,_subcomm); + // generate an error if it is not compatible + if (_is_all2all && (!global_is_alltoall)){ + int rlen; + char myname[MPI_MAX_OBJECT_NAME]; + MPI_Comm_get_name(_subcomm, myname, &rlen); + FLUPS_ERROR("communicator %s: at least one process is NOT in the all to all communication scheme",myname,LOCATION); + } + if((!_is_all2all) && any_is_alltoall){ + int rlen; + char myname[MPI_MAX_OBJECT_NAME]; + MPI_Comm_get_name(_subcomm, myname, &rlen); + FLUPS_ERROR("communicator %s: at least one process is in the all to all communication scheme",myname,LOCATION); + } + // if we are all to all, clean the start array if (_is_all2all) { if (_i2o_start != NULL) { diff --git a/src/SwitchTopo_nb.cpp b/src/SwitchTopo_nb.cpp index 554c93ea..d4e7906c 100644 --- a/src/SwitchTopo_nb.cpp +++ b/src/SwitchTopo_nb.cpp @@ -125,7 +125,17 @@ SwitchTopo_nb::SwitchTopo_nb(const Topology* topo_input, const Topology* topo_ou } /** - * @brief initialize the blocks: compute their index, their number, their size and their source/destination + * @brief initialize the communication blocks + * + * First, we compute nByBlock[3], the smallest size of unknowns that goes from one proc to another. + * This small nByBlock is the same accross each rank. 
+ * + * Then, for each of this unit block (of size nByBlock[3]), we compute their destination rank. + * + * Afterwards, using the rank of those unit blocks, we try to gather them by destination ranks. + * all the kernels blocks that have the same destination will be packed together for the communication. + * + * Finally, we compute the destination tag of each block. It is defined as the local block id of the block in the received topology. * */ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* topo_out){ @@ -137,7 +147,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top FLUPS_CHECK(ocomm_size==comm_size,"In and out communicators must have the same size.",LOCATION); - //------------------------------------------------------------------------- /** - get the number of blocks and for each block get the size and the destination rank */ //------------------------------------------------------------------------- @@ -149,13 +158,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top int oend[3]; int nByBlock[3]; - // int iblockIDStart[3]; - // int oblockIDStart[3]; - // int* inBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* onBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* istartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - // int* ostartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int)); - //------------------------------------------------------------------------- /** - Compute intersection ids */ //------------------------------------------------------------------------- @@ -169,33 +171,16 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top //------------------------------------------------------------------------- _cmpt_nByBlock(istart,iend,ostart,oend,nByBlock); - // _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv, iblockIDStart, 
istartBlockEachProc, inBlockEachProc); - // _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv, oblockIDStart, ostartBlockEachProc, onBlockEachProc); _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv); _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv); - // // allocte the block size - // for (int id = 0; id < 3; id++) { - // _iBlockSize[id] = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int)); - // _oBlockSize[id] = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int)); - // } - // allocate the destination ranks _i2o_destRank = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int)); _o2i_destRank = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int)); - // // allocate the destination tags - // _i2o_destTag = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int)); - // _o2i_destTag = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int)); - - // // get the size of the blocks - // _cmpt_blockSize(inBlockv, iblockIDStart, nByBlock, istart, iend, _iBlockSize); - // _cmpt_blockSize(onBlockv, oblockIDStart, nByBlock, ostart, oend, _oBlockSize); // get the ranks _cmpt_blockDestRank(inBlockv,nByBlock,_shift,istart,topo_in,topo_out,_i2o_destRank); _cmpt_blockDestRank(onBlockv,nByBlock,mshift,ostart,topo_out,topo_in,_o2i_destRank); - // _cmpt_blockDestRankAndTag(inBlockv, iblockIDStart, topo_out, ostartBlockEachProc, onBlockEachProc, _i2o_destRank, _i2o_destTag); - // _cmpt_blockDestRankAndTag(onBlockv, oblockIDStart, topo_in, istartBlockEachProc, inBlockEachProc, _o2i_destRank,_o2i_destTag); // try to gather blocks together if possible, rewrittes the sizes, the blockistart, the number of blocks, the ranks and the tags _gather_blocks(topo_in, nByBlock, istart, iend, inBlockv, _iBlockSize, _iBlockiStart, &_inBlock, &_i2o_destRank); @@ -209,12 +194,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const 
Topology* top _o2i_sendRequest = (MPI_Request*)flups_malloc(_onBlock * sizeof(MPI_Request)); _o2i_recvRequest = (MPI_Request*)flups_malloc(_inBlock * sizeof(MPI_Request)); - // free the temp arrays - // flups_free(inBlockEachProc); - // flups_free(onBlockEachProc); - // flups_free(istartBlockEachProc); - // flups_free(ostartBlockEachProc); - END_FUNC; } @@ -271,8 +250,11 @@ void SwitchTopo_nb::setup(){ int compIn, compOut; MPI_Comm_compare(inComm, _inComm, &compIn); MPI_Comm_compare(outComm, _outComm, &compOut); - if( compIn != MPI_IDENT || compOut != MPI_IDENT){ - FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. I will recompute the communication scheme.",LOCATION); + //if the graph communicator has the same numbering as the old comm, we will skip the following + if( compIn != MPI_CONGRUENT || compOut != MPI_CONGRUENT){ + if (rank == 0){ + FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. I will recompute the communication scheme.",LOCATION); + } _inComm = inComm; _outComm = outComm; diff --git a/src/SwitchTopo_nb.hpp b/src/SwitchTopo_nb.hpp index 8150ec24..3838c94b 100644 --- a/src/SwitchTopo_nb.hpp +++ b/src/SwitchTopo_nb.hpp @@ -40,7 +40,7 @@ #include "SwitchTopo.hpp" /** - * @brief Takes care of the switch between to different topologies + * @brief Switch between two different topologies using non-blocking communications * * Reorganize the memory between 2 different topologies, also accounting for a * "principal axis" which is aligned with the fast rotating index. 
@@ -56,8 +56,8 @@ class SwitchTopo_nb : public SwitchTopo { protected: int _selfBlockN=0; - int* _iselfBlockID = NULL; - int* _oselfBlockID = NULL; + int* _iselfBlockID = NULL; /**<@brief The list of the block IDs that stay on the current rank in the input topology (used while output to input) */ + int* _oselfBlockID = NULL; /**<@brief The list of the block IDs that stay on the current rank in the output topology (used while input to output) */ int* _i2o_destTag = NULL; /**<@brief The destination rank in the output topo of each block */ int* _o2i_destTag = NULL; /**<@brief The destination rank in the output topo of each block */ diff --git a/src/Topology.cpp b/src/Topology.cpp index b21bdcb6..53a33172 100644 --- a/src/Topology.cpp +++ b/src/Topology.cpp @@ -96,34 +96,17 @@ Topology::Topology(const int axis, const int nglob[3], const int nproc[3], const void Topology::cmpt_sizes() { BEGIN_FUNC; for (int id = 0; id < 3; id++) { - // compute the _nbyproc - // number of unknows everywhere except the last one - _nbyproc[id] = _nglob[id] / _nproc[id]; // integer division = floor - // if we don't change anything - int nlastProc = std::max(_nbyproc[id], _nglob[id] - _nbyproc[id] * (_nproc[id] - 1)); - // if the last proc has too much unknows compare to the other - // and we are able to give up some points - while((nlastProc - _nbyproc[id]) > 1 && nlastProc >= _nproc[id]){ - _nbyproc[id] += 1; - nlastProc -= (_nproc[id] - 1); - } - // if we are the last rank in the direction, we take everything what is left - if ((_rankd[id] < (_nproc[id] - 1))) { - _nloc[id] = _nbyproc[id]; - // the memory size is the same as the local size - _nmem[id] = _nloc[id]; - } else { - // we get the max between the nglob and - _nloc[id] = _nglob[id] - _nbyproc[id] * (_nproc[id] - 1); - _nmem[id] = _nloc[id]; - // if we are in the axis, we padd to ensure that every pencil is ok with alignment - if (id == _axis) { - // compute by how many we are not aligned: the global size in double = nglob * nf - 
const int modulo = (_nglob[id] * _nf * sizeof(double)) % _alignment; - // compute the number of points to add (in double indexing) - const int delta = (_alignment - modulo) / sizeof(double); - _nmem[id] += (modulo == 0) ? 0 : delta / _nf; - } + // we get the max between the nglob and + _nloc[id] = cmpt_nbyproc(id); + _nmem[id] = _nloc[id]; + // if we are in the axis and the last proc, we padd to ensure that every pencil is ok with alignment + // if (id == _axis && _rankd[id] == (_nproc[id] - 1)) { + if (id == _axis) { + // compute by how many we are not aligned: the global size in double = nglob * nf + const int modulo = (_nloc[id] * _nf * sizeof(double)) % _alignment; + // compute the number of points to add (in double indexing) + const int delta = (_alignment - modulo) / sizeof(double); + _nmem[id] += (modulo == 0) ? 0 : delta / _nf; } } END_FUNC; @@ -182,7 +165,7 @@ void Topology::cmpt_intersect_id(const int shift[3], const Topology* other, int // for the input configuration for (int i = 0; i < _nloc[id]; ++i) { // get the global id in the other topology - int oid_global = _rankd[id] * _nbyproc[id] + i + shift[id]; + int oid_global = cmpt_start_id(id) + i + shift[id]; if (oid_global <= 0) start[id] = i; if (oid_global < onglob) end[id] = i + 1; } @@ -209,7 +192,7 @@ void Topology::disp() const { FLUPS_INFO(" - nmem = %d %d %d", _nmem[0], _nmem[1], _nmem[2]); FLUPS_INFO(" - nproc = %d %d %d", _nproc[0], _nproc[1], _nproc[2]); FLUPS_INFO(" - rankd = %d %d %d", _rankd[0], _rankd[1], _rankd[2]); - FLUPS_INFO(" - nbyproc = %d %d %d", _nbyproc[0], _nbyproc[1], _nbyproc[2]); + // FLUPS_INFO(" - nbyproc = %d %d %d", _nbyproc[0], _nbyproc[1], _nbyproc[2]); FLUPS_INFO(" - axproc = %d %d %d", _axproc[0], _axproc[1], _axproc[2]); FLUPS_INFO(" - isComplex = %d", _nf == 2); // FLUPS_INFO(" - h = %f %f %f",_h[0],_h[1],_h[2]); diff --git a/src/Topology.hpp b/src/Topology.hpp index cd7f301c..eaf54372 100644 --- a/src/Topology.hpp +++ b/src/Topology.hpp @@ -36,6 +36,12 @@ * * A 
topology describes the layout of the data on the current processor. * + * In each direction, the number of unknowns owned by a rank splits the ranks into two groups. + * First, we compute the integer division, nbyproc, between _nglob and _nproc. + * + * Each rank of the first group, named g0, owns nbyproc+1 unknowns. The group starts at rank 0 and ends at rank mod(_nglob,_nproc)-1, included. + * Each rank of the second group, named g1, owns nbyproc unknowns. The group starts at rank mod(_nglob,_nproc) and ends at rank _nproc-1, included. + * */ class Topology { protected: @@ -47,7 +53,7 @@ class Topology { int _axis; /**<@brief fastest rotating index in the topology */ int _rankd[3]; /**<@brief rank of the current process per dim (012-indexing) */ int _nglob[3]; /**<@brief number of unknows per dim, global (012-indexing) */ - int _nbyproc[3]; /**<@brief mean number of unkows per dim = nloc except for the last one (012-indexing) */ + // int _nbyproc[3]; /**<@brief mean number of unkows per dim = nloc except for the last one (012-indexing) */ const int _alignment; MPI_Comm _comm; /**<@brief the comm associated with the topo, with ranks potentially optimized for switchtopos */ @@ -83,10 +89,36 @@ class Topology { inline int nmem(const int dim) const { return _nmem[dim]; } inline int nproc(const int dim) const { return _nproc[dim]; } inline int rankd(const int dim) const { return _rankd[dim]; } - inline int nbyproc(const int dim) const { return _nbyproc[dim]; } - inline int axproc(const int dim) const { return _axproc[dim]; } - inline MPI_Comm get_comm() const {return _comm; } + // inline int nbyproc(const int dim) const { return _nbyproc[dim]; } + inline int axproc(const int dim) const { return _axproc[dim]; } + inline MPI_Comm get_comm() const { return _comm; } + /** + * @brief compute the number of unknowns owned by the current rank in the direction id + * + * @param id + * @return int + */ + inline int cmpt_nbyproc(const int id) const { + return (_nglob[id] / 
_nproc[id]) + 1 * ((_nglob[id] % _nproc[id]) > _rankd[id]); + } + + /** + * @name 
Functions to compute the starting index of each topology + */ + inline int cmpt_start_id(const int id) const { + return (_rankd[id]) * (_nglob[id] / _nproc[id]) + std::min(_rankd[id], _nglob[id] % _nproc[id]); + } + + inline int cmpt_rank_fromid(const int global_id, const int id) const{ + const int nproc_g0 = _nglob[id]%_nproc[id]; // number of procs that have a +1 in their unkowns + const int nbyproc = _nglob[id]/_nproc[id]; // the number of unknowns in the integer division + const int global_g0 = nproc_g0*(nbyproc+1); // the number of unknowns in the first group of procs + + return (global_id < global_g0)? global_id/(nbyproc+1) : (global_id-global_g0)/nbyproc + nproc_g0; + } + + /** * @name Functions to compute intersection data with other Topologies * * @{ @@ -118,9 +150,9 @@ class Topology { * */ inline void get_istart_glob(int istart[3]) const { - istart[0] = _rankd[0] * _nbyproc[0]; - istart[1] = _rankd[1] * _nbyproc[1]; - istart[2] = _rankd[2] * _nbyproc[2]; + istart[0] = cmpt_start_id(0); + istart[1] = cmpt_start_id(1); + istart[2] = cmpt_start_id(2); } /** @@ -133,7 +165,7 @@ class Topology { _nglob[_axis] /= 2; _nloc[_axis] /= 2; _nmem[_axis] /= 2; - _nbyproc[_axis] /= 2; + // _nbyproc[_axis] /= 2; } } /** @@ -146,7 +178,7 @@ class Topology { _nglob[_axis] *= 2; _nloc[_axis] *= 2; _nmem[_axis] *= 2; - _nbyproc[_axis] *= 2; + // _nbyproc[_axis] *= 2; } } @@ -331,22 +363,6 @@ static inline void localSplit(const size_t id, const int size[3], const int axtr (*id2) = id / (size0 * size[ax1]); } -/** - * @brief Get the istart in global indexing - * - * @param istart start index along the ax0 direction (fast rotating index in current topo), ax1 and ax2 - * @param topo - */ -inline static void get_istart_glob(int istart[3], const Topology *topo) { - const int ax0 = topo->axis(); - const int ax1 = (ax0 + 1) % 3; - const int ax2 = (ax0 + 2) % 3; - - istart[ax0] = topo->rankd(ax0) * topo->nbyproc(ax0); - istart[ax1] = topo->rankd(ax1) * topo->nbyproc(ax1); - 
istart[ax2] = topo->rankd(ax2) * topo->nbyproc(ax2); -} - /** * @brief compute the global symmetrized index of a given point. * @@ -360,6 +376,19 @@ inline static void get_istart_glob(int istart[3], const Topology *topo) { * @param symstart * @param axtrg * @param is + * + * Symmetry computation: + * We have to take the symmetry around symstart. + * E.g. in X direction: + * `symstart[0] - (ix - symstart[0]) = 2 symstart[0] - ix` + * In some cases when we have an R2C transform, it ask for 2 additional doubles. + * The value is meaningless but we would like to avoid segfault and nan's. + * To do so, we use 2 tricks: + * - The `abs` is used to stay on the positivie side and hence avoid negative memory access + * - The `max` is used to prevent the computation of the value in 0, which is never used in the symmetry. + * + * The final formula is then ( in the X direction): + * `max( abs(2.0 symstart[0] - ix) , 1)` */ inline static void cmpt_symID(const int axsrc, const int i0, const int i1, const int i2, const int istart[3], const double symstart[3], const int axtrg, int is[3]) { // get the global indexes in the axsrc configuration diff --git a/src/defines.hpp b/src/defines.hpp index 3c97f410..41897af4 100644 --- a/src/defines.hpp +++ b/src/defines.hpp @@ -35,8 +35,6 @@ #include "mpi.h" #include "flups.h" -#define GREEN_DIM 3 - //============================================================================= // LOCATORS //============================================================================= @@ -429,8 +427,6 @@ static inline void FLUPS_CHECK(bool a, std::string b, T1 c, T2 d, T3 e, T4 f, T5 // CONSTANTS AND OTHERS //============================================================================= -#define GAMMA 0.5772156649015328606 - template static inline bool FLUPS_ISALIGNED(T a) { return ((uintptr_t)(const void*)a) % FLUPS_ALIGNMENT == 0; diff --git a/src/expint.hpp b/src/expint.hpp index f0d630c2..0cae7a1d 100644 --- a/src/expint.hpp +++ b/src/expint.hpp @@ -16,7 
+16,7 @@ #include -static const double c_gamma = 0.577215664901532861; +static const double c_gamma = 0.5772156649015328606; static double expint1(double x); static double expint2(double x); diff --git a/src/flups.h b/src/flups.h index e53af561..f10d46cc 100644 --- a/src/flups.h +++ b/src/flups.h @@ -42,7 +42,7 @@ extern "C" { //============================================================================= /** - * @name Common definitions + * @name STRUCTURES AND DEFINITIONS * @{ */ //============================================================================= @@ -56,7 +56,8 @@ enum FLUPS_BoundaryType { EVEN = 0, /**< EVEN boundary condition = zero flux */ ODD = 1, /**< ODD boundary condition = zero value */ PER = 3, /**< PERiodic boundary conditions */ - UNB = 4 /**< UNBounded boundary condition */ + UNB = 4, /**< UNBounded boundary condition */ + NONE = 9 /**< No boundary condition = dimension not used */ }; /** @@ -83,19 +84,19 @@ enum FLUPS_SolverType { }; /** - * @brief to be used as "sign" for all of the FORARD tranform + * @brief to be used as "sign" for all of the FORWARD transform * */ -#define FLUPS_FORWARD -1 // = FFTW_FORWARD +#define FLUPS_FORWARD -1 // equivalent to FFTW_FORWARD /** * @brief to be used as "sign" for all of the BACKWARD tranform * */ -#define FLUPS_BACKWARD 1 // = FFTW_BACKWARD +#define FLUPS_BACKWARD 1 // equivalent to FFTW_BACKWARD /** - * @brief Memory alignment constant in bytes. + * @brief Memory alignment in bytes. * */ #define FLUPS_ALIGNMENT 16 @@ -129,12 +130,14 @@ typedef enum FLUPS_SolverType FLUPS_SolverType; * * @param size the data to be allocated */ -void * flups_malloc(size_t size); +void* flups_malloc(size_t size); /** * * @brief Free the memory allocated with flups_malloc * + * @warning Memory allocated with flups_malloc must be freed using this function. 
+ * * @param data the data to be freed */ void flups_free(void* data); @@ -143,19 +146,26 @@ void flups_free(void* data); * @brief compute the memory local index for a point (i0,i1,i2) in axsrc-indexing in a memory. * The returned value is in the axtrg-indexing * - * For example if going through a topology following the standard indexing: + * For example if going through a complex topology following the standard indexing: * @code{.cpp} - * const int ax0 = flups_topo_get_axis(topo); + // the topology is complex + const int nf = 2; + // get the topology indexing + const int ax0 = flups_topo_get_axis(topo); + // the memory size is given in the 012 order const int nmem[3] = {flups_topo_get_nmem(topo,0),flups_topo_get_nmem(topo,1), flups_topo_get_nmem(topo,2)}; for (int i2 = 0; i2 < flups_topo_get_nloc(topo,2); i2++) { for (int i1 = 0; i1 < flups_topo_get_nloc(topo,1); i1++) { for (int i0 = 0; i0 < flups_topo_get_nloc(topo,0); i0++) { - const size_t id = flups_locID(0, i0, i1, i2, ax0, nmem, 1); - * - * data[id] = ...; - * } - * } - * } + // the i0, i1 and i2 are given in a 0-indexing + // the id is aimed for an array in the ax0-indexing + const size_t id = flups_locID(0, i0, i1, i2, ax0, nmem, nf); + + data[id+0] = ...; + data[id+1] = ...; + } + } + } * @endcode * * @param axsrc the FRI, reference axis aligned with index i0 @@ -163,7 +173,7 @@ void flups_free(void* data); * @param i1 the index in the (axsrc+1)%3 direction * @param i2 the index in the (axsrc+2)%3 direction * @param axtrg the topology FRI, i.e. the way the memory is aligned in the current topology - * @param size the size of the memory (012-indexing) + * @param size the size of the memory (given in the 012-order) * @param nf the number of unknows in one element * @return size_t */ @@ -179,16 +189,17 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co } /** - * @brief compute the local k-index in spectral coordinates for a point (i0,i1,i2) in axsrc-indexing. 
+ * @brief compute them symmetrized local index for a point (i0,i1,i2) in axsrc-indexing in an extended topology (e.g. spectral topologies). * The returned value is in the axtrg-indexing. * - * For example if going through a topology following the standard indexing: + * For example if going through a complex topology following the standard indexing, one can get the spectral indexing: * @code{.cpp} - * const int ax0 = flups_topo_get_axis(topoSpec); + const int ax0 = flups_topo_get_axis(topoSpec); const int ax1 = (ax0 + 1) % 3; const int ax2 = (ax0 + 2) % 3; const int nf = 2; //topo is complex + // get the memory size of the spectral array int nmemSpec[3]; for(int i=0;i<3;i++){ nmemSpec[i] = flups_topo_get_nmem(topoSpec,i); @@ -200,6 +211,7 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co const size_t id = flups_locID(ax0, 0, i1, i2, ax0, nmemSpec,nf); for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec,ax0); i0++) { int is[3]; + // get the symmetrized ID flups_symID(ax0, i0, i1, i2, istartSpec, symstart, 0, is); // the (symmetrized) wave numbers: @@ -218,8 +230,8 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co * @param i0 the index in the axsrc direction * @param i1 the index in the (axsrc+1)%3 direction * @param i2 the index in the (axsrc+2)%3 direction - * @param istart start index of the local block (as provided by @flups_get_istartGlob) - * @param symstart indexes where the symmetry starts (as provided by @flups_get_spectralInfo) + * @param istart start index of the local block (as provided by @ref flups_get_istartGlob) + * @param symstart indexes where the symmetry starts, i.e. the first index which is symmetrized (as provided by @ref flups_get_spectralInfo) * @param axtrg the FRI of the target topology, i.e. 
the way the memory is aligned in the current topology * @param is the spectral index */ @@ -248,7 +260,11 @@ static inline void flups_symID(const int axsrc, const int i0, const int i1, cons //============================================================================= /** - * @brief Create and returns a topology. + * @brief Creates and returns a topology. + * + * @warning Once specified, the fastest rotating inde defines the memory layout. + * We assume fortran memory layout, i.e. if the FRI is 2, the next dimension is 0 and the last one is 1. + * This is opposed to the C indexing: when the FRI is 2, the next dimension is 1 and the last one is 0. * * @param axis The direction which is aligned with the fastest rotating index * @param nglob The global number of points in each direction of the domain @@ -292,7 +308,10 @@ int flups_topo_get_axis(const FLUPS_Topology* t); */ int flups_topo_get_nglob(const FLUPS_Topology* t, const int dim); /** - * @brief Determines the local number of points in the domain (on this process) in a given direction + * @brief Determines the local number of points in the domain (on this rank) in a given direction + * + * @warning due to some memory padding to ensure memory alignement for the FFTs, @ref flups_topo_get_nloc may + * not return the same result as @ref flups_topo_get_nmem * * @param t * @param dim @@ -300,7 +319,10 @@ int flups_topo_get_nglob(const FLUPS_Topology* t, const int dim); */ int flups_topo_get_nloc(const FLUPS_Topology* t, const int dim); /** - * @brief Determines the local memory usage per direction + * @brief Determines the local memory size per direction + * + * @warning due to some memory padding to ensure memory alignement for the FFTs, @ref flups_topo_get_nloc may + * not return the same result as @ref flups_topo_get_nmem * * @param t * @param dim @@ -324,21 +346,21 @@ int flups_topo_get_nproc(const FLUPS_Topology* t, const int dim); void flups_topo_get_istartGlob(const FLUPS_Topology* t, int istart[3]); /** - * 
@brief returns the local size of on this proc + * @brief returns the local size of on this rank, i.e. the number of unknowns in this rank * * @return long */ size_t flups_topo_get_locsize(const FLUPS_Topology* t); /** - * @brief returns the memory size of on this proc + * @brief returns the memory size of on this proc, i.e. the number of bytes in this proc, including padded memory * * @return long */ size_t flups_topo_get_memsize(const FLUPS_Topology* t); /** - * @brief returns the communicator of the topology + * @brief returns the MPI-communicator of the topology * * @param t the Topology of interest * @param comm the communicator @@ -364,21 +386,22 @@ MPI_Comm flups_topo_get_comm(FLUPS_Topology* t); */ FLUPS_Solver* flups_init(FLUPS_Topology* t, const FLUPS_BoundaryType bc[3][2], const double h[3], const double L[3]); /** - * @brief Same as @ref flups_init, with a profiler for the timing of the code (if compiled with PROF) + * @brief Same as @ref flups_init, with a profiler for the timing of the code (if compiled with PROF, if not, it will not use the profiler). * * @param prof */ FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, const FLUPS_BoundaryType bc[3][2], const double h[3], const double L[3],FLUPS_Profiler* prof); /** - * @brief must be called before execution terminates + * @brief must be called before execution terminates as it frees the memory used by the solver * * @param s */ void flups_cleanup(FLUPS_Solver* s); /** - * @brief + * @brief sets the type of the Green's function used by the solver + * * @warning must be done before @ref flups_setup * * @param s @@ -387,14 +410,14 @@ void flups_cleanup(FLUPS_Solver* s); void flups_set_greenType(FLUPS_Solver* s, const FLUPS_GreenType type); /** - * @brief setup the solver + * @brief setup the solver and do the memory allocation * - * @warning after this call the solver cannot change anymore! + * @warning after this call the solver cannot been change anymore! 
* * @warning if changeComm is true, you need to update MPI rank based on the new communicator that is provided by @ref flups_topo_get_comm * * @param s - * @param changeComm indicate if FLUPS is allowed to change the communicator of the Topology used to initialize the solver (only if compiled with RORDER_RANKS) + * @param changeComm indicate if FLUPS is allowed to change the communicator of the Topology used to initialize the solver (only valid if compiled with RORDER_RANKS) * @return double* */ double* flups_setup(FLUPS_Solver* s,const bool changeComm); @@ -425,23 +448,80 @@ void flups_solve(FLUPS_Solver* s, double* field, double* rhs, const FLUPS_Solver */ /** - * @brief get the total amount of memory allocated by FLUPS + * @brief get the maximun amount of memory required by FLUPS * * @param s * @return size_t */ size_t flups_get_allocSize(FLUPS_Solver* s); +/** + * @brief get information required to compute the spectral mode associated with each spectral field entry + * + * The spectral mode in direction i is given by (index[i] + koffset[i])*kfact[i] + * + * @param s the FLUPS solver + * @param kfact returns the multiplication factor to used to get + * @param koffset returns the spectral offeset given the type of boundary condition used + * @param symstart the first point which is symmetrized, to use with @ref flups_symID + */ void flups_get_spectralInfo(FLUPS_Solver* s, double kfact[3], double koffset[3], double symstart[3]); +/** + * @brief while using Hejlesen kernels, set the alpha factor, i.e. the number of grid points in the smoothing Gaussian + * + * @param s + * @param alpha + */ void flups_set_alpha(FLUPS_Solver* s, const double alpha); //must be done before setup + +/** + * @brief sets the order of derivative while using divergence or rotational formulation + * + * @param s + * @param order + */ void flups_set_OrderDiff(FLUPS_Solver* s, const int order); //must be done before setup +/** + * @brief returns the physical topology, i.e. 
the one used for rhs and solution + * + * @param s + * @return const FLUPS_Topology* + */ const FLUPS_Topology* flups_get_innerTopo_physical(FLUPS_Solver* s); +/** + * @brief returns the spectral topology, i.e. the one which is fully spectral + * + * @param s + * @return const FLUPS_Topology* + */ const FLUPS_Topology* flups_get_innerTopo_spectral(FLUPS_Solver* s); +/** + * @brief do the copy from the data provided by the user to FLUPS owned data arrays + * + * @param s + * @param topo + * @param data + * @param sign + */ void flups_do_copy(FLUPS_Solver* s, const FLUPS_Topology* topo, double* data, const int sign); +/** + * @brief compute the FFT, go from the physical space to the spectral one + * + * @param s + * @param data + * @param sign + */ void flups_do_FFT(FLUPS_Solver* s, double* data, const int sign); +/** + * @brief compute the multiplication between the Green's function and the field + * + * @param s + * @param data + * @param type + */ void flups_do_mult(FLUPS_Solver* s, double* data, const FLUPS_SolverType type); /**@} */ @@ -453,10 +533,37 @@ void flups_do_mult(FLUPS_Solver* s, double* data, const FLUPS_SolverType type); * @{ */ +/** + * @brief create a timer using the default name "default". 
+ * + * @return FLUPS_Profiler* + */ FLUPS_Profiler* flups_profiler_new(); +/** + * @brief create a timer with a name "name" + * + * @param name + * @return FLUPS_Profiler* + */ FLUPS_Profiler* flups_profiler_new_n(const char name[]); +/** + * @brief free the profiler created + * + * @param p + */ void flups_profiler_free(FLUPS_Profiler* p); +/** + * @brief display the profiler using the "root" as a reference + * + * @param p + */ void flups_profiler_disp_root(FLUPS_Profiler* p); +/** + * @brief display the profiler using "name" as reference + * + * @param p + * @param name + */ void flups_profiler_disp(FLUPS_Profiler* p,const char name[]); /**@} */ diff --git a/src/green_functions_3d.cpp b/src/green_functions.cpp similarity index 56% rename from src/green_functions_3d.cpp rename to src/green_functions.cpp index 7aa8b966..f81f778c 100644 --- a/src/green_functions_3d.cpp +++ b/src/green_functions.cpp @@ -1,5 +1,5 @@ /** - * @file green_functions_3d.cpp + * @file green_functions.cpp * @author Thomas Gillis and Denis-Gabriel Caprace * @copyright Copyright © UCLouvain 2019 * @@ -23,59 +23,17 @@ * */ -#include "green_functions_3d.hpp" - -// **Symmetry computation:** -// -// We have to take the symmetry around symstart. e.g. in X direction: `symstart[0] - (ix - symstart[0]) = 2 symstart[0] - ix` -// -// In some cases when we have an R2C transform, it ask for 2 additional doubles. -// The value is meaningless but we would like to avoid segfault and nan's. -// To do so, we use 2 tricks: -// - The `abs` is used to stay on the positivie side and hence avoid negative memory access -// - The `max` is used to prevent the computation of the value in 0, which is never used in the symmetry. 
-// -// As an example, the final formula is then ( in the X direction): -// `max( abs(2 symstart[0] - ix) , 1)` +#include "green_functions.hpp" +#include "green_kernels.hpp" /** * @brief generic type for Green kernel, takes a table of parameters that can be used depending on the kernel * */ -typedef double (*GreenKernel)(const void* ); - - -/** - * @name 3 directions unbounded - 0 direction spectral - * - * @{ - */ -// ----------------------------------------------------------- KERNELS ---------------------------------------------------------- -//notice that these function will likely not be inlined as we have a pointer to them... -static inline double _hej_2_3unb0spe(const void* params) { - double r = ((double*)params) [0]; - double eps = ((double*)params) [1]; - return c_1o4pi / r * (erf(r / eps * c_1osqrt2)); -} -static inline double _hej_4_3unb0spe(const void* params) { - double r = ((double*)params) [0]; - double eps = ((double*)params) [1]; - double rho = r / eps; - return c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (rho)*exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2)); -} -static inline double _hej_6_3unb0spe(const void* params) { - double r = ((double*)params) [0]; - double eps = ((double*)params) [1]; - double rho = r / eps; - return c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (c_7o4 * rho - c_1o4 * pow(rho, 3)) * exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2)); -} -static inline double _chat_2_3unb0spe(const void* params) { - double r = ((double*)params) [0]; - return c_1o4pi / r ; -} +typedef double (*GreenKernel)(const void*,const double*); /** - * @brief Compute the Green function for 3dirunbounded + * @brief Compute the Green function for 0 dir spectral (i.e. 
3 dir unbounded or 2 dirunbounded) * * @param topo the topology associated to the Green's function * @param hfact the h multiplication factors @@ -83,9 +41,8 @@ static inline double _chat_2_3unb0spe(const void* params) { * @param green the Green function array * @param typeGreen the type of Green function * @param eps the smoothing length (only used for HEJ kernels) - * */ -void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){ +void cmpt_Green_3dirunbounded(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){ BEGIN_FUNC; FLUPS_CHECK(!(topo->isComplex()),"Green topology cannot been complex with 0 dir spectral", LOCATION); @@ -95,29 +52,37 @@ void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double FLUPS_CHECK(hfact[1] != 0.0, "grid spacing cannot be 0", LOCATION); FLUPS_CHECK(hfact[2] != 0.0, "grid spacing cannot be 0", LOCATION); - double G0; //value of G in 0 GreenKernel G; + double G0; //value of G in 0 + int GN = 0; + double *Gdata = NULL; + + //========================== 3D ================================= switch (typeGreen) { case HEJ_2: G = &_hej_2_3unb0spe; - G0 = M_SQRT2 / (4.0 * eps * sqrt(M_PI * M_PI * M_PI)); + G0 = - M_SQRT2 / (4.0 * eps * sqrt(M_PI * M_PI * M_PI)); break; case HEJ_4: G = &_hej_4_3unb0spe; - G0 = 3.0 * M_SQRT2 / (8.0 * eps * sqrt(M_PI * M_PI * M_PI)); + G0 = - 3.0 * M_SQRT2 / (8.0 * eps * sqrt(M_PI * M_PI * M_PI)); break; case HEJ_6: G = &_hej_6_3unb0spe; - G0 = 15.0 * M_SQRT2 / (32.0 * eps * sqrt(M_PI * M_PI * M_PI)); + G0 = - 15.0 * M_SQRT2 / (32.0 * eps * sqrt(M_PI * M_PI * M_PI)); break; case CHAT_2: G = &_chat_2_3unb0spe; - G0 = .5 * pow(1.5 * c_1o2pi * hfact[0] * hfact[1] * hfact[2], 2. / 3.); + G0 = - 0.5 * pow(1.5 * c_1o2pi * hfact[0] * hfact[1] * hfact[2], 2. 
/ 3.); break; case LGF_2: - FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION); - //please add the parameters you need to params + FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs", LOCATION); + FLUPS_CHECK(hfact[1] == hfact[2], "the grid has to be isotropic to use the LGFs", LOCATION); + // read the LGF data and store it + _lgf_readfile(3,&GN, &Gdata); + // associate the Green's function + G = &_lgf_2_3unb0spe; break; default: FLUPS_ERROR("Green Function type unknow.", LOCATION); @@ -149,95 +114,26 @@ void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double const double r2 = x0 * x0 + x1 * x1 + x2 * x2; const double r = sqrt(r2); - const double tmp[2] = {r, eps}; - green[id + i0 * nf] = -G(tmp); + // the first two arguments are used in standard kernels, the two zeros are for compatibility with the 2dirunbounded function, + // and the others 5 ones are aimed for LGFs only + const double tmp[9] = {r, eps, 0, 0, is[ax0], is[ax1], is[ax2], GN, hfact[ax0]}; + green[id + i0 * nf] = G(tmp,Gdata); } } } - // reset the value in 0.0 - if (istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) { - green[0] = -G0; + // reset the value in 0.0 but not for LGF's since we have already pre-computed its value + if (typeGreen != LGF_2 && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) { + green[0] = G0; + } + // free Gdata if needed + if (Gdata != NULL) { + flups_free(Gdata); } - END_FUNC; -} -/**@} */ - - -/** - * @name 2 directions unbounded - 1 direction spectral - * - * @{ - */ -// ----------------------------------------------------------- KERNELS ---------------------------------------------------------- -static inline double _hej_2_2unb1spe_k0(const void* params) { - const double r = ((double*)params)[0]; - const double sig = ((double*)params)[2]; - - const double rho = r/sig; - const double rho2 = rho*rho; - // return -c_1o2pi * (log(r) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); 
//mistaken coefs in [Spietz2018] - return -c_1o2pi * (log(r) + .5 * expint_ei(rho2 / 2)); - // return -c_1o2pi * (.5*log(rho*.5) + .5 * expint_ei(rho2 / 2)); -} -static inline double _hej_2_2unb1spe_r0(const void* params) { - const double sig = ((double*)params)[2]; - - return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig)); -} - -static inline double _hej_4_2unb1spe_k0(const void* params) { - const double r = ((double*)params) [0]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double rho2 = rho*rho; - // return -c_1o2pi * (log(r) - (1 - .5 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018] - return -c_1o2pi * (log(r) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); - // return -c_1o2pi * (.5*log(rho2*.5) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); -} -static inline double _hej_4_2unb1spe_r0(const void* params) { - const double sig = ((double*)params)[2]; - - return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .5); -} - -static inline double _hej_6_2unb1spe_k0(const void* params) { - const double r = ((double*)params) [0]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double rho2 = rho*rho; - // return -c_1o2pi * (log(r) - (1 - rho2 + .125 * rho2 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018] - return -c_1o2pi * (log(r) - (.75 - .125 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); - // return -c_1o2pi * (.5*log(rho2*.5) - (.75 - .125 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); -} -static inline double _hej_6_2unb1spe_r0(const void* params) { - const double sig = ((double*)params)[2]; - return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .75); -} -static inline double _zero(const void* params) { - return - 0.0; + END_FUNC; } -static inline double _chat_2_2unb1spe(const void* params) { - const double r = ((double*)params) [0]; - const double k = ((double*)params) [1]; - return c_1o2pi * besselk0(fabs(k) * 
r); -} -static inline double _chat_2_2unb1spe_r0(const void* params) { - const double k = ((double*)params) [1]; - const double r_eq2D = ((double*)params) [3]; - - return (1.0 - k * r_eq2D * besselk1(k * r_eq2D)) * c_1opi / ((k * r_eq2D) * (k * r_eq2D)); -} -static inline double _chat_2_2unb1spe_k0(const void* params) { - const double r = ((double*)params) [0]; - // const double sig = ((double*)params)[2]; - - return - c_1o2pi * log(r) ; //caution: mistake on the sign in [Chatelain2010] -} /** * @brief Compute the Green function for 2dirunbounded and 1dirspectral @@ -252,47 +148,52 @@ static inline double _chat_2_2unb1spe_k0(const void* params) { * @param green the Green function array * @param typeGreen the type of Green function * @param eps the smoothing length (only used for HEJ kernels) + * + * @warning For 3D kernels: According to [Spietz2018], we can obtain the **approximate** Green kernel by using the 2D unbounded kernel + for mode 0 in the spectral direction, and the rest of the Green kernel is the same as in full spectral. + We here fill with zero most part of Green data. Indeed, we are interested only in doing the FFT + of _hej_*_2unb1spe_k0 in the 2 remaining spatial directions. We will complete the Green function with the + full spectral part afterwards, while going through Solver::_cmptGreenFunction. 
+ * */ -void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) { +void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) { BEGIN_FUNC; // assert that the green spacing and dk is not 0.0 - this is also a way to check that ax0 will be spectral, and the others are still to be transformed FLUPS_CHECK(kfact[0] != hfact[0], "grid spacing[0] cannot be = to dk[0]", LOCATION); FLUPS_CHECK(kfact[1] != hfact[1], "grid spacing[1] cannot be = to dk[1]", LOCATION); - FLUPS_CHECK(kfact[2] != hfact[2], "grid spacing[2] cannot be = to dk[2]", LOCATION); + // check that if hfact or kfact != 0, they are not the same + FLUPS_CHECK(!(kfact[2] == hfact[2] && (kfact[2]!= 0.0 || hfact[2] != 0.0)), "grid spacing[2] cannot be = to dk[2]", LOCATION); // @Todo For Helmolz, we need Green to be complex // FLUPS_CHECK(topo->isComplex(), "I can't fill a non complex topo with a complex green function.", LOCATION); // opt_double_ptr mygreen = green; //casting of the Green function to be able to access real and complex part //Implementation note: if you want to do Helmolz, you need Hankel functions (3rd order Bessel) which are not implemented in stdC. Consider the use of boost lib. 
//notice that bessel_k has been introduced in c++17 - + GreenKernel G; // the Green kernel (general expression in the whole domain) GreenKernel Gk0; // the Green kernel (particular expression in k=0) GreenKernel Gr0; // the Green kernel (particular expression in r=0) + int GN = 0; + double *Gdata = NULL; + switch (typeGreen) { case HEJ_2: - FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation.", LOCATION); - - // Note: - // According to [Spietz2018], we can obtain the **approximate** Green kernel by using the 2D unbounded kernel - // for mode 0 in the spectral direction, and the rest of the Green kernel is the same as in full spectral. - // We here fill with zero the greatest part of Green: we are actually interested only in doing the FFT - // of _hej_*_2unb1spe_k0 in the 2 remaining spatial directions. We will complete the Green function with the - // full spectral part afterwards. + FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION); + // see warning in the function description G = &_zero; Gk0 = &_hej_2_2unb1spe_k0; Gr0 = &_hej_2_2unb1spe_r0; break; case HEJ_4: - // FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation."); + FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION); G = &_zero; Gk0 = &_hej_4_2unb1spe_k0; Gr0 = &_hej_4_2unb1spe_r0; break; case HEJ_6: - // FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation."); + FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION); G = &_zero; Gk0 = &_hej_6_2unb1spe_k0; Gr0 = &_hej_6_2unb1spe_r0; @@ -304,7 +205,13 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double // caution: the value of G in k=r=0 is specified at the end of this routine break; case LGF_2: - FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION); + FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be 
isotropic to use the LGFs", LOCATION); + // read the LGF data and store it + _lgf_readfile(2,&GN, &Gdata); + // associate the Green's function + G = &_zero; + Gk0 = &_lgf_2_2unb0spe; + Gr0 = &_lgf_2_2unb0spe; break; default: FLUPS_ERROR("Green Function type unknow.", LOCATION); @@ -323,19 +230,18 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double for (int i2 = 0; i2 < topo->nloc(ax2); i2++) { for (int i1 = 0; i1 < topo->nloc(ax1); i1++) { //local indexes start - const size_t id = localIndex(ax0,0, i1, i2, ax0, nmem,nf); - + const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, nf); + for (int i0 = 0; i0 < topo->nloc(ax0); i0++) { - // global indexes int is[3]; - cmpt_symID(ax0,i0,i1,i2,istart,symstart,0,is); + cmpt_symID(ax0, i0, i1, i2, istart, symstart, 0, is); // (symmetrized) wave number : only one kfact is non-zero const double k0 = (is[ax0] + koffset[ax0]) * kfact[ax0]; const double k1 = (is[ax1] + koffset[ax1]) * kfact[ax1]; const double k2 = (is[ax2] + koffset[ax2]) * kfact[ax2]; - const double k = k0 + k1 + k2; + const double k = k0 + k1 + k2; //(symmetrized) position : only one hfact is zero const double x0 = (is[ax0]) * hfact[ax0]; @@ -343,110 +249,32 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double const double x2 = (is[ax2]) * hfact[ax2]; const double r = sqrt(x0 * x0 + x1 * x1 + x2 * x2); - const double tmp[4] = {r, k, eps, r_eq2D}; + const double tmp[9] = {r, k, eps, r_eq2D, is[ax0], is[ax1], is[ax2], GN, hfact[ax0]}; // green function value // Implementation note: having a 'if' in a loop is highly discouraged... however, this is the init so we prefer having a // this routine with a high readability and lower efficency than the opposite. 
if (r <= (hfact[ax0] + hfact[ax1] + hfact[ax2]) * .2) { - green[id + i0 * topo->nf()] = -Gr0(tmp); + // we should enter this case for 2d and 3d cases + green[id + i0 * topo->nf()] = Gr0(tmp, Gdata); } else if (k <= (kfact[ax0] + kfact[ax1] + kfact[ax2]) * 0.2) { - green[id + i0 * topo->nf()] = -Gk0(tmp); + // we should always enter this routine for 2d case and sometimes for 3d cases + green[id + i0 * topo->nf()] = Gk0(tmp, Gdata); } else { - green[id + i0 * topo->nf()] = -G(tmp); + green[id + i0 * topo->nf()] = G(tmp, Gdata); } } } } - // reset the value in x=y=0.0 and k=0 - if (typeGreen == CHAT_2 && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) { + // reset the value in x=y=0.0 and k=0 for singular expressions + if ((typeGreen == CHAT_2) && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) { // green[0] = -2.0 * log(1 + sqrt(2)) * c_1opiE3o2 / r_eq2D; - green[0] = .25 * c_1o2pi * (M_PI - 6.0 + 2. * log(.5 * M_PI * r_eq2D)); //caution: mistake in [Chatelain2010] + green[0] = - 0.25 * c_1o2pi * (M_PI - 6.0 + 2.0 * log(0.5 * M_PI * r_eq2D)); //caution: mistake in [Chatelain2010] } END_FUNC; } -/**@} */ - - -/** - * @name 1 direction unbounded - 2 directions spectral - * - * @{ - */ -// ----------------------------------------------------------- KERNELS ---------------------------------------------------------- -static inline double _hej_2_1unb2spe(const void* params) { - const double r = ((double*)params) [0]; - const double k = ((double*)params) [1]; - const double sig = ((double*)params) [2]; - const double rho = r/sig; - const double s = k*sig; - - const double subfun = s * rho > 100. ? 
0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); - return .25 * sig / s * subfun ; -} -static inline double _hej_2_1unb2spe_k0(const void* params) { - const double r = ((double*)params) [0]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double rosqrt2 = r*c_1osqrt2; - // return -.5* (r * erf(rosqrt2/sig) + (exp(-r*r/(2*sig*sig)) - 1.)*sig*M_SQRT2*c_1osqrtpi) ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] - return -.5* r * erf(rosqrt2/sig) + (1.-exp(-rho*rho*.5)) *sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] -} - -static inline double _hej_4_1unb2spe(const void* params) { - const double r = ((double*)params) [0]; - const double k = ((double*)params) [1]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double s = k*sig; - const double subfun = s * rho > 100. ? 0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); - return .25 * sig / s * subfun + \ - sig * M_SQRT2 * c_1osqrtpi * .25 * exp(-.5 * (s * s + rho * rho)); -} -static inline double _hej_4_1unb2spe_k0(const void* params) { - const double r = ((double*)params) [0]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double rosqrt2 = r*c_1osqrt2; - return -.5* r * erf(rosqrt2/sig) + (1.-exp(-rho*rho*.5)) *.5*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] -} - -static inline double _hej_6_1unb2spe(const void* params) { - const double r = ((double*)params) [0]; - const double k = ((double*)params) [1]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double s = k*sig; - const double subfun = s * rho > 100. ? 
0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); - return .25 * sig / s * subfun + \ - sig * M_SQRT2 * c_1osqrtpi * (c_5o16 + c_1o16 * (s * s - rho * rho)) * exp(-.5 * (s * s + rho * rho)); -} -static inline double _hej_6_1unb2spe_k0(const void* params) { - const double r = ((double*)params) [0]; - const double sig = ((double*)params) [2]; - - const double rho = r/sig; - const double rosqrt2 = r*c_1osqrt2; - return -.5* r * erf(rosqrt2/sig) + (3.-exp(-rho*rho*.5) * (rho*rho+3.) ) *.125*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] -} - -static inline double _chat_2_1unb2spe(const void* params) { - const double r = ((double*)params) [0]; - const double k = ((double*)params) [1]; - - return .5 * exp(-k * r) / k; -} -static inline double _chat_2_1unb2spe_k0(const void* params) { - const double r = ((double*)params) [0]; - - return -.5 * fabs(r); -} /** @@ -463,13 +291,14 @@ static inline double _chat_2_1unb2spe_k0(const void* params) { * @param typeGreen the type of Green function * @param eps the smoothing length (only used for HEJ kernels) */ -void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) { +void cmpt_Green_1dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) { BEGIN_FUNC; // assert that the green spacing and dk is not 0.0 - this is also a way to check that ax0 will be spectral, and the others are still to be transformed FLUPS_CHECK(kfact[0] != hfact[0], "grid spacing[0] cannot be = to dk[0]", LOCATION); FLUPS_CHECK(kfact[1] != hfact[1], "grid spacing[1] cannot be = to dk[1]", LOCATION); - FLUPS_CHECK(kfact[2] != hfact[2], "grid spacing[2] cannot be = to dk[2]", LOCATION); + 
// check that if hfact or kfact != 0, they are not the same + FLUPS_CHECK(!(kfact[2] == hfact[2] && (kfact[2]!= 0.0 || hfact[2] != 0.0)), "grid spacing[2] cannot be = to dk[2]", LOCATION); // @Todo For Helmolz, we need Green to be complex // FLUPS_CHECK(topo->isComplex(), "I can't fill a non complex topo with a complex green function.", LOCATION); @@ -537,10 +366,10 @@ void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double // Implementation note: having a 'if' in a loop is highly discouraged... however, this is the init so we prefer having a // this routine with a high readability and lower efficency than the opposite. if (k <= (kfact[ax0] + kfact[ax1] + kfact[ax2]) * 0.2) { - green[id + i0 * nf] = -G0(tmp); + green[id + i0 * nf] = G0(tmp,NULL); } else{ - green[id + i0 * nf] = -G(tmp); + green[id + i0 * nf] = G(tmp,NULL); } } } @@ -548,43 +377,6 @@ void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double END_FUNC; } -/**@} */ - - -/** - * @name 3 directions spectral - * - * @{ - */ -// ----------------------------------------------------------- KERNELS ---------------------------------------------------------- -static inline double _hej_2_0unb3spe(const void* params) { - const double ksqr = ((double*)params)[0]; - const double sig = ((double*)params)[1]; - - const double ssqr = ksqr * (sig * sig); - return exp(-ssqr / 2) / (ksqr); -} -static inline double _hej_4_0unb3spe(const void* params) { - const double ksqr = ((double*)params)[0]; - const double sig = ((double*)params)[1]; - - const double ssqr = ksqr * (sig * sig); - return (1 + ssqr / 2) * exp(-ssqr / 2) / (ksqr); -} -static inline double _hej_6_0unb3spe(const void* params) { - const double ksqr = ((double*)params)[0]; - const double sig = ((double*)params)[1]; - - const double ssqr = ksqr * (sig * sig); - return (1 + ssqr / 2 + ssqr * ssqr / 8) * exp(-ssqr / 2) / (ksqr); -} - -static inline double _chat_2_0unb3spe(const void* params) { - const double ksqr = 
((double*)params) [0]; - - return 1 / ksqr; -} - /** * @brief Compute the Green function for 3dirspectral (in the whole spectral domain) * @@ -597,6 +389,7 @@ static inline double _chat_2_0unb3spe(const void* params) { * The wave number in each direction is obtained as k_i = (i_s + koffset_i) * kfact_i, where is the global (potentially symmetric) index. * * @param topo the topology associated to the Green's function + * @param hgrid the grid spacing h = hx = hy = hz, used only for the LGF * @param kfact the k multiplicative factor * @param koffset the k additive factor * @param symstart index of the symmetry in each direction @@ -604,11 +397,10 @@ static inline double _chat_2_0unb3spe(const void* params) { * @param typeGreen the type of Green function * @param eps the smoothing length (only used for HEJ kernels) */ -void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){ - cmpt_Green_3D_0dirunbounded_3dirspectral(topo, kfact, koffset, symstart, green, typeGreen, eps, NULL, NULL); +void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) { + cmpt_Green_0dirunbounded(topo, hgrid, kfact, koffset, symstart, green, typeGreen, eps, NULL, NULL); } - /** * @brief Compute the Green function for 3dirspectral (in a portion of the spectral domain) * @@ -627,13 +419,13 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double * @param istart_custom global index where we start to fill data, in each dir. If NULL, we start at the beginning of the spectral space. * @param iend_custom global index where we end to fill data, in each dir. If NULL, we end at the end of the spectral space. 
*/ -void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]){ +void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]) { BEGIN_FUNC; // assert that the green spacing is not 0.0 everywhere FLUPS_CHECK(kfact[0] != 0.0, "dk cannot be 0", LOCATION); FLUPS_CHECK(kfact[1] != 0.0, "dk cannot be 0", LOCATION); - FLUPS_CHECK(kfact[2] != 0.0, "dk cannot be 0", LOCATION); + // FLUPS_CHECK(kfact[2] != 0.0, "dk cannot be 0", LOCATION); GreenKernel G; // the Green kernel (general expression in the whole domain) @@ -651,7 +443,7 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double G = &_chat_2_0unb3spe; break; case LGF_2: - FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION); + G = &_lgf_2_0unb3spe; break; default: FLUPS_ERROR("Green Function type unknow.", LOCATION); @@ -708,17 +500,18 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double // green function value const double ksqr = k0 * k0 + k1 * k1 + k2 * k2; - const double tmp[2] = {ksqr, eps}; + // const double tmp[2] = {ksqr, eps}; + const double tmp[6] = {ksqr, eps, k0, k1, k2, hgrid}; - green[id + i0 * nf] = -G(tmp); + green[id + i0 * nf] = G(tmp,NULL); } } } // reset the value in 0.0 if (istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0 \ && koffset[0]+koffset[1]+koffset[2]<0.2 ) { - green[0] = -0.0; + green[0] = 0.0; } END_FUNC; } -/**@} */ + diff --git a/src/green_functions.hpp b/src/green_functions.hpp new file mode 100644 index 00000000..85c276aa --- /dev/null +++ b/src/green_functions.hpp @@ -0,0 +1,83 @@ +/** + * @file green_functions.hpp + * 
@author Thomas Gillis and Denis-Gabriel Caprace + * @copyright Copyright © UCLouvain 2019 + * + * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers. + * + * Copyright (C) <2019> + * + * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file. + * + * This program (FLUPS) is free software: + * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (see COPYING file). If not, + * see . + * + */ + +#include "defines.hpp" +#include "Topology.hpp" +#include "bessel.hpp" +#include "expint.hpp" + +// define macros to stringify, both are required! 
+#define STR(a) ZSTR(a) +#define ZSTR(a) #a + + +void cmpt_Green_3dirunbounded(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); +void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); +void cmpt_Green_1dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); +void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid , const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); +void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid , const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]); + +/** + * @brief read the LGF file in the KERNEL_PATH folder + * + * @param [in] greendim the dimension of the Green function to use, 2D or 3D + * @param [out] N the size above which we switch to the approximation, i.e. 
the size of the pre-stored kernel is N^3 + * @param [out] data the data where we store the + */ +static void _lgf_readfile(const int greendim, int* N, double** data) { + BEGIN_FUNC; + + // some defined parameters: + char lgfname[512]; + char path[] = STR(KERNEL_PATH); + if (greendim == 3) { + (*N) = 64; + sprintf(lgfname, "%s/LGF_3d_sym_acc12_%d.ker", path, (*N)); + } else if (greendim == 2) { + (*N) = 32; + sprintf(lgfname, "%s/LGF_2d_sym_acc12_%d.ker", path, (*N)); + } else { + FLUPS_ERROR("Greendim = %d is not available in this version", greendim, LOCATION); + } + + // open the file + FILE *lgf_file = fopen(lgfname, "r"); + // display the information to the user + FLUPS_INFO_1("loading the LGF kernel function %s", lgfname); + + (*data) = NULL; + // start to read the file + if (lgf_file != NULL) { + // allocate the data + const int size = (*N) * (*N) * (*N); + (*data) = (double *)flups_malloc(sizeof(double) * size); + fread((*data), sizeof(double), size, lgf_file); + // close the file + fclose(lgf_file); + } else { + FLUPS_ERROR("unable to read file %s", lgfname, LOCATION); + } + END_FUNC; +} \ No newline at end of file diff --git a/src/green_functions_3d.hpp b/src/green_functions_3d.hpp deleted file mode 100644 index 3619e38a..00000000 --- a/src/green_functions_3d.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/** - * @file green_functions_3d.hpp - * @author Thomas Gillis and Denis-Gabriel Caprace - * @copyright Copyright © UCLouvain 2019 - * - * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers. - * - * Copyright (C) <2019> - * - * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file. - * - * This program (FLUPS) is free software: - * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (see COPYING file). If not, - * see . - * - */ - -#include "defines.hpp" -#include "Topology.hpp" -#include "bessel.hpp" -#include "expint.hpp" - -void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); -void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); -void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); -void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps); -void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]); diff --git a/src/green_kernels.hpp b/src/green_kernels.hpp new file mode 100644 index 00000000..af99bd0f --- /dev/null +++ b/src/green_kernels.hpp @@ -0,0 +1,368 @@ +/** + * @file green_kernels.hpp + * @author Thomas Gillis and Denis-Gabriel Caprace + * @brief defines the 3D Green functions kernels + * @version + * @date 2019-11-20 + * + * @copyright Copyright © UCLouvain 2019 + * + * FLUPS is a Fourier-based Library of Unbounded 
Poisson Solvers. + * + * Copyright (C) <2019> + * + * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file. + * + * This program (FLUPS) is free software: + * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (see COPYING file). If not, + * see . + * + */ + +#include "defines.hpp" +#include "expint.hpp" + +/** + * @name 3 directions unbounded - 0 direction spectral + * + * @{ + */ +// ----------------------------------------------------------- 3D - KERNELS ---------------------------------------------------------- +//notice that these function will likely not be inlined as we have a pointer to them... 
+static inline double _hej_2_3unb0spe(const void* params,const double* data) {  // reads params[0] = r and params[1] = eps; data is not used
+    double r = ((double*)params) [0];
+    double eps = ((double*)params) [1];
+    return -c_1o4pi / r * (erf(r / eps * c_1osqrt2));  // NOTE(review): divides by r, so r = 0 presumably has to be handled by the caller - confirm
+}
+static inline double _hej_4_3unb0spe(const void* params,const double* data) {  // reads params[0] = r and params[1] = eps; data is not used
+    double r = ((double*)params) [0];
+    double eps = ((double*)params) [1];
+    double rho = r / eps;
+    return -c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (rho)*exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
+}
+static inline double _hej_6_3unb0spe(const void* params,const double* data) {  // reads params[0] = r and params[1] = eps; data is not used
+    double r = ((double*)params) [0];
+    double eps = ((double*)params) [1];
+    double rho = r / eps;
+    return -c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (c_7o4 * rho - c_1o4 * pow(rho, 3)) * exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
+}
+static inline double _chat_2_3unb0spe(const void* params,const double* data) {  // reads params[0] = r only; data is not used
+    double r = ((double*)params) [0];
+    return -c_1o4pi / r ;
+}
+/**
+ * @brief LGF 3D: the opposite of a tabulated value when the three indices are below N, an extrapolation formula in the indices otherwise
+ * 
+ * @param params array of doubles; reads [4], [5], [6] = indices ix, iy, iz (truncated to int), [7] = N (truncated to int) and [8] = h
+ * @param data the tabulated values, read as data[ix + iy * N + iz * N * N]; only dereferenced in the tabulated branch
+ * @return double the selected value divided by h
+ */
+static inline double _lgf_2_3unb0spe(const void* params,const double* data) {
+    int ix = (int)((double*)params)[4];
+    int iy = (int)((double*)params)[5];
+    int iz = (int)((double*)params)[6];
+    int N = (int)((double*)params)[7];
+    double h = ((double*)params)[8];
+
+    // if the point is close enough, it will be already precomputed
+    double green;
+    if (ix < N && iy < N && iz < N) {  // NOTE(review): negative indices also pass this test; presumably the indices are symmetrized to be >= 0 - confirm
+        green = - data[ix + iy * N + iz * N * N];
+
+    } else { // if not, we use the extrapolation
+        const double rho = sqrt(ix * ix + iy * iy + iz * iz);  // the sum of squares is computed in int arithmetic before the sqrt
+        const double rho_2 = rho * rho;
+        const double oorho_6 = 1.0 / std::pow(rho, 6.0);
+        const double oorho_7 = 1.0 / std::pow(rho, 7.0);
+        // ix: even powers of the index used by the extrapolation formula
+        const double ix_2 = std::pow(ix, 2.0);
+        const double ix_4 = std::pow(ix, 4.0);
+        const double ix_6 = std::pow(ix, 6.0);
+        const double ix_8 = std::pow(ix, 8.0);
+        const double ix_10 = std::pow(ix, 10.0);
+        const double ix_12 = std::pow(ix, 12.0);
+        // iy
+        const double iy_2 = std::pow(iy, 2.0);
+        const double iy_4 = std::pow(iy, 4.0);
+        const double iy_6 = std::pow(iy, 6.0);
+        const double iy_8 = std::pow(iy, 8.0);
+        const double iy_10 = std::pow(iy, 10.0);
+        const double iy_12 = std::pow(iy, 12.0);
+        //iz
+        const double iz_2 = std::pow(iz, 2.0);
+        const double iz_4 = std::pow(iz, 4.0);
+        const double iz_6 = std::pow(iz, 6.0);
+        const double iz_8 = std::pow(iz, 8.0);
+        const double iz_10 = std::pow(iz, 10.0);
+        const double iz_12 = std::pow(iz, 12.0);
+
+        green = - c_1o4pi / rho \
+        - 1.0/( 16.0 * M_PI) * (ix_4 + iy_4 + iz_4 - 3.0 * (ix_2 * iy_2 + iy_2 * iz_2 + ix_2 * iz_2)) * oorho_7 \
+        - 1.0/( 128.0 * M_PI) * (23.0 * (ix_8 + iy_8 + iz_8) - 244.0 * (ix_6 * (iy_2 + iz_2) + iy_6 * (ix_2 + iz_2) + iz_6 * (ix_2 + iy_2)) - 228.0 * ix_2 * iy_2 * iz_2 * rho_2 + 621.0 * (ix_4 * iy_4 + ix_4 * iz_4 + iy_4 * iz_4)) * oorho_7 * oorho_6 \
+        - 1.0/(2048.0 * M_PI) * (2588.0 * (ix_12 + iy_12 + iz_12) - 65676.0 * (ix_10 * iy_2 + ix_10 * iz_2 + ix_2 * iy_10 + iy_10 * iz_2 + ix_2 * iz_10 + iy_2 * iz_10) + 426144.0 * (ix_8 * iy_4 + ix_4 * iy_8 + ix_8 * iz_4 + iy_8 * iz_4 + ix_4 * iz_8 + iy_4 * iz_8) - 712884.0 * (ix_6 * iy_6 + iy_6 * iz_6 + ix_6 * iz_6) - 62892.0 * (ix_8 * iy_2 * iz_2 + ix_2 * iy_8 * iz_2 + ix_2 * iy_2 * iz_8) - 297876.0 * (ix_6 * iy_4 * iz_2 + ix_4 * iy_6 * iz_2 + ix_4 * iy_2 * iz_6 + ix_2 * iy_4 * iz_6 + ix_6 * iy_2 * iz_4 + ix_2 * iy_6 * iz_4) + 2507340.0 * ix_4 * iy_4 * iz_4) * oorho_7 * oorho_6 * oorho_6;
+    }
+
+    return green/(h);  // both branches are scaled by 1/h (the 2D kernel below applies no such scaling)
+}
+/**
+ * @brief LGF 2D: the opposite of a tabulated value when the indices are below N, an extrapolation formula in ix and iy otherwise
+ * 
+ * @param params array of doubles; reads [4], [5], [6] = indices ix, iy, iz (truncated to int) and [7] = N (truncated to int); iz is only used in the range test
+ * @param data the tabulated values, read as data[ix + iy * N]; only dereferenced in the tabulated branch
+ * @return double the selected value, returned without any scaling
+ */
+static inline double _lgf_2_2unb0spe(const void* params,const double* data) {
+    int ix = (int)((double*)params)[4];
+    int iy = (int)((double*)params)[5];
+    int iz = (int)((double*)params)[6];
+    int N = (int)((double*)params)[7];
+
+    // if the point is close enough, it will be already precomputed
+    double green;
+    if (ix < N && iy < N && iz < N) {  // NOTE(review): negative indices also pass this test; presumably the indices are symmetrized to be >= 0 - confirm
+        green = - data[ix + iy * N];
+
+    } else { // if not, we use the extrapolation
+        const double rho = sqrt(ix * ix + iy * iy);  // iz does not enter the 2D radius
+        const double oorho_6 = 1.0 / std::pow(rho, 6.0);
+        // const double ix_1 = ix;
+        const double ix_2 = std::pow(ix, 2.0);
+        const double ix_4 = std::pow(ix, 4.0);
+        const double ix_6 = std::pow(ix, 6.0);
+        const double ix_8 = std::pow(ix, 8.0);
+        const double ix_10 = std::pow(ix, 10.0);
+        const double ix_12 = std::pow(ix, 12.0);
+        const double ix_14 = std::pow(ix, 14.0);
+        const double ix_16 = std::pow(ix, 16.0);
+        // const double iy_1 = iy;
+        const double iy_2 = std::pow(iy, 2.0);
+        const double iy_4 = std::pow(iy, 4.0);
+        const double iy_6 = std::pow(iy, 6.0);
+        const double iy_8 = std::pow(iy, 8.0);
+        const double iy_10 = std::pow(iy, 10.0);
+        const double iy_12 = std::pow(iy, 12.0);
+        const double iy_14 = std::pow(iy, 14.0);
+        const double iy_16 = std::pow(iy, 16.0);
+
+        green = 1.0 / ( 2.0 * M_PI) * (log(rho) + c_gamma + log(8.0) * c_1o2)\
+        - 1.0 / ( 24.0 * M_PI) * (ix_4 - 6.0 * ix_2 * iy_2 + iy_4) * oorho_6\
+        - 1.0 / ( 480.0 * M_PI) * (43.0 * (ix_8 + iy_8) - 772.0 * (ix_6 * iy_2 + ix_2 * iy_6) + 1570.0 * ix_4 * iy_4) * oorho_6 * oorho_6\
+        - 1.0 / (2016.0 * M_PI) * (609.0 * (ix_12 + iy_12) - 24234.0 * (ix_10 * iy_2 + ix_2 * iy_10) + 109935.0 * (ix_8 * iy_4 + ix_4 * iy_8) - 160524.0 * ix_6 * iy_6) * oorho_6 * oorho_6 * oorho_6\
+        - 1.0 / (2880.0 * M_PI) * (63139.0 * (ix_16 + iy_16) - 4467336.0 * (ix_14 * iy_2 + ix_2 * iy_14) + 38334996.0 * (ix_12 * iy_4 + ix_4 * iy_12) - 98512568.0 * (ix_10 * iy_6 + ix_6 * iy_10) + 122747922.0 * ix_8 * iy_8) * oorho_6 * oorho_6 * oorho_6 * oorho_6;
+    }
+    return green;
+}
+/**@} */
+
+
+/**
+ * @name 2 directions unbounded - 1 direction spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
+static inline double _hej_2_2unb1spe_k0(const void* params,const double* data) {  // Gk0 of HEJ_2 in cmpt_Green_2dirunbounded; reads params[0] = r and params[2] = sig
+    const double r = ((double*)params)[0];
+    const double sig = ((double*)params)[2];
+
+    const double rho = r/sig;
+    const double rho2 = rho*rho;
+    // return -c_1o2pi * (log(r) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
+    // return -c_1o2pi * (log(r) + .5 * expint_ei(rho2 / 2.0));
+    return c_1o2pi * (log(r) + 0.5 * expint_ei(rho2 * 0.5));
+    // return -c_1o2pi * (.5*log(rho*.5) + .5 * expint_ei(rho2 / 2));
+}
+static inline double _hej_2_2unb1spe_r0(const void* params,const double* data) {  // Gr0 of HEJ_2 in cmpt_Green_2dirunbounded; reads params[2] = sig only
+    const double sig = ((double*)params)[2];
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig));
+}
+
+static inline double _hej_4_2unb1spe_k0(const void* params,const double* data) {  // Gk0 of HEJ_4 in cmpt_Green_2dirunbounded; reads params[0] = r and params[2] = sig
+    const double r = ((double*)params) [0];
+    const double sig = ((double*)params) [2];
+
+    const double rho = r/sig;
+    const double rho2 = rho*rho;
+    // return -c_1o2pi * (log(r) - (1 - .5 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
+    // return -c_1o2pi * (log(r) - exp(-rho2 / 2.0) + .5 * expint_ei(rho2 / 2.0));
+    return c_1o2pi * (log(r) - 0.5 * exp(-rho2 * 0.5) + 0.5 * expint_ei(rho2 * 0.5));
+    // return -c_1o2pi * (.5*log(rho2*.5) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
+}
+static inline double _hej_4_2unb1spe_r0(const void* params,const double* data) {  // Gr0 of HEJ_4 in cmpt_Green_2dirunbounded; reads params[2] = sig only
+    const double sig = ((double*)params)[2];
+
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .5);
+}
+
+static inline double _hej_6_2unb1spe_k0(const void* params,const double* data) {  // Gk0 of HEJ_6 in cmpt_Green_2dirunbounded; reads params[0] = r and params[2] = sig
+    const double r = ((double*)params) [0];
+    const double sig = ((double*)params) [2];
+
+    const double rho = r/sig;
+    const double rho2 = rho*rho;
+    // return -c_1o2pi * (log(r) - (1 - rho2 + .125 * rho2 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
+    return c_1o2pi * (log(r) - (0.75 - 0.125 * rho2) * exp(-rho2 * 0.5) + 0.5 * expint_ei(rho2 * 0.5));
+    // return -c_1o2pi * (.5*log(rho2*.5) - (.75 - .125 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
+}
+static inline double _hej_6_2unb1spe_r0(const void* params,const double* data) {  // Gr0 of HEJ_6 in cmpt_Green_2dirunbounded; reads params[2] = sig only
+    const double sig = ((double*)params)[2];
+
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .75);
+}
+static inline double _zero(const void* params,const double* data) {  // general kernel G of the HEJ and LGF cases in cmpt_Green_2dirunbounded; ignores both arguments
+    return 0.0;
+}
+
+static inline double _chat_2_2unb1spe(const void* params,const double* data) {  // reads params[0] = r and params[1] = k; data is not used
+    const double r = ((double*)params) [0];
+    const double k = ((double*)params) [1];
+
+    return -c_1o2pi * besselk0(fabs(k) * r);
+}
+static inline double _chat_2_2unb1spe_r0(const void* params,const double* data) {  // reads params[1] = k and params[3] = r_eq2D; data is not used
+    const double k = ((double*)params) [1];
+    const double r_eq2D = ((double*)params) [3];
+
+    return -(1.0 - k * r_eq2D * besselk1(k * r_eq2D)) * c_1opi / ((k * r_eq2D) * (k * r_eq2D));
+}
+static inline double _chat_2_2unb1spe_k0(const void* params,const double* data) {  // reads params[0] = r only; data is not used
+    const double r = ((double*)params) [0];
+    // const double sig = ((double*)params)[2];
+
+    return c_1o2pi * log(r) ; //caution: mistake on the sign in [Chatelain2010]
+}
+
+/**@} */
+
+/**
+ * @name 1 direction unbounded - 2 directions spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
+static inline double _hej_2_1unb2spe(const void* params,const double* data) {
+    const double r = ((double*)params) [0];
+    const double k = ((double*)params) [1];
+    const double sig = ((double*)params) [2];
+
+    const double rho = r/sig;
+    const double s = k*sig;
+
+    const double subfun = s * rho > 100. ? 
0.0 : ((1.0 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1.0 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); + return - .25 * sig / s * subfun ; +} +static inline double _hej_2_1unb2spe_k0(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double sig = ((double*)params) [2]; + + const double rho = r/sig; + const double rosqrt2 = r*c_1osqrt2; + // return -.5* (r * erf(rosqrt2/sig) + (exp(-r*r/(2*sig*sig)) - 1.)*sig*M_SQRT2*c_1osqrtpi) ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] + return 0.5* r * erf(rosqrt2/sig) - (1.-exp(-rho*rho*.5)) *sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] +} + +static inline double _hej_4_1unb2spe(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double k = ((double*)params) [1]; + const double sig = ((double*)params) [2]; + + const double rho = r/sig; + const double s = k*sig; + const double subfun = s * rho > 100. ? 0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); + return - 0.25 * sig / s * subfun \ + - sig * M_SQRT2 * c_1osqrtpi * .25 * exp(-.5 * (s * s + rho * rho)); +} +static inline double _hej_4_1unb2spe_k0(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double sig = ((double*)params) [2]; + + const double rho = r/sig; + const double rosqrt2 = r*c_1osqrt2; + return 0.5* r * erf(rosqrt2/sig) - (1.-exp(-rho*rho*.5)) *.5*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] +} + +static inline double _hej_6_1unb2spe(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double k = ((double*)params) [1]; + const double sig = ((double*)params) [2]; + + const double rho = r/sig; + const double s = k*sig; + const double subfun = s * rho > 100. ? 
0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho)); + return - 0.25 * sig / s * subfun \ + - sig * M_SQRT2 * c_1osqrtpi * (c_5o16 + c_1o16 * (s * s - rho * rho)) * exp(-.5 * (s * s + rho * rho)); +} +static inline double _hej_6_1unb2spe_k0(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double sig = ((double*)params) [2]; + + const double rho = r/sig; + const double rosqrt2 = r*c_1osqrt2; + return 0.5* r * erf(rosqrt2/sig) - (3.-exp(-rho*rho*.5) * (rho*rho+3.) ) *.125*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018] +} + +static inline double _chat_2_1unb2spe(const void* params,const double* data) { + const double r = ((double*)params) [0]; + const double k = ((double*)params) [1]; + + return -0.5 * exp(-k * r) / k; +} +static inline double _chat_2_1unb2spe_k0(const void* params,const double* data) { + const double r = ((double*)params) [0]; + + return 0.5 * fabs(r); +} + +/**@} */ + + +/** + * @name 3 directions spectral + * + * @{ + */ +// ----------------------------------------------------------- KERNELS ---------------------------------------------------------- +static inline double _hej_2_0unb3spe(const void* params,const double* data) { + const double ksqr = ((double*)params)[0]; + const double sig = ((double*)params)[1]; + + const double ssqr = ksqr * (sig * sig); + return - exp(-ssqr / 2.0) / (ksqr); +} +static inline double _hej_4_0unb3spe(const void* params,const double* data) { + const double ksqr = ((double*)params)[0]; + const double sig = ((double*)params)[1]; + + const double ssqr = ksqr * (sig * sig); + return - (1.0 + ssqr / 2.0) * exp(-ssqr / 2.0) / (ksqr); +} +static inline double _hej_6_0unb3spe(const void* params,const double* data) { + const double ksqr = ((double*)params)[0]; + const double sig = ((double*)params)[1]; + + const double ssqr = ksqr * (sig * sig); + return - (1.0 + ssqr / 2.0 + ssqr * ssqr / 8.0) * 
exp(-ssqr / 2.0) / (ksqr); +} + +static inline double _chat_2_0unb3spe(const void* params,const double* data) { + const double ksqr = ((double*)params) [0]; + + return - 1.0 / ksqr; +} +static inline double _lgf_2_0unb3spe(const void* params, const double* data) { + const double kx = ((double*)params)[2]; + const double ky = ((double*)params)[3]; + const double kz = ((double*)params)[4]; + const double h = ((double*)params)[5]; + + return - h * h / (4.0 * pow(sin(kx * h / 2.0), 2.0) + 4.0 * pow(sin(ky * h / 2.0), 2.0) + 4.0 * pow(sin(kz * h / 2.0), 2.0)); +} +/**@} */ \ No newline at end of file