diff --git a/.gitignore b/.gitignore
index ed98daa8..f214f65f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@ prof
 .vscode
 doc/html
 doc/latex
+*.d
+*.in
 *.x
 *.xlsx
 *.log
diff --git a/Makefile b/Makefile
index b29206a8..b59bb75a 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,13 @@ ifneq (,$(findstring -DHAVE_METIS,$(CXXFLAGS)))
 	LIB+= -L$(METIS_LIB) -lmetis  -Wl,-rpath,$(METIS_LIB)
 endif
 
+#-----------------------------------------------------------------------------
+# LGF SPECIAL CASE
+# the LGF kernel data (kernel/*.ker in the source tree) is installed in $(PREFIX)/include
+LGF_PATH=$(abspath $(PREFIX)/include)
+DEF += -DKERNEL_PATH=${LGF_PATH}
+LGF_DATA := $(wildcard kernel/*.ker)
+
 #-----------------------------------------------------------------------------
 ## add the wanted folders - common folders
 SRC := $(notdir $(wildcard $(SRC_DIR)/*.cpp))
@@ -129,6 +136,7 @@ install_dynamic: lib_dynamic
 	@cp $(TARGET_LIB_A2A).so $(PREFIX)/lib
 	@cp $(TARGET_LIB_NB).so $(PREFIX)/lib
 	@cp $(API) $(PREFIX)/include
+	@cp $(LGF_DATA) $(PREFIX)/include
 
 install_static: lib_static 
 	@mkdir -p $(PREFIX)/lib
@@ -136,6 +144,7 @@ install_static: lib_static
 	@cp $(TARGET_LIB_A2A).a $(PREFIX)/lib
 	@cp $(TARGET_LIB_NB).a $(PREFIX)/lib
 	@cp $(API) $(PREFIX)/include
+	@cp $(LGF_DATA) $(PREFIX)/include
 
 # for a standard installation, do the dynamic link	
 install: info install_static
@@ -163,6 +172,7 @@ info: logo
 	$(info compil. flags = $(CXXFLAGS) $(INC) $(DEF) -fPIC -MMD)
 	$(info linker flags = -shared $(LDFLAGS))
 	$(info using arch file = $(ARCH_FILE) )
+	$(info LGF path = $(LGF_PATH) )
 	$(info ------------)
 	$(info FFTW:)
 	$(info - include: -I$(FFTW_INC) )
@@ -177,6 +187,7 @@ info: logo
 	$(info - OBJ A2A = $(OBJ_A2A))
 	$(info - OBJ NB = $(OBJ_NB))
 	$(info - DEP = $(DEP))
+	$(info - LGF_DATA = $(LGF_DATA))
 	$(info ------------)
 
 .NOTPARALLEL: logo
diff --git a/README.md b/README.md
index 9731e216..dd9f05dc 100644
--- a/README.md
+++ b/README.md
@@ -15,13 +15,21 @@ For the list of all the contributors to the development of FLUPS, description an
 If you use FLUPS, please cite it as follows in your publications:
 - Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review)
 
+### Why should you use FLUPS?
+- You can solve the Poisson equation on rectangular and uniform distributed 2D/3D grids;
+- You can use any boundary conditions, including truly unbounded boundary conditions and semi-unbounded conditions;
+- You can solve the same Poisson problem many times at low cost using precomputed Green's functions and communication patterns;
+- You can use threads and/or MPI to speed up the execution;
+- You can use the built-in profiler to optimize the execution speed;
+- You can use any part of the library on its own, especially the pre-computed communications and the FFTs;
+- You can apply filters or do any computation you want while in the Fourier space.
 
 ### Installation
 
 FLUPS is a C++ library, with an API in C.
-The compilation of FLUPS was tested with Intel compilers and GCC.
+The compilation of FLUPS was tested with Intel compilers and GCC.
 
-#### 1. Dependencies
+#### Dependencies
 First, you need to install the dependencies, typically using the following configuration commands (for the intel compilers)
 - FFTW (> v3.3.8) in the `fftw_prefix` dir:
 ```shell
@@ -31,9 +39,8 @@ CC=icc CXX=icpc FC=ifort ./configure --prefix=fftw_prefix --enable-mpi --enable-
 ```shell
 CC=mpiicc CXX=mpiicpc FC=mpif90 ./configure --prefix=hdf5_prefix --enable-build-mode=production --enable-parallel
 ```
-- METIS (> v5.1.0) - only if compiling with `REORDER_RANKS`
 
-#### 2. The Library
+#### Compilation
 You need now to create a architecture/compiler dependent file in `make_arch` to define `CXX`, `CXXFLAGS`, `FFTWDIR` and `HDF5DIR`.
 For example:
 ```makefile
@@ -57,15 +64,21 @@ HDF5_LIB := ${HDF5_DIR}/lib
 HDF5_INC := ${HDF5_DIR}/include
 ```
 By default, the Makefile is looking for `-lfftw3_openmp -lfftw3` and `-lhdf5`. You can overwrite this by changing the variable `FFTW_LIBNAME` and `HDF5_LIBNAME` in your arch file.
-
-Then you need to reference the created configuration file and the prefix you wish to :
-```shell
-export ARCH_FILE=make_arch/my_arch_dependent_file
+For example:
+```makefile
+FFTW_LIBNAME := -lfftw3_omp -lfftw3
+HDF5_LIBNAME := -lhdf5_openmpi
 ```
 
+Then you need to reference the created configuration file (using `ARCH_FILE`) and the prefix in which you wish to install the library (using `PREFIX`).
+You can either `export` the variables or reference them later while calling the Makefile.
+If no prefix is given, `make install` uses the current working directory to install the library.
+
 Finally, go to the main folder and type the compilation command.
-- Check the compilation details before doing the installation
+- Check the compilation details before doing the installation
 ```shell
+export ARCH_FILE=make_arch/my_arch_dependent_file
+export PREFIX=/my/lib/prefix
 make info
 ## or
 ARCH_FILE=make_arch/my_arch_dependent_file PREFIX=/my/lib/prefix make info
@@ -77,12 +90,15 @@ make install
 ARCH_FILE=make_arch/my_arch_dependent_file PREFIX=/my/lib/prefix make install
 ```
 
-#### 3. Documentation
+:warning: you must **install** the library. Indeed, we copy some data required by the solver.
+If you wish to keep everything local, simply do not give a prefix and the current directory will be selected.
+
+#### Documentation
 
-The documentation is built with Doxygen.
-To build the documentation, please go to the `./doc` subfolder and type `doxygen`.
+The documentation is built using Doxygen.
+To build the documentation, go to the `./doc` subfolder and type `doxygen`.
 
-#### 4. Compilation flags
+#### Available compilation flags
 Here is an exhautstive list of the compilation flags that can be used to change the behavior of the code. To use `MY_FLAG`, simply add `-DMY_FLAG` to the variable `CXXFLAGS` in your `make_arch`.
 - `DUMP_DBG`: if specified, the solver will I/O fields using the HDF5 library.
 - `COMM_NONBLOCK`: if specified, the code will use the non-blocking communication pattern instead of the all to all version.
@@ -90,18 +106,43 @@ Here is an exhautstive list of the compilation flags that can be used to change
 - `NDEBUG`: use this flag to bypass various checks inside the library
 - `PROF`: allow you to use the build-in profiler to have a detailed view of the timing in each part of the solve. Make sure you have created a folder ```./prof``` next to your executable.
 - `REORDER_RANKS`: try to reorder the MPI ranks based on the precomputed communication graph, using call to MPI_Dist_graph. We recommend the use of this feature when the number of processes > 128 and the nodes are allocated exclusive for your application, especially on fully unbounded domains.
-- `HAVE_METIS`: in combination with REORDER_RANKS, use METIS instead of MPI_Dist_graph to partition the call graph based on the allocated ressources
+- `HAVE_METIS`: in combination with REORDER_RANKS, use METIS instead of MPI_Dist_graph to partition the call graph based on the allocated resources. You must hence install METIS for this functionality.
 
 :warning: You may also change the memory alignement and the FFTW planner flag in the `flups.h` file.
 
 ### How to use a solver?
 
 #### Detailed reference
+The scientific background of the library is explained in "Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review)".
 
-The scientific background of the library is explained in "Caprace et al., **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2019 (under review)"
+A detailed description of the API is provided in the documentation (@ref flups.h), as well as many implementation details.
+
+#### Memory layout
+In this project, we choose to handle the memory the **Fortran** way, even though we are in C/C++.
+So, the memory is aligned as a single row of size `n[0] * n[1] * n[2]`.
+The fastest rotating index is set to be `n[0]` then `n[1]` and finally `n[2]`.
+
+We have chosen this way of doing to reuse the 3D code in a 2D framework.
+Indeed, having the last dimension as the slowest rotating index does not penalize the writing of the loops.
 
-For the detailed specifications of the API, have a look at @ref flups.h .
+As an example, here is how we access the memory:
+
+```cpp
+double* data =(double*) flups_malloc(n[0] * n[1] * n[2] * sizeof(double));
+
+for(int iz=0; iz<n[2]; iz++){
+    for(int iy=0; iy<n[1]; iy++){
+        for(int ix=0; ix<n[0]; ix++){
+            // n[0] is the fastest rotating index
+            const int id = iz*n[1]*n[0] + iy * n[0] + ix;
+
+            data[id] = 1.0 ;
+        }
+    }
+}
 
+flups_free(data);
+```
 
 #### FLUPS in a nutshell
 To use the solver, you first need to create a topology
@@ -112,40 +153,39 @@ int  nproc[3]  = {2, 1, 3};      // 6 procs; 2 x 1 x 3
 bool isComplex = false;          // real data
 
 // no specific alignement => we put a value of 1
-Topology *topo = new Topology(axis, nglob, nproc, isComplex,NULL,1, MPI_COMM_WORLD);
+FLUPS_Topology *topo = flups_topo_new(axis, nglob, nproc, isComplex, NULL, 1, MPI_COMM_WORLD);
 
 // define additional quantities
 double L = {1.0, 2.0, 1.0};
 double h = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};
 ```
 
-Then, you can define a new solver and it's boundary condition
+Then, you can define a new solver and its boundary condition
 ```cpp
 // define the solver
-const BoundaryType mybc[3][2] = {{UNB, UNB}, {EVEN, ODD}, {UNB, EVEN}};  // BC in X,Y,Z
-Solver *      mysolver   = new Solver(topo, mybc, h, L);
+const FLUPS_BoundaryType mybc[3][2] = {{UNB, UNB}, {EVEN, ODD}, {UNB, EVEN}};  // BC in X,Y,Z
+FLUPS_Solver *mysolver = flups_init(topo, mybc, h, L,prof);
 
 // setup the solver
-mysolver->set_GreenType(HEJ2);
-mysolver->setup(false);
+flups_set_greenType(mysolver,typeGreen);
+flups_setup(mysolver,false);
 ```
 
 To solve a field `rhs` that has been defined on the topology, use
 ```cpp
-mysolver->solve(rhs, rhs, SRHS);
+flups_solve(mysolver,rhs, rhs, SRHS);
 ```
 
-Then, destroy the solver
+Then, destroy the solver and the created topology
 ```
-delete (mysolver);
+flups_cleanup(mysolver);
+flups_topo_free(topo);
 ```
 
 #### Advanced usage
-
 Examples of usage of FLUPS in C programs are provided in the `./sample` subfolder.
 
 #### Memory footprint
-
 For the recommanded configuration of 128^3 unknowns per processor in full unbounded, we have measured the memory usage of FLUPS on a 2000 cores run:
 - the all to all version uses ~530Mb (O.253kB/unknown)
 - the non-blocking version uses ~560Mb (O.267kB/unknown)
@@ -157,8 +197,7 @@ For 1.5Go, max 168
 21*8 
 7*24-->
 
-**CAUTION**
-FLUPS was nerver tested above 1024^3 unknowns per core.
+:warning: FLUPS was never tested above 1024^3 unknowns per core.
 
 ### Implementation details and developers guide
 #### C++ use
@@ -166,8 +205,9 @@ We use the C++ language in a very limited way, on purpose.
 The features used are the object oriented layout and some usefull features of the standard library.
 
 #### Conventions
-
-- Put a ```BEGIN_FUNC;``` at the begining of each function
+- Put a ```BEGIN_FUNC;``` at the beginning and a ```END_FUNC;``` at the end of each function
+- Use ```FLUPS_INFO``` for verbosity (several levels available), ```FLUPS_CHECK``` for assertions and ```FLUPS_ERROR``` for error management
+- Use the ```flups_malloc``` and ```flups_free``` functions to allocate/free memory
 - how to name an action? ```action_mySuperFunction``` where ```action``` = ```set```, ```get```, ```execute```, ```switch```, ```cmpt```
 - how to name a function? ```mySuperFunction```
 - how to name an class? ```MyClass```
@@ -183,31 +223,6 @@ Set then the value:
 
 Inspired from https://clang.llvm.org/docs/ClangFormatStyleOptions.html (*Configurable Format Style Options* section)
 
-#### Memory layout
-In this project we choose to handle the memory in a **Fortran** way of doing iven if we are in C/C++.
-So, the memory is aligned as a single row of size `n[0] * n[1] * n[2]`.
-The fastest rotating index is set to be `n[0]` then `n[1]` and finally `n[2]`.
-
-We have chosen this way of doing to reuse the 3D code in a 2D framework.
-Indeed having the last dimension in the slower rotating index does not penalize the loops writting.
-
-As an example, we here is how we access the memory
-
-```cpp
-double* data =(double*) flups_malloc(n[0] * n[1] * n[2] * sizeof(double));
-
-for(int iz=0; iz<n[2]; iz++){
-    for(int iy=0; iy<n[1]; iy++){
-        for(int ix=0; ix<n[0]; ix++){
-            // n[0] is the fastest rotating index
-            const int id = iz*n[1]*n[0] + iy * n[0] + ix;
-
-            data[id] = 1.0 ;
-        }
-    }
-}
-```
-
 #### Debugging
 
 FLUPS can be compiled with different levels of verbosity. The following compilation flags are accepted:
diff --git a/kernel/LGF_2d_sym_acc12_32.ker b/kernel/LGF_2d_sym_acc12_32.ker
new file mode 100644
index 00000000..d0f5f8e1
Binary files /dev/null and b/kernel/LGF_2d_sym_acc12_32.ker differ
diff --git a/kernel/LGF_3d_sym_acc12_64.ker b/kernel/LGF_3d_sym_acc12_64.ker
new file mode 100644
index 00000000..100589cd
Binary files /dev/null and b/kernel/LGF_3d_sym_acc12_64.ker differ
diff --git a/samples/compareP3DFFT++/main_compare++.cpp b/samples/compareP3DFFT++/main_compare++.cpp
index 92e7f829..098954bc 100644
--- a/samples/compareP3DFFT++/main_compare++.cpp
+++ b/samples/compareP3DFFT++/main_compare++.cpp
@@ -260,24 +260,24 @@ int main(int argc, char *argv[]) {
     // //-------------------------------------------------------------------------
     // /** - allocate rhs and solution */
     // //-------------------------------------------------------------------------
-    
-    printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2));
-    printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2));
-    printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2));
-    printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2));
+    if(rank == 0) {
+        printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2));
+        printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2));
+        printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2));
+        printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2));
 
 #ifndef SKIP_P3D
-    printf("[P3DFFT++] topo IN glob  : %d %d %d  \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]);
-    printf("[P3DFFT++] topo IN loc   : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]);
-    printf("[P3DFFT++] topo OUT glob : %d %d %d  \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]);
-    printf("[P3DFFT++] topo OUT loc  : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]);
+        printf("[P3DFFT++] topo IN glob  : %d %d %d  \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]);
+        printf("[P3DFFT++] topo IN loc   : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]);
+        printf("[P3DFFT++] topo OUT glob : %d %d %d  \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]);
+        printf("[P3DFFT++] topo OUT loc  : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]);
 #endif
 
-
-    printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT);
+        printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT);
 #ifndef SKIP_P3D    
-    printf("                        P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT);
+        printf("                        P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT);
 #endif
+    }
     
  
     double *rhsFLU   = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN);
diff --git a/samples/compareP3DFFT++/run/zenobe_kernel.sh b/samples/compareP3DFFT++/run/zenobe_kernel.sh
index 46b313d8..e6be3100 100755
--- a/samples/compareP3DFFT++/run/zenobe_kernel.sh
+++ b/samples/compareP3DFFT++/run/zenobe_kernel.sh
@@ -36,12 +36,6 @@ MY_SIZE_Z=$((${MY_SIZE}*${LZ}))
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}
 
-echo "============================== FLUPS ONLY, WITHOUT METIS ==========================================" >> stdout_${PBS_JOBID}
-
-echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE} -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}"
-mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXE}_noP3D -np ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -ni 100 >> stdout_${PBS_JOBID}
-
-
 ################## 
 echo "End time : " $(date)
 echo "----------------- Computation over, bye bye! ----"
diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh
index 20411d77..5dc9913a 100755
--- a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh
+++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh
@@ -9,7 +9,7 @@ VER=a2a
 EXE=flups_vs_p3dfft++_${VER}
 
 ######### WEAK -> increase the number of CPU and the size
-SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER
+SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V4
 
 # clean the validation dir
 # rm -rf ${SCRATCH}
@@ -18,7 +18,6 @@ mkdir -p $SCRATCH/data
 mkdir -p $SCRATCH/prof
 # copy the needed info
 cp $HOME_FLUPS/$EXE $SCRATCH
-cp $HOME_FLUPS/${EXE}_noP3D $SCRATCH
 cp $HOME_FLUPS/run/zenobe_kernel.sh $SCRATCH
 
 cd $SCRATCH
@@ -38,18 +37,22 @@ cd $SCRATCH
 # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
 
 #####################   size = 128^3/proc  #################################
-# cpu = 64
+## CANNOT DO cpu=96,192,384... due to P3D !
+
+# cpu = 128 (-> actually allocating 144)
 # same on large
-qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 256
-qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 256 (->264)
+qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 512
-qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 512 (->528)
+qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 1024
-qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 1024 (->1032)
+qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
+# cpu = 2048 (->2064)
+qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
 #end of file
diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh
index a56f1f02..c78305f4 100755
--- a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh
+++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh
@@ -9,7 +9,7 @@ VER=nb
 EXE=flups_vs_p3dfft++_${VER}
 
 ######### WEAK -> increase the number of CPU and the size
-SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER
+SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V3
 
 # clean the validation dir
 # rm -rf ${SCRATCH}
@@ -38,18 +38,22 @@ cd $SCRATCH
 # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
 
 #####################   size = 128^3/proc  #################################
-# cpu = 64
+## CANNOT DO cpu=96,192,384... due to P3D !
+
+# cpu = 128 (-> actually allocating 144)
 # same on large
-qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 256
-qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 256 (->264)
+qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 512
-qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 512 (->528)
+qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
-# cpu = 1024
-qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh
+# cpu = 1024 (->1032)
+qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
+# cpu = 2048 (->2064)
+qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh
 
 #end of file
diff --git a/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt
index 228c963a..e19044b4 100644
--- a/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.879671993812e-16 1.110223024625e-15
+8 3.066390565529e-16 1.110223024625e-15
diff --git a/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt
index de54da40..492c1f29 100644
--- a/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000001_typeGreen=0.txt
@@ -1 +1 @@
-8 3.412318533331e-16 1.110223024625e-15
+8 3.852295714867e-16 1.332267629550e-15
diff --git a/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt
index 391b172f..13b8e85d 100644
--- a/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000010_typeGreen=0.txt
@@ -1 +1 @@
-8 5.541747624352e-16 2.109423746788e-15
+8 5.419249018646e-16 2.220446049250e-15
diff --git a/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt
index 1f0eae9c..c27ccb3b 100644
--- a/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000011_typeGreen=0.txt
@@ -1 +1 @@
-8 7.581356855986e-17 1.665334536938e-16
+8 9.720644932128e-17 2.220446049250e-16
diff --git a/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt
index 1f0eae9c..c27ccb3b 100644
--- a/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000033_typeGreen=0.txt
@@ -1 +1 @@
-8 7.581356855986e-17 1.665334536938e-16
+8 9.720644932128e-17 2.220446049250e-16
diff --git a/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt
new file mode 100644
index 00000000..ade1e653
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_000099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.360116630811e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt
index e2160119..3b32bd6f 100644
--- a/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000100_typeGreen=0.txt
@@ -1 +1 @@
-8 2.741897992369e-16 9.992007221626e-16
+8 2.580705344185e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt
index ff702b7f..b2e87218 100644
--- a/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.880255821380e-16 8.881784197001e-16
+8 2.904773986610e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt
index 45bedcc0..e2e3ad7e 100644
--- a/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000110_typeGreen=0.txt
@@ -1 +1 @@
-8 5.022257820411e-16 2.109423746788e-15
+8 5.227751426413e-16 2.109423746788e-15
diff --git a/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt
index 6c92f22d..4ae4d18b 100644
--- a/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.383034061608e-16 3.330669073875e-16
+8 1.400043829337e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt
index 6c92f22d..4ae4d18b 100644
--- a/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_000133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.383034061608e-16 3.330669073875e-16
+8 1.400043829337e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt
new file mode 100644
index 00000000..8f9aa6b3
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_000199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.230079805695e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt
new file mode 100644
index 00000000..0417ff66
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_000499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.978324264490e-02 1.215492793748e-01
diff --git a/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt
index c02a8d47..2da51f9c 100644
--- a/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.837726816918e-16 9.992007221626e-16
+8 2.319347045319e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt
index 58f2d574..4a0b5d87 100644
--- a/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.824889171655e-16 9.992007221626e-16
+8 2.915468233191e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt
index 323119eb..64da6deb 100644
--- a/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.493854481640e-16 1.443289932013e-15
+8 3.326123904568e-16 1.443289932013e-15
diff --git a/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt
index 6fd2d107..a50ebf6b 100644
--- a/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.184869787392e-16 3.053113317719e-16
+8 1.171869586735e-16 3.053113317719e-16
diff --git a/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt
index 6fd2d107..a50ebf6b 100644
--- a/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.184869787392e-16 3.053113317719e-16
+8 1.171869586735e-16 3.053113317719e-16
diff --git a/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt
new file mode 100644
index 00000000..9d4c7391
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_001099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.559180168130e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt
index 55386e5e..5d1f87c1 100644
--- a/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001100_typeGreen=0.txt
@@ -1 +1 @@
-8 2.117464329924e-16 7.216449660064e-16
+8 2.506862993999e-16 9.436895709314e-16
diff --git a/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt
index e8c9e7fd..431cfec5 100644
--- a/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.346660992591e-16 8.881784197001e-16
+8 2.006729208194e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt
index 68ef1c69..96423adf 100644
--- a/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.606439386068e-16 9.992007221626e-16
+8 2.623438039626e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt
index 753eac20..9b9d370d 100644
--- a/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.291175088092e-16 3.330669073875e-16
+8 1.284632718366e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt
index 753eac20..9b9d370d 100644
--- a/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_001133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.291175088092e-16 3.330669073875e-16
+8 1.284632718366e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt
new file mode 100644
index 00000000..6e9f1441
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_001199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.666418386409e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt
new file mode 100644
index 00000000..cc71831b
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_001499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.860935708545e-02 1.337508016338e-01
diff --git a/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt
index 3b80641e..5e6c393d 100644
--- a/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_003300_typeGreen=0.txt
@@ -1 +1 @@
-8 2.324124869550e-16 8.881784197001e-16
+8 4.249776009696e-16 1.276756478319e-15
diff --git a/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt
index 3abb847f..be4b0054 100644
--- a/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_003301_typeGreen=0.txt
@@ -1 +1 @@
-8 2.647136944320e-16 7.771561172376e-16
+8 2.731931856942e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt
index 64e046e4..8cf094f1 100644
--- a/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_003310_typeGreen=0.txt
@@ -1 +1 @@
-8 2.471325589817e-16 8.881784197001e-16
+8 2.635294617138e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt
index ae7ed52a..59a46df3 100644
--- a/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_003311_typeGreen=0.txt
@@ -1 +1 @@
-8 9.880312447293e-17 2.220446049250e-16
+8 9.745379437590e-17 2.220446049250e-16
diff --git a/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt
index ae7ed52a..59a46df3 100644
--- a/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_003333_typeGreen=0.txt
@@ -1 +1 @@
-8 9.880312447293e-17 2.220446049250e-16
+8 9.745379437590e-17 2.220446049250e-16
diff --git a/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt
new file mode 100644
index 00000000..962bfe94
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_003399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.015351299880e-15 2.109423746788e-15
diff --git a/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt
new file mode 100644
index 00000000..0417ff66
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_004099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.978324264490e-02 1.215492793748e-01
diff --git a/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt
new file mode 100644
index 00000000..cc71831b
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_004199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.860935708545e-02 1.337508016338e-01
diff --git a/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt
new file mode 100644
index 00000000..23e8aa48
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_004499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.223380168681e-02 9.249743986706e-02
diff --git a/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt
index 28873f1d..4fa1f9b9 100644
--- a/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.460575272916e-16 8.881784197001e-16
+8 2.746147771692e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt
index dbdf8e6d..5cf70871 100644
--- a/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010001_typeGreen=0.txt
@@ -1 +1 @@
-8 3.786553299301e-16 1.221245327088e-15
+8 2.284799278593e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt
index e97b4ee5..41b89059 100644
--- a/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.651152377896e-16 1.554312234475e-15
+8 4.130595809974e-16 1.554312234475e-15
diff --git a/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt
index e07c5669..6b52819e 100644
--- a/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.251858722303e-16 3.330669073875e-16
+8 1.485975713201e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt
index e07c5669..6b52819e 100644
--- a/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.251858722303e-16 3.330669073875e-16
+8 1.485975713201e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt
new file mode 100644
index 00000000..eea94cb4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_010099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.729847094858e-16 7.216449660064e-16
diff --git a/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt
index 957d351c..afaec869 100644
--- a/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010100_typeGreen=0.txt
@@ -1 +1 @@
-8 1.714256197463e-16 6.106226635438e-16
+8 2.065930418964e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt
index 8eac4509..dab5cdbc 100644
--- a/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010101_typeGreen=0.txt
@@ -1 +1 @@
-8 3.375444239021e-16 1.221245327088e-15
+8 2.810036259783e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt
index 5ef9a0fc..09ddfd1e 100644
--- a/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010110_typeGreen=0.txt
@@ -1 +1 @@
-8 3.781711920843e-16 1.665334536938e-15
+8 3.807108354803e-16 1.665334536938e-15
diff --git a/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt
index c5baf099..19b11e9a 100644
--- a/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.380618125209e-16 4.440892098501e-16
+8 1.270840351296e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt
index c5baf099..19b11e9a 100644
--- a/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_010133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.380618125209e-16 4.440892098501e-16
+8 1.270840351296e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt
new file mode 100644
index 00000000..6b3f7dc4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_010199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.740249526309e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt
new file mode 100644
index 00000000..f3839c98
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_010499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.239982674130e-02 1.460032340803e-01
diff --git a/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt
index 1bc27be1..d325efaa 100644
--- a/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.463321950145e-16 8.881784197001e-16
+8 1.594821179868e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt
index 1168b450..23dacd93 100644
--- a/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.070035810573e-16 9.992007221626e-16
+8 2.415845289031e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt
index a729477a..895ac10c 100644
--- a/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.464774054186e-16 1.554312234475e-15
+8 3.633428620793e-16 1.443289932013e-15
diff --git a/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt
index 9b25beee..6d41e571 100644
--- a/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.883444858262e-16 4.440892098501e-16
+8 1.517128559027e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt
index 9b25beee..6d41e571 100644
--- a/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.883444858262e-16 4.440892098501e-16
+8 1.517128559027e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt
new file mode 100644
index 00000000..d9d5094d
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_011099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.494344651719e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt
index a460571a..b6fbf4d6 100644
--- a/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011100_typeGreen=0.txt
@@ -1 +1 @@
-8 1.738708762151e-16 6.106226635438e-16
+8 1.936485848748e-16 7.216449660064e-16
diff --git a/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt
index 9480fbfb..87b3ea7b 100644
--- a/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.241018778753e-16 9.992007221626e-16
+8 1.648270320622e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt
index 552c0225..946c4094 100644
--- a/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.285105617638e-16 9.992007221626e-16
+8 2.224172249971e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt
index ab57c1a4..86bda4bb 100644
--- a/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.981233446090e-16 5.551115123126e-16
+8 1.984875422410e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt
index ab57c1a4..86bda4bb 100644
--- a/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_011133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.981233446090e-16 5.551115123126e-16
+8 1.984875422410e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt
new file mode 100644
index 00000000..f084ef42
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_011199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.670798201198e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt
new file mode 100644
index 00000000..c828ab08
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_011499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.200258829336e-02 1.509764206207e-01
diff --git a/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt
index bfc94ffb..bd1538be 100644
--- a/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_013300_typeGreen=0.txt
@@ -1 +1 @@
-8 2.016219626873e-16 7.216449660064e-16
+8 4.180582353684e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt
index 19ac8a2f..343db07c 100644
--- a/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_013301_typeGreen=0.txt
@@ -1 +1 @@
-8 2.826868462581e-16 8.881784197001e-16
+8 2.362957789240e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt
index 78e3ffc1..da942b45 100644
--- a/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_013310_typeGreen=0.txt
@@ -1 +1 @@
-8 2.158074840743e-16 8.881784197001e-16
+8 2.800810052076e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt
index 70b6177a..4ca2b300 100644
--- a/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_013311_typeGreen=0.txt
@@ -1 +1 @@
-8 1.322036846889e-16 3.330669073875e-16
+8 1.270499915246e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt
index 70b6177a..4ca2b300 100644
--- a/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_013333_typeGreen=0.txt
@@ -1 +1 @@
-8 1.322036846889e-16 3.330669073875e-16
+8 1.270499915246e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt
new file mode 100644
index 00000000..880342c3
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_013399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.301219577348e-16 1.332267629550e-15
diff --git a/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt
new file mode 100644
index 00000000..f3839c98
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_014099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.239982674130e-02 1.460032340803e-01
diff --git a/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt
new file mode 100644
index 00000000..c828ab08
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_014199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.200258829336e-02 1.509764206207e-01
diff --git a/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt
new file mode 100644
index 00000000..afab620e
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_014499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.506048052626e-02 1.040899031895e-01
diff --git a/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_040099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt
new file mode 100644
index 00000000..5256d4db
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_040199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.453121113062e-02 1.997372939511e-01
diff --git a/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt
index ecbb818b..106b1622 100644
--- a/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_040410_typeGreen=0.txt
@@ -1 +1 @@
-8 9.233949105056e-03 5.785860761320e-02
+8 9.233949105056e-03 5.785860761319e-02
diff --git a/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt
new file mode 100644
index 00000000..506ba9d2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_040499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.051484886687e-01 2.005866243539e+00
diff --git a/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt
new file mode 100644
index 00000000..5256d4db
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_041099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.453121113062e-02 1.997372939511e-01
diff --git a/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt
new file mode 100644
index 00000000..3b67f047
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_041199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.796571614602e-02 2.268632740773e-01
diff --git a/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_041499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt
new file mode 100644
index 00000000..3b67f047
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_043399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.796571614602e-02 2.268632740773e-01
diff --git a/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt
new file mode 100644
index 00000000..506ba9d2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_044099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.051484886687e-01 2.005866243539e+00
diff --git a/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_044199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt
new file mode 100644
index 00000000..1d71a7b0
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_044199_typeGreen=1.txt
@@ -0,0 +1 @@
+17 5.516870695410e-03 3.245510815373e-02
diff --git a/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt
new file mode 100644
index 00000000..b52c1f22
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_044499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.102639526126e-01 1.513172916610e+00
diff --git a/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt
index 087e6489..c9e547e8 100644
--- a/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.248521341384e-16 8.881784197001e-16
+8 2.295284422793e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt
index c9422d2c..fbdec53e 100644
--- a/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100001_typeGreen=0.txt
@@ -1 +1 @@
-8 3.085055053406e-16 1.110223024625e-15
+8 3.487725898558e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt
index e085c996..84c902bd 100644
--- a/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100010_typeGreen=0.txt
@@ -1 +1 @@
-8 4.112667330478e-16 1.776356839400e-15
+8 4.037770508257e-16 1.665334536938e-15
diff --git a/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt
index 1faf7787..575d4d26 100644
--- a/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.185940757741e-16 4.440892098501e-16
+8 1.042242595377e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt
index 1faf7787..575d4d26 100644
--- a/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.185940757741e-16 4.440892098501e-16
+8 1.042242595377e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt
new file mode 100644
index 00000000..0f01fcec
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_100099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.665402298195e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt
index 76679cd7..1a97b27f 100644
--- a/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100100_typeGreen=0.txt
@@ -1 +1 @@
-8 2.370175092121e-16 9.992007221626e-16
+8 2.487098902905e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt
index 625143fb..d0c9990c 100644
--- a/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.857673038043e-16 1.110223024625e-15
+8 3.446501259485e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt
index af1fd558..041eaee7 100644
--- a/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100110_typeGreen=0.txt
@@ -1 +1 @@
-8 3.027825802011e-16 1.443289932013e-15
+8 2.981402106445e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt
index 4e2e4283..f5d3807b 100644
--- a/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.143744394216e-16 3.330669073875e-16
+8 1.575522964835e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt
index 4e2e4283..f5d3807b 100644
--- a/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.143744394216e-16 3.330669073875e-16
+8 1.575522964835e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt
new file mode 100644
index 00000000..2d9578e7
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_100199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.005183188409e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt
index 452bd3f7..a9039708 100644
--- a/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_100440_typeGreen=0.txt
@@ -1 +1 @@
-8 6.901116042656e-03 5.203433555700e-02
+8 6.901116042656e-03 5.203433555701e-02
diff --git a/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt
new file mode 100644
index 00000000..f3839c98
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_100499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.239982674130e-02 1.460032340803e-01
diff --git a/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt
index b83ad742..3f4b8e9c 100644
--- a/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.115439972084e-16 8.881784197001e-16
+8 2.407414558747e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt
index 1f417e05..09f54508 100644
--- a/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.390377305867e-16 1.110223024625e-15
+8 2.517897300738e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt
index a9963574..067e1f83 100644
--- a/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.272278205610e-16 1.443289932013e-15
+8 3.055679946251e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt
index 0fa97df7..83518696 100644
--- a/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.247458350191e-16 4.440892098501e-16
+8 1.469559156822e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt
index 0fa97df7..83518696 100644
--- a/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.247458350191e-16 4.440892098501e-16
+8 1.469559156822e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt
new file mode 100644
index 00000000..f01700dc
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_101099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.523039791531e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt
index 25a84091..13a78458 100644
--- a/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101100_typeGreen=0.txt
@@ -1 +1 @@
-8 1.724051917864e-16 6.106226635438e-16
+8 1.543809479869e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt
index 718acb43..75a95090 100644
--- a/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.086964319043e-16 6.661338147751e-16
+8 1.879473652148e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt
index 3c0a0538..148ac719 100644
--- a/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.837394475002e-16 1.110223024625e-15
+8 2.411452824717e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt
index c0d94a16..de85982b 100644
--- a/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.276289714672e-16 3.330669073875e-16
+8 1.650541757014e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt
index c0d94a16..de85982b 100644
--- a/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_101133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.276289714672e-16 3.330669073875e-16
+8 1.650541757014e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt
new file mode 100644
index 00000000..d0d7b55e
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_101199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.114442198303e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt
new file mode 100644
index 00000000..c828ab08
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_101499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.200258829336e-02 1.509764206207e-01
diff --git a/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt
index 81b07c5f..dfaa3afe 100644
--- a/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_103300_typeGreen=0.txt
@@ -1 +1 @@
-8 1.824851077850e-16 6.106226635438e-16
+8 3.082139455477e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt
index 9851958c..0e00d15c 100644
--- a/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_103301_typeGreen=0.txt
@@ -1 +1 @@
-8 4.190767293333e-16 1.332267629550e-15
+8 2.483565619348e-16 1.110223024625e-15
diff --git a/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt
index f89a838f..b315b3da 100644
--- a/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_103310_typeGreen=0.txt
@@ -1 +1 @@
-8 3.320754177571e-16 1.221245327088e-15
+8 3.008633483925e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt
index 3e6c23ea..fd6aa38c 100644
--- a/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_103311_typeGreen=0.txt
@@ -1 +1 @@
-8 1.102743943057e-16 2.220446049250e-16
+8 1.238475936291e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt
index 3e6c23ea..fd6aa38c 100644
--- a/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_103333_typeGreen=0.txt
@@ -1 +1 @@
-8 1.102743943057e-16 2.220446049250e-16
+8 1.238475936291e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt
new file mode 100644
index 00000000..ae3755c2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_103399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.251687730649e-16 1.887379141863e-15
diff --git a/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt
index 452bd3f7..a9039708 100644
--- a/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_104004_typeGreen=0.txt
@@ -1 +1 @@
-8 6.901116042656e-03 5.203433555700e-02
+8 6.901116042656e-03 5.203433555701e-02
diff --git a/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt
index 452bd3f7..a9039708 100644
--- a/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_104040_typeGreen=0.txt
@@ -1 +1 @@
-8 6.901116042656e-03 5.203433555700e-02
+8 6.901116042656e-03 5.203433555701e-02
diff --git a/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt
new file mode 100644
index 00000000..f3839c98
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_104099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.239982674130e-02 1.460032340803e-01
diff --git a/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt
new file mode 100644
index 00000000..fe4993d8
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_104199_typeGreen=0.txt
@@ -0,0 +1,2 @@
+8 4.200258829336e-02 1.509764206207e-01
+17 8.486914331463e-03 2.887335804383e-02
diff --git a/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt
new file mode 100644
index 00000000..afab620e
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_104499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.506048052626e-02 1.040899031895e-01
diff --git a/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt
index 939dd6b9..71b0bf34 100644
--- a/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.408596549030e-16 8.881784197001e-16
+8 2.290475563610e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt
index 6c8f6319..c00cf188 100644
--- a/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.507388587558e-16 9.992007221626e-16
+8 2.352712468881e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt
index 9100b9c4..0ce490e0 100644
--- a/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.787315981161e-16 1.554312234475e-15
+8 3.338366326547e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt
index 007b8a2d..a42248f9 100644
--- a/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110011_typeGreen=0.txt
@@ -1 +1 @@
-8 7.957068990635e-17 2.220446049250e-16
+8 9.231597913641e-17 2.775557561563e-16
diff --git a/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt
index 007b8a2d..a42248f9 100644
--- a/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110033_typeGreen=0.txt
@@ -1 +1 @@
-8 7.957068990635e-17 2.220446049250e-16
+8 9.231597913641e-17 2.775557561563e-16
diff --git a/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt
new file mode 100644
index 00000000..94786cf6
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_110099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.325306069933e-16 6.106226635438e-16
diff --git a/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt
index 7e4bc989..d3250967 100644
--- a/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110100_typeGreen=0.txt
@@ -1 +1 @@
-8 2.285706986988e-16 8.881784197001e-16
+8 1.790517133086e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt
index 37674592..e5550db5 100644
--- a/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.289010105550e-16 9.992007221626e-16
+8 1.950118864312e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt
index f826b1e0..84d7cccd 100644
--- a/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.623586732967e-16 1.110223024625e-15
+8 3.606486436078e-16 1.443289932013e-15
diff --git a/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt
index 782d4470..ab0f95e8 100644
--- a/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110111_typeGreen=0.txt
@@ -1 +1 @@
-8 2.099284199178e-16 4.440892098501e-16
+8 2.156418977007e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt
index 782d4470..ab0f95e8 100644
--- a/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_110133_typeGreen=0.txt
@@ -1 +1 @@
-8 2.099284199178e-16 4.440892098501e-16
+8 2.156418977007e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt
new file mode 100644
index 00000000..08264a6c
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_110199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.856944304086e-16 4.996003610813e-16
diff --git a/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_110499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt
index 4df24d16..2ff77cf7 100644
--- a/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.072251583388e-16 6.661338147751e-16
+8 2.353091410498e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt
index 12f4ed3d..0ff34348 100644
--- a/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.515435231594e-16 9.992007221626e-16
+8 2.243585394794e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt
index 17ffef2c..9337689c 100644
--- a/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111010_typeGreen=0.txt
@@ -1 +1 @@
-8 2.654382863482e-16 9.992007221626e-16
+8 3.357334522952e-16 1.221245327088e-15
diff --git a/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt
index 1abccb91..61a6627e 100644
--- a/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.247644913917e-16 3.330669073875e-16
+8 1.264743095170e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt
index 1abccb91..61a6627e 100644
--- a/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.247644913917e-16 3.330669073875e-16
+8 1.264743095170e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt
new file mode 100644
index 00000000..b22def0e
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_111099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.451459512795e-16 6.106226635438e-16
diff --git a/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt
index d6099504..46a89ff8 100644
--- a/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111100_typeGreen=0.txt
@@ -1 +1 @@
-8 1.506065702201e-16 4.440892098501e-16
+8 1.361454041896e-16 4.996003610813e-16
diff --git a/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt
index 25065ea3..ef28fbe7 100644
--- a/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111101_typeGreen=0.txt
@@ -1 +1 @@
-8 1.430564401166e-16 6.661338147751e-16
+8 1.310056339657e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt
index 0309783e..7f38bae6 100644
--- a/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.036428399477e-16 7.771561172376e-16
+8 1.946209037840e-16 6.661338147751e-16
diff --git a/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt
index b5399251..3da8440c 100644
--- a/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.577740299644e-16 3.330669073875e-16
+8 1.582311261321e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt
index b5399251..3da8440c 100644
--- a/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_111133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.577740299644e-16 3.330669073875e-16
+8 1.582311261321e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt
new file mode 100644
index 00000000..574f9a99
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_111199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.231261729659e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_111499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt
index b6db15e0..17a86932 100644
--- a/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_113300_typeGreen=0.txt
@@ -1 +1 @@
-8 1.711628015844e-16 5.551115123126e-16
+8 1.857410152917e-16 7.216449660064e-16
diff --git a/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt
index cb99cdb7..06afceca 100644
--- a/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_113301_typeGreen=0.txt
@@ -1 +1 @@
-8 1.961000758343e-16 7.771561172376e-16
+8 1.893172848568e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt
index ee58c910..f3e59879 100644
--- a/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_113310_typeGreen=0.txt
@@ -1 +1 @@
-8 2.383249998992e-16 8.326672684689e-16
+8 2.364862414871e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt
index bded1cd8..3523a0cc 100644
--- a/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_113311_typeGreen=0.txt
@@ -1 +1 @@
-8 1.116709290277e-16 3.330669073875e-16
+8 1.152775633689e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt
index bded1cd8..3523a0cc 100644
--- a/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_113333_typeGreen=0.txt
@@ -1 +1 @@
-8 1.116709290277e-16 3.330669073875e-16
+8 1.152775633689e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt
new file mode 100644
index 00000000..42029051
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_113399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.681568408891e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_114099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_114199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt
new file mode 100644
index 00000000..95f1fab1
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_114499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.992446487147e-02 1.135798757980e-01
diff --git a/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_140099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt
new file mode 100644
index 00000000..dba36c18
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_140199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.443019074090e-02 2.006714310501e-01
diff --git a/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_140499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt
new file mode 100644
index 00000000..dba36c18
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_141099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.443019074090e-02 2.006714310501e-01
diff --git a/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt
new file mode 100644
index 00000000..530f3ce4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_141199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.795591596726e-02 2.268934764004e-01
diff --git a/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt
index 35e11ec4..b962f696 100644
--- a/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_141410_typeGreen=0.txt
@@ -1 +1 @@
-8 9.257074536541e-03 5.787589076140e-02
+8 9.257074536541e-03 5.787589076139e-02
diff --git a/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt
new file mode 100644
index 00000000..20066e2a
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_141499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.043648693180e-01 1.982452423276e+00
diff --git a/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt
new file mode 100644
index 00000000..530f3ce4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_143399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.795591596726e-02 2.268934764004e-01
diff --git a/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_144099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt
index 35e11ec4..b962f696 100644
--- a/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_144110_typeGreen=0.txt
@@ -1 +1 @@
-8 9.257074536541e-03 5.787589076140e-02
+8 9.257074536541e-03 5.787589076139e-02
diff --git a/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt
new file mode 100644
index 00000000..20066e2a
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_144199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.043648693180e-01 1.982452423276e+00
diff --git a/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt
new file mode 100644
index 00000000..be85b941
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_144499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.100653287328e-01 1.507869829275e+00
diff --git a/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt
index 9068183c..3bd4ba14 100644
--- a/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330000_typeGreen=0.txt
@@ -1 +1 @@
-8 2.417264182289e-16 8.881784197001e-16
+8 2.224152959320e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt
index a9c20297..3d4ec07e 100644
--- a/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330001_typeGreen=0.txt
@@ -1 +1 @@
-8 5.004057813534e-16 1.332267629550e-15
+8 2.073204635671e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt
index 82501401..cec42b28 100644
--- a/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330010_typeGreen=0.txt
@@ -1 +1 @@
-8 4.222797590796e-16 1.665334536938e-15
+8 5.131031483671e-16 1.776356839400e-15
diff --git a/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt
index fb079dcb..f9511d88 100644
--- a/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330011_typeGreen=0.txt
@@ -1,2 +1,2 @@
-8 1.276643339166e-16 3.330669073875e-16
-16 1.706826512311e-16 7.771561172376e-16
+8 9.910722837601e-17 2.220446049250e-16
+16 1.629108020274e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt
index c3ff9285..5bfcf01d 100644
--- a/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.276643339166e-16 3.330669073875e-16
+8 9.910722837601e-17 2.220446049250e-16
diff --git a/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt
new file mode 100644
index 00000000..19338cce
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_330099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 1.837169624052e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt
index 24624883..2aa404ad 100644
--- a/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330100_typeGreen=0.txt
@@ -1 +1 @@
-8 2.926288778510e-16 1.221245327088e-15
+8 1.730680789872e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt
index 38d63282..b1f14f10 100644
--- a/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330101_typeGreen=0.txt
@@ -1 +1 @@
-8 2.115034829798e-16 7.771561172376e-16
+8 2.285725259941e-16 1.110223024625e-15
diff --git a/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt
index eb39266d..0e4ac1c0 100644
--- a/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330110_typeGreen=0.txt
@@ -1 +1 @@
-8 2.994365074576e-16 1.221245327088e-15
+8 3.055033613695e-16 1.443289932013e-15
diff --git a/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt
index 2849122a..dd109706 100644
--- a/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.242508934651e-16 4.440892098501e-16
+8 1.391703464091e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt
index 2849122a..dd109706 100644
--- a/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_330133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.242508934651e-16 4.440892098501e-16
+8 1.391703464091e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt
new file mode 100644
index 00000000..d3679278
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_330199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.613943634531e-16 8.326672684689e-16
diff --git a/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_330499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt
index c285a991..0ca3a20e 100644
--- a/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331000_typeGreen=0.txt
@@ -1 +1 @@
-8 3.072060405836e-16 9.992007221626e-16
+8 3.302831470286e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt
index 03148b9a..3dde6644 100644
--- a/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331001_typeGreen=0.txt
@@ -1 +1 @@
-8 2.666179212385e-16 8.881784197001e-16
+8 2.035206327463e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt
index 511646d2..0aefd803 100644
--- a/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331010_typeGreen=0.txt
@@ -1 +1 @@
-8 3.910393537349e-16 1.554312234475e-15
+8 3.954947985543e-16 1.554312234475e-15
diff --git a/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt
index 80de8639..d7c1927f 100644
--- a/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331011_typeGreen=0.txt
@@ -1 +1 @@
-8 1.344802348586e-16 3.330669073875e-16
+8 1.241933593859e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt
index 80de8639..d7c1927f 100644
--- a/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331033_typeGreen=0.txt
@@ -1 +1 @@
-8 1.344802348586e-16 3.330669073875e-16
+8 1.241933593859e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt
new file mode 100644
index 00000000..45bfc424
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_331099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.932939822763e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt
index 61f42caa..18e8b4f8 100644
--- a/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331100_typeGreen=0.txt
@@ -1 +1 @@
-8 1.616134902723e-16 4.996003610813e-16
+8 1.640437657289e-16 4.996003610813e-16
diff --git a/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt
index da3ef3ea..ce78b3e7 100644
--- a/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331101_typeGreen=0.txt
@@ -1 +1 @@
-8 1.790463298367e-16 7.771561172376e-16
+8 1.655274575850e-16 7.771561172376e-16
diff --git a/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt
index cce39885..2c07a6b4 100644
--- a/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331110_typeGreen=0.txt
@@ -1 +1 @@
-8 1.435599953163e-16 6.661338147751e-16
+8 1.500012415547e-16 5.551115123126e-16
diff --git a/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt
index 2464e12f..8cfb5b71 100644
--- a/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331111_typeGreen=0.txt
@@ -1 +1 @@
-8 1.203853098659e-16 3.330669073875e-16
+8 1.380822452361e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt
index 2464e12f..8cfb5b71 100644
--- a/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_331133_typeGreen=0.txt
@@ -1 +1 @@
-8 1.203853098659e-16 3.330669073875e-16
+8 1.380822452361e-16 4.440892098501e-16
diff --git a/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt
new file mode 100644
index 00000000..bacdbbfc
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_331199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 2.600002712460e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_331499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt
index 93a545bc..f5bb3df8 100644
--- a/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_333300_typeGreen=0.txt
@@ -1 +1 @@
-8 1.547554192428e-16 5.551115123126e-16
+8 1.826697591811e-16 6.106226635438e-16
diff --git a/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt
index 0d7fe36d..3ce4e810 100644
--- a/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_333301_typeGreen=0.txt
@@ -1 +1 @@
-8 6.083466602000e-16 1.332267629550e-15
+8 3.648200634147e-16 1.110223024625e-15
diff --git a/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt
index 1e5227ec..422f8ac0 100644
--- a/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_333310_typeGreen=0.txt
@@ -1 +1 @@
-8 6.819607188466e-16 1.887379141863e-15
+8 2.232551035605e-16 8.881784197001e-16
diff --git a/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt
index 2a3c4e36..a557424f 100644
--- a/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_333311_typeGreen=0.txt
@@ -1 +1 @@
-8 1.231531498259e-16 3.330669073875e-16
+8 1.279468816630e-16 3.330669073875e-16
diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt
index 80aaf1d6..4e03e797 100644
--- a/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=0.txt
@@ -1,2 +1,3 @@
-8 1.183686628522e-16 3.330669073875e-16
-16 2.023798911980e-16 9.992007221626e-16
+16 2.506968496639e-16 9.992007221626e-16
+8 1.181651052418e-16 3.330669073875e-16
+16 2.506968496639e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt
new file mode 100644
index 00000000..59c84cd2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=1.txt
@@ -0,0 +1 @@
+16 6.461989693825e-02 1.171077243925e-01
diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt
new file mode 100644
index 00000000..07a61244
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=2.txt
@@ -0,0 +1 @@
+16 3.530094381085e-01 6.397430813834e-01
diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt
new file mode 100644
index 00000000..6af2669d
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=3.txt
@@ -0,0 +1 @@
+16 3.494862970899e-01 6.333582518347e-01
diff --git a/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt
new file mode 100644
index 00000000..d30864c0
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333333_typeGreen=4.txt
@@ -0,0 +1 @@
+16 3.380767319311e-01 6.126812115521e-01
diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt
new file mode 100644
index 00000000..1ed2d0b8
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=0.txt
@@ -0,0 +1,2 @@
+16 2.355266459011e-16 6.661338147751e-16
+8 4.191000011073e-16 9.992007221626e-16
diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt
new file mode 100644
index 00000000..14d7dc72
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=1.txt
@@ -0,0 +1,2 @@
+16 2.238092546229e-02 4.055994167232e-02
+17 1.977047785451e-02 3.920432575338e-02
diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt
new file mode 100644
index 00000000..4fc1f137
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=2.txt
@@ -0,0 +1 @@
+16 3.930370609172e-01 7.122833366619e-01
diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt
new file mode 100644
index 00000000..4c1b4b4c
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=3.txt
@@ -0,0 +1 @@
+16 2.280867649289e-01 4.133513557039e-01
diff --git a/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt
new file mode 100644
index 00000000..a4b43cc3
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_333399_typeGreen=4.txt
@@ -0,0 +1 @@
+16 1.008997206127e-01 1.828560123531e-01
diff --git a/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_334099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt
index f0088cce..edb5f8a9 100644
--- a/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_334111_typeGreen=0.txt
@@ -1,2 +1,3 @@
 8 2.556243887413e-01 8.031750419599e-01
 17 4.413576773139e-02 2.044117431074e-01
+17 4.413576773139e-02 2.044117431074e-01
diff --git a/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_334199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt
new file mode 100644
index 00000000..95f1fab1
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_334499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.992446487147e-02 1.135798757980e-01
diff --git a/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt
new file mode 100644
index 00000000..3f75abfd
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_400099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.741303848805e-02 1.583004445949e-01
diff --git a/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt
new file mode 100644
index 00000000..5256d4db
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_400199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.453121113062e-02 1.997372939511e-01
diff --git a/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt
index ecbb818b..106b1622 100644
--- a/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_400410_typeGreen=0.txt
@@ -1 +1 @@
-8 9.233949105056e-03 5.785860761320e-02
+8 9.233949105056e-03 5.785860761319e-02
diff --git a/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt
new file mode 100644
index 00000000..506ba9d2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_400499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.051484886687e-01 2.005866243539e+00
diff --git a/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt
new file mode 100644
index 00000000..5256d4db
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.453121113062e-02 1.997372939511e-01
diff --git a/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt
new file mode 100644
index 00000000..3b67f047
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.796571614602e-02 2.268632740773e-01
diff --git a/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt
new file mode 100644
index 00000000..df2c8feb
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401441_typeGreen=1.txt
@@ -0,0 +1 @@
+16 3.000341400202e-03 4.344975347735e-02
diff --git a/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt
new file mode 100644
index 00000000..61ec2be0
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401444_typeGreen=1.txt
@@ -0,0 +1 @@
+16 3.015468995435e-03 4.222496564571e-02
diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt
new file mode 100644
index 00000000..55fc664a
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=0.txt
@@ -0,0 +1,2 @@
+16 9.439111015658e-02 5.894889259406e-01
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt
new file mode 100644
index 00000000..fcad07a2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=1.txt
@@ -0,0 +1 @@
+16 6.256321865795e-03 3.781128583175e-02
diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt
new file mode 100644
index 00000000..dc8a911d
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=2.txt
@@ -0,0 +1 @@
+16 9.579721195958e-02 5.537345525604e-01
diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt
new file mode 100644
index 00000000..3f6d95a7
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=3.txt
@@ -0,0 +1 @@
+16 5.156406574464e-02 3.080377026025e-01
diff --git a/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt
new file mode 100644
index 00000000..1f9d9051
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_401499_typeGreen=4.txt
@@ -0,0 +1 @@
+16 2.777470852546e-02 1.665997142499e-01
diff --git a/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt
new file mode 100644
index 00000000..3b67f047
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_403399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.796571614602e-02 2.268632740773e-01
diff --git a/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt
new file mode 100644
index 00000000..506ba9d2
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_404099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.051484886687e-01 2.005866243539e+00
diff --git a/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_404199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt
new file mode 100644
index 00000000..b52c1f22
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_404499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.102639526126e-01 1.513172916610e+00
diff --git a/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt
new file mode 100644
index 00000000..d8a97445
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_410099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.722214533751e-02 1.602958913385e-01
diff --git a/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt
new file mode 100644
index 00000000..dba36c18
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_410199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.443019074090e-02 2.006714310501e-01
diff --git a/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_410499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt
new file mode 100644
index 00000000..dba36c18
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_411099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.443019074090e-02 2.006714310501e-01
diff --git a/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt
new file mode 100644
index 00000000..530f3ce4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_411199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.795591596726e-02 2.268934764004e-01
diff --git a/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt
new file mode 100644
index 00000000..0c995230
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_411444_typeGreen=1.txt
@@ -0,0 +1 @@
+16 2.989390717874e-03 4.161211731720e-02
diff --git a/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt
new file mode 100644
index 00000000..20066e2a
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_411499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.043648693180e-01 1.982452423276e+00
diff --git a/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt
new file mode 100644
index 00000000..530f3ce4
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_413399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.795591596726e-02 2.268934764004e-01
diff --git a/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt
new file mode 100644
index 00000000..7c6fcb39
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_414099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.046385573823e-01 1.989076837382e+00
diff --git a/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt
index 35e11ec4..b962f696 100644
--- a/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_414110_typeGreen=0.txt
@@ -1 +1 @@
-8 9.257074536541e-03 5.787589076140e-02
+8 9.257074536541e-03 5.787589076139e-02
diff --git a/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt
new file mode 100644
index 00000000..20066e2a
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_414199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.043648693180e-01 1.982452423276e+00
diff --git a/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt
new file mode 100644
index 00000000..6e6ffeef
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_414444_typeGreen=1.txt
@@ -0,0 +1 @@
+16 3.001897712375e-03 4.042950163304e-02
diff --git a/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt
new file mode 100644
index 00000000..be85b941
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_414499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.100653287328e-01 1.507869829275e+00
diff --git a/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt
new file mode 100644
index 00000000..95f1fab1
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_440099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 4.992446487147e-02 1.135798757980e-01
diff --git a/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt
new file mode 100644
index 00000000..43d8083f
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_440199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.680462636025e-02 1.465168649220e-01
diff --git a/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt
new file mode 100644
index 00000000..b3e5f03d
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_440444_typeGreen=1.txt
@@ -0,0 +1 @@
+16 3.033581271861e-03 4.102111411487e-02
diff --git a/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt
new file mode 100644
index 00000000..b52c1f22
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_440499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.102639526126e-01 1.513172916610e+00
diff --git a/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt
new file mode 100644
index 00000000..43d8083f
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_441099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 5.680462636025e-02 1.465168649220e-01
diff --git a/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt
new file mode 100644
index 00000000..f77e0f83
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_441199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.954437578025e-02 1.767108271899e-01
diff --git a/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt
new file mode 100644
index 00000000..be85b941
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_441499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.100653287328e-01 1.507869829275e+00
diff --git a/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt
new file mode 100644
index 00000000..f77e0f83
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_443399_typeGreen=0.txt
@@ -0,0 +1 @@
+8 8.954437578025e-02 1.767108271899e-01
diff --git a/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt
new file mode 100644
index 00000000..76ccd048
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_443399_typeGreen=3.txt
@@ -0,0 +1 @@
+17 1.299759222394e-01 4.610254128717e-01
diff --git a/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt
new file mode 100644
index 00000000..b52c1f22
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_444099_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.102639526126e-01 1.513172916610e+00
diff --git a/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt
new file mode 100644
index 00000000..be85b941
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_444199_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.100653287328e-01 1.507869829275e+00
diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt
index 2b2a9d88..2e40d19d 100644
--- a/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt
+++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=0.txt
@@ -1,3 +1,2 @@
 16 1.271883888163e-02 1.739957177053e-01
-32 3.225108970826e-03 5.041207253504e-02
-64 8.090864625062e-04 1.307047437423e-02
+8 4.884531403115e-02 3.680868204613e-01
diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt
new file mode 100644
index 00000000..7b571f13
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=1.txt
@@ -0,0 +1 @@
+16 3.017234157142e-03 3.927536957671e-02
diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt
index 84ad044e..66222a90 100644
--- a/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt
+++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=2.txt
@@ -1,3 +1 @@
 16 5.171085302138e-02 6.277588695484e-01
-32 2.213405736435e-02 3.301075527807e-01
-64 6.683903540655e-03 1.066411041943e-01
diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt
index 316b601f..0564224e 100644
--- a/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt
+++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=3.txt
@@ -1,3 +1 @@
 16 3.098524288986e-02 4.030347242545e-01
-32 5.612919251869e-03 9.135896990668e-02
-64 5.099579703039e-04 8.679550799164e-03
diff --git a/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt b/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt
index d2ba42ef..07a63c24 100644
--- a/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt
+++ b/samples/validation/data_ref/validation_3d_444444_typeGreen=4.txt
@@ -1,3 +1 @@
 16 1.814399512337e-02 2.443536829148e-01
-32 1.368220040893e-03 2.194920130034e-02
-64 3.781077087410e-05 5.800217852716e-04
diff --git a/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt b/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt
new file mode 100644
index 00000000..8969b7db
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_444499_typeGreen=0.txt
@@ -0,0 +1 @@
+8 3.157704605151e-01 1.123512937953e+00
diff --git a/samples/validation/run/juwels_kernel_valid.sh b/samples/validation/run/juwels_kernel_valid.sh
new file mode 100755
index 00000000..dd29e816
--- /dev/null
+++ b/samples/validation/run/juwels_kernel_valid.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SLURM submission script for JUWELS: loads the modules and launches ${EXEC_FLUPS} with srun
+#SBATCH --account=prpa79
+#SBATCH --job-name=scaling
+#
+#SBATCH --output=flups_%j.out
+#SBATCH --error=flups_%j.err
+# Read from the environment: MY_NTHREADS, EXEC_FLUPS, MY_N{X,Y,Z}, SIZE_{X,Y,Z}, L_{X,Y,Z}
+export OMP_NUM_THREADS=${MY_NTHREADS}
+
+export I_MPI_DEBUG=+5
+
+echo "----------------- Load modules -----------------"
+module purge
+#module load Intel/2019.3.199-GCC-8.3.0
+#module load IntelMPI/2018.5.288
+#module load IntelMPI/2019.3.199
+
+module load intel-para/2019a
+
+# module use /p/software/juwels/otherstages/
+# module load Stages/Devel-2019a
+# module load Intel
+# module load IntelMPI/2019.6.RC20191024
+
+
+module load FFTW/3.3.8
+module load HDF5/1.10.5
+module load METIS/5.1.0
+module list
+
+# Change the communication method; otherwise MPI uses too much memory.
+export PSP_UCP=1
+export UCX_TLS=ud_mlx5,self,sm
+#export PSI_LOGGERDEBUG=1
+#export PSI_FORWARDERDEBUG=1
+
+echo "----------------- launching job -----------------"
+echo "launch command: srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0"
+# run the executable; the arguments mirror the command echoed above
+srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0
+# print the SLURM record of this job
+scontrol show job ${SLURM_JOB_ID}
+# print the accounting fields, filtered on this job's id
+sacct --format="JobID,NCPUS,NNodes,Elapsed,MaxRSS,MaxVMSize,ExitCode" | grep "$SLURM_JOB_ID"
\ No newline at end of file
diff --git a/samples/validation/run/juwels_strongscaling.sh b/samples/validation/run/juwels_strongscaling.sh
new file mode 100755
index 00000000..1fce77d8
--- /dev/null
+++ b/samples/validation/run/juwels_strongscaling.sh
@@ -0,0 +1,534 @@
+#!/bin/bash
+# bash is required: this script uses [[ ... ]] tests and <<< here-strings
+HOME_FLUPS=/p/project/prpa79/flups/samples/validation
+KERNEL=juwels_kernel_valid.sh
+
+## fixed parameters: global grid size shared by every run below
+export INITIAL_SIZE_X=1152
+export INITIAL_SIZE_Y=1152
+export INITIAL_SIZE_Z=1152
+
+export ver=$1
+# the first argument selects which set of runs is submitted; refuse to start without it
+if [[ -z "$1" ]];
+then
+  echo "you must specify a version (small/large) as an argument"
+  exit 1
+else
+  echo "starting as $1"
+fi
+
+export nPerSwitch=1 #number of process per switch, unknown
+export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches
+
+
+if [ "$ver" = "small" ]; then
+############################################################
+############################################################
+############################################################
+#                           SMALL (<=4k)
+############################################################
+############################################################
+############################################################
+
+export PARTITION=batch
+
+############################################################
+# ALL TO ALL
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver}
+
+# create the run directory tree (existing content is kept: the cleanup is disabled)
+# rm -rf ${SCRATCH}
+mkdir -p "$SCRATCH"
+mkdir -p "$SCRATCH/data"
+mkdir -p "$SCRATCH/prof"
+
+# stage the executable and the job script in the run directory
+cp "$HOME_FLUPS/${EXEC_FLUPS}" "$SCRATCH"
+cp "$HOME_FLUPS/run/${KERNEL}" "$SCRATCH"
+# jobs are submitted as ./${KERNEL}, so abort if the run directory cannot be entered
+cd "$SCRATCH" || exit 1
+
+#================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=8
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread per task, MY_NX*MY_NY*MY_NZ tasks
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}" # log exactly the command submitted on the next line
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}
+
+
+#================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread per task, MY_NX*MY_NY*MY_NZ tasks
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}" # log exactly the command submitted on the next line
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}
+
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread per task, MY_NX*MY_NY*MY_NZ tasks
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}" # log exactly the command submitted on the next line
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver}
+
+# create the run directory tree (existing content is kept: the cleanup is disabled)
+# rm -rf ${SCRATCH}
+mkdir -p "$SCRATCH"
+mkdir -p "$SCRATCH/data"
+mkdir -p "$SCRATCH/prof"
+
+# stage the executable and the job script in the run directory
+cp "$HOME_FLUPS/${EXEC_FLUPS}" "$SCRATCH"
+cp "$HOME_FLUPS/run/${KERNEL}" "$SCRATCH"
+# jobs are submitted as ./${KERNEL}, so abort if the run directory cannot be entered
+cd "$SCRATCH" || exit 1
+
+
+#================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=8
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread per task, MY_NX*MY_NY*MY_NZ tasks
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}" # log exactly the command submitted on the next line
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --partition=${PARTITION} ./${KERNEL}
+
+
+#================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+elif [ "$ver" = "large" ]; then
+
+############################################################
+############################################################
+############################################################
+#                           LARGE (>4k, <=18k)
+############################################################
+############################################################
+############################################################
+export PARTITION=batch
+
+
+# ############################################################
+# # ALL TO ALL
+# #-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+# non-blocking counterpart of the all-to-all 9216 CPU's job of this version
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# switch-constrained variant: add --switches=${N_SWITCH}@${SW_TIMEOUT} to the sbatch call below
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+elif [ "$ver" = "Xlarge" ]; then
+
+############################################################
+############################################################
+############################################################
+#                     EXTRA  LARGE (>18k)
+############################################################
+############################################################
+############################################################
+export PARTITION=large
+
+# ############################################################
+# # ALL TO ALL
+# #-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_a2a_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 36,864 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 73,728 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=48
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_strong_nb_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 36,864 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 73,728  CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=48
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=1.0
+export L_Z=1.0
+#-- global size
+export SIZE_X=$INITIAL_SIZE_X
+export SIZE_Y=$INITIAL_SIZE_Y
+export SIZE_Z=$INITIAL_SIZE_Z
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+fi
diff --git a/samples/validation/run/juwels_weakscaling.sh b/samples/validation/run/juwels_weakscaling.sh
new file mode 100755
index 00000000..16b2d373
--- /dev/null
+++ b/samples/validation/run/juwels_weakscaling.sh
@@ -0,0 +1,531 @@
+#!/bin/sh
+
+HOME_FLUPS=/p/project/prpa79/flups/samples/validation
+KERNEL=juwels_kernel_valid.sh
+
+## fixed parameters
+export SIZE_PER_PROC=64
+
+export ver=$1
+
+if [[ -z $1 ]];
+then
+  echo "you must specify a version (small/large) as an argument"
+  exit 1
+else
+  echo "starting as $1"
+fi
+
+export nPerSwitch=1152 #number of process per switch
+export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches
+
+
+if [ "$ver" = "small" ]; then
+############################################################
+############################################################
+############################################################
+#                           SMALL (<=4k)
+############################################################
+############################################################
+############################################################
+export PARTITION=batch
+
+############################################################
+# ALL TO ALL
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+#================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=8
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=8
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=12
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+elif [ "$ver" = "large" ]; then
+
+############################################################
+############################################################
+############################################################
+#                           LARGE (>4k, <=18k)
+############################################################
+############################################################
+############################################################
+export PARTITION=batch
+
+
+# ############################################################
+# # ALL TO ALL
+# #-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+elif [ "$ver" = "Xlarge" ]; then
+
+############################################################
+############################################################
+############################################################
+#                     EXTRA  LARGE (>18k)
+############################################################
+############################################################
+############################################################
+export PARTITION=large
+
+# ############################################################
+# # ALL TO ALL
+# #-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_a2a_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 36,864 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 73,728 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=48
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/p/scratch/prpa79/$(whoami)/flups_weak_nb_${SIZE_PER_PROC}_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/${KERNEL} $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 36,864 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+#================== 73,728 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=48
+export MY_NZ=48
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./${KERNEL}"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./${KERNEL}
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./${KERNEL}"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT}  --partition=${PARTITION}  ./${KERNEL}
+
+
+fi
diff --git a/samples/validation/run/marenostrum_kernel_valid.sh b/samples/validation/run/marenostrum_kernel_valid.sh
index 2bb21796..6d88bf10 100755
--- a/samples/validation/run/marenostrum_kernel_valid.sh
+++ b/samples/validation/run/marenostrum_kernel_valid.sh
@@ -1,11 +1,14 @@
 #!/bin/bash
 # Submission script for Marenostrum
 #SBATCH --job-name=scaling
-#SBATCH --time=00:10:00
 #
 #SBATCH --output=flups_%j.out
 #SBATCH --error=flups_%j.err
 #SBATCH --qos=prace
+#SBATCH --exclude=s07r2b[01-24],s05r1b[01-24]
+#--> s07r2b[01-24]: one of these failed at FFTW plans alloc
+#--> s05r1b[01-24]: proc s05r1b16 gave invalid address or slot during writev
+#--> keep ONE --exclude directive: a repeated --exclude replaces the previous node list
 
 export OMP_NUM_THREADS=${MY_NTHREADS}
 
@@ -22,3 +25,5 @@ echo "----------------- launching job -----------------"
 echo "launch command: srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0"
 
 srun --label ${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${SIZE_X} ${SIZE_Y} ${SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -ns 20 -k 0
+
+scontrol show job ${SLURM_JOB_ID}
\ No newline at end of file
diff --git a/samples/validation/run/marenostrum_weakscaling.sh b/samples/validation/run/marenostrum_weakscaling.sh
index f3c1501e..bc2dd256 100755
--- a/samples/validation/run/marenostrum_weakscaling.sh
+++ b/samples/validation/run/marenostrum_weakscaling.sh
@@ -1,21 +1,45 @@
 #!/bin/sh
 ## RM the previous validation dir
 
-HOME_FLUPS=/home/pr1ekp00/pr1ekp02/flups/samples/validation
+HOME_FLUPS=/home/pr1ekp00/$(whoami)/flups/samples/validation
 
 
 ## fixed parameters
 export SIZE_PER_PROC=128
 
+export ver=$1
+
+# POSIX test on purpose: the shebang is /bin/sh and `[[ ]]` is a bashism that would skip this guard
+if [ -z "$ver" ]; then
+  echo "you must specify a version (small/large/Xlarge) as an argument"
+  exit 1
+else
+  echo "starting as $ver"
+fi
+
+export nPerSwitch=1152 #number of processes per switch; N_SWITCH is computed as MY_NTASKS / nPerSwitch
+export SW_TIMEOUT=1440 #minutes to wait before releasing the constraint on switches (passed as --switches=N@SW_TIMEOUT)
+
+
+if [ "$ver" = "small" ]; then
+############################################################
+############################################################
+############################################################
+#                           SMALL (<=4k)
+############################################################
+############################################################
+############################################################
+
+
 ############################################################
 # ALL TO ALL
 #-----------------------------------------------------------
 export EXEC_FLUPS=flups_validation_a2a
 
-SCRATCH=/gpfs/scratch/pr1ekp00/pr1ekp02/flups_weak_a2a_align16
+SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_a2a_${ver}
 
 # clean the validation dir
-rm -rf ${SCRATCH}
+# rm -rf ${SCRATCH}
 mkdir -p $SCRATCH
 mkdir -p $SCRATCH/data
 mkdir -p $SCRATCH/prof
@@ -26,8 +50,9 @@ cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH
 # go to it
 cd $SCRATCH
 
-
 #================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
 #-- proc domain
 export MY_NX=8
 export MY_NY=12
@@ -42,11 +67,24 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
 export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
 #-- 1 thread
 export MY_NTHREADS=1
-export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ))
-sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
 
+# #-- 4 thread
+# export WT='00:20:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} ")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} / 2")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
 
 #================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
 #-- proc domain
 export MY_NX=16
 export MY_NY=12
@@ -61,8 +99,57 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
 export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
 #-- 1 thread
 export MY_NTHREADS=1
-export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ))
-sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+# #-- 4 thread
+# export WT='00:30:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
+
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+# #-- 4 thread
+# export WT='00:40:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
+
+
+
+
 
 
 ############################################################
@@ -70,10 +157,10 @@ sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel
 #-----------------------------------------------------------
 export EXEC_FLUPS=flups_validation_nb
 
-SCRATCH=/gpfs/scratch/pr1ekp00/pr1ekp02/flups_weak_nb_align16
+SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_nb_${ver}
 
 # clean the validation dir
-rm -rf ${SCRATCH}
+# rm -rf ${SCRATCH}
 mkdir -p $SCRATCH
 mkdir -p $SCRATCH/data
 mkdir -p $SCRATCH/prof
@@ -86,6 +173,8 @@ cd $SCRATCH
 
 
 #================== 1152 CPU's ================
+#-- requested walltime
+export WT='00:10:00'
 #-- proc domain
 export MY_NX=8
 export MY_NY=12
@@ -100,11 +189,24 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
 export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
 #-- 1 thread
 export MY_NTHREADS=1
-export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ))
-sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
 
+# #-- 4 thread
+# export WT='00:20:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} ")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} / 2")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
 
 #================== 2304 CPU's ================
+#-- requested walltime
+export WT='00:15:00'
 #-- proc domain
 export MY_NX=16
 export MY_NY=12
@@ -119,5 +221,210 @@ export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
 export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
 #-- 1 thread
 export MY_NTHREADS=1
-export MY_NTASKS=$(($MY_NX*$MY_NY*$MY_NZ))
-sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} ./marenostrum_kernel_valid.sh
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+# #-- 4 thread
+# export WT='00:30:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
+
+#================== 4608 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=12
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+# #-- 4 thread
+# export WT='00:40:00'
+# export MY_NX=$(bc<<< "scale=6 ; ${MY_NX} / 2")
+# export MY_NY=$(bc<<< "scale=6 ; ${MY_NY} / 2")
+# export MY_NZ=$(bc<<< "scale=6 ; ${MY_NZ} ")
+# export MY_NTHREADS=4
+# export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+# echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh"
+# sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} ./marenostrum_kernel_valid.sh
+
+
+elif [ "$ver" = "large" ]; then
+
+############################################################
+############################################################
+############################################################
+#                           LARGE (>4k, <=18k)
+############################################################
+############################################################
+############################################################
+
+
+
+# ############################################################
+# # ALL TO ALL
+# #-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_a2a
+
+SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_a2a_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+
+
+
+############################################################
+# NON-BLOCKING
+#-----------------------------------------------------------
+export EXEC_FLUPS=flups_validation_nb
+
+SCRATCH=/gpfs/scratch/pr1ekp00/$(whoami)/flups_weak_nb_${ver}
+
+# clean the validation dir
+# rm -rf ${SCRATCH}
+mkdir -p $SCRATCH
+mkdir -p $SCRATCH/data
+mkdir -p $SCRATCH/prof
+
+# copy the needed info
+cp $HOME_FLUPS/${EXEC_FLUPS} $SCRATCH
+cp $HOME_FLUPS/run/marenostrum_kernel_valid.sh $SCRATCH
+# go to it
+cd $SCRATCH
+
+
+#================== 9216 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=16
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+
+#================== 18432 CPU's ================
+#-- requested walltime
+export WT='00:20:00'
+#-- proc domain
+export MY_NX=32
+export MY_NY=24
+export MY_NZ=24
+#-- domain length
+export L_X=1.0
+export L_Y=$(bc<<< "scale=6 ; $MY_NY / $MY_NX")
+export L_Z=$(bc<<< "scale=6 ; $MY_NZ / $MY_NX")
+#-- global size
+export SIZE_X=$(($SIZE_PER_PROC*$MY_NX))
+export SIZE_Y=$(($SIZE_PER_PROC*$MY_NY))
+export SIZE_Z=$(($SIZE_PER_PROC*$MY_NZ))
+#-- 1 thread
+export MY_NTHREADS=1
+export MY_NTASKS=$(bc<<< "scale=0 ; ($MY_NX*$MY_NY*$MY_NZ)/1")
+export N_SWITCH=$(bc<<< "scale=0 ; $MY_NTASKS / $nPerSwitch") #CAUTION, THIS WORKS ONLY BECAUSE WE ALWAYS HAVE A MULTIPLE OF 24 SWITCHES
+echo "sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT}  -- ./marenostrum_kernel_valid.sh"
+sbatch --ntasks=${MY_NTASKS} --cpus-per-task=${MY_NTHREADS} --time=${WT} --switches=${N_SWITCH}@${SW_TIMEOUT} ./marenostrum_kernel_valid.sh
+
+
+elif [ "$ver" = "Xlarge" ]; then
+
+############################################################
+############################################################
+############################################################
+#                     EXTRA  LARGE (>18k)
+############################################################
+############################################################
+############################################################
+
+echo "must be done"
+
+fi
\ No newline at end of file
diff --git a/samples/validation/run/zenobe_convergence_a2a.sh b/samples/validation/run/zenobe_convergence_a2a.sh
index 1e183d41..d83e98c5 100755
--- a/samples/validation/run/zenobe_convergence_a2a.sh
+++ b/samples/validation/run/zenobe_convergence_a2a.sh
@@ -1,10 +1,12 @@
 #!/bin/sh
 ## RM the previous validation dir
 
-HOME_FLUPS=/home/acad/ucl-tfl/dcaprace/FLUPS/flups_green/samples/validation
+#HOME_FLUPS=/home/acad/ucl-tfl/dcaprace/FLUPS/flups_green/samples/validation
+HOME_FLUPS=/home/acad/ucl-tfl/tgillis/flups/samples/validation
 EXEC_FLUPS=flups_validation_a2a
 
-SCRATCH=/SCRATCH/acad/examples/dcaprace/flups_convergence_a2a
+#SCRATCH=/SCRATCH/acad/examples/dcaprace/flups_convergence_a2a
+SCRATCH=/SCRATCH/acad/examples/tgillis/flups_convergence_a2a
 
 # clean the validation dir
 rm -rf ${SCRATCH}
@@ -21,18 +23,22 @@ cd $SCRATCH
 
 ## 256 
 export MY_RES=256
-qsub -q main -v MY_NX=1,MY_NY=2,MY_NZ=2,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+#qsub -q main -v MY_NX=1,MY_NY=2,MY_NZ=2,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=1:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
 
 ## 512 
 export MY_RES=512
-qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=4,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+#qsub -q main -v MY_NX=2,MY_NY=2,MY_NZ=4,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+qsub -q main -v MY_NX=4,MY_NY=4,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=4:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernelConv_a2a.sh
 
 ## 1024 
 export MY_RES=1024
-qsub -q large -v MY_NX=4,MY_NY=6,MY_NZ=8,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+#qsub -q large -v MY_NX=4,MY_NY=6,MY_NZ=8,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+qsub -q large -v MY_NX=16,MY_NY=12,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=8:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
 
 ## 2048 
 export MY_RES=2048
-qsub -q large -v MY_NX=8,MY_NY=12,MY_NZ=16,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+#qsub -q large -v MY_NX=8,MY_NY=12,MY_NZ=16,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
+qsub -q large -v MY_NX=32,MY_NY=48,MY_NZ=1,MY_SIZE=${MY_RES},MY_NTH=1,L_X=1,L_Y=1,L_Z=1 -l select=64:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernelConv_a2a.sh
 
 #end of file
diff --git a/samples/validation/run/zenobe_kernelConv_a2a.sh b/samples/validation/run/zenobe_kernelConv_a2a.sh
index 8b08fd8b..5eb2206e 100755
--- a/samples/validation/run/zenobe_kernelConv_a2a.sh
+++ b/samples/validation/run/zenobe_kernelConv_a2a.sh
@@ -3,7 +3,7 @@
 #PBS -N convergence
 #PBS -r y 
 #PBS -W group_list=examples
-#PBS -l walltime=00:10:00 
+#PBS -l walltime=00:20:00 
 
 exec > ${PBS_O_WORKDIR}/${PBS_JOBNAME}_${PBS_JOBID}.log 
 echo "------------------ Work dir --------------------" 
@@ -40,6 +40,8 @@ MY_SIZE_Z=$((${MY_SIZE} * ${L_Z}/${L_X}))
 ######################### -bc 4 4 4 4 4 4 ###########################
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 >> stdout_${PBS_JOBID}"
@@ -51,6 +53,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX}
 ######################### -bc 0 0 1 0 3 3 ###########################
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}
+#echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}"
+#mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 0 0 1 0 3 3 >> stdout_${PBS_JOBID}"
@@ -62,6 +66,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX}
 ######################### -bc 4 0 4 4 1 4 ###########################
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}
+# echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}"
+# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 0 4 4 1 4 >> stdout_${PBS_JOBID}"
@@ -73,6 +79,8 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX}
 ######################### -bc 3 3 4 4 4 4 ###########################
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}
+# echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}"
+# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}"
@@ -80,6 +88,60 @@ mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX}
 echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}"
 mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 4 4 >> stdout_${PBS_JOBID}
 
+###############################################################################################################################################################################################################################################
+#                      2D
+###############################################################################################################################################################################################################################################
+
+######################### -bc 4 4 4 4 9 9  ###########################
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}  # keep the space before '>>': a glued "9>>" is parsed as "append fd 9", dropping the last BC value
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 4 4 4 4 9 9 >> stdout_${PBS_JOBID}
+
+
+######################### -bc 0 0 1 0 9 9 ###########################
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}
+# echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 0 0 1 0 9 9  >> stdout_${PBS_JOBID}"
+# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 0 0 1 0 9 9  >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 0 0 1 0 9 9  >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 0 0 1 0 9 9  >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 0 0 1 0 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 0 0 1 0 9 9  >> stdout_${PBS_JOBID}
+
+
+######################### -bc 4 0 4 4 9 9 ###########################
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 4 0 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 4 0 4 4 9 9  >> stdout_${PBS_JOBID}
+
+
+######################### -bc 3 3 4 4 9 9 ###########################
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 0 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 0 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}
+# echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 1 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}"
+# mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 1 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 2 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 2 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 3 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 3 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}
+echo "launching  mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} ${MY_SIZE_Z} -L ${L_X} ${L_Y} ${L_Z} -nres 1 -k 4 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}"
+mpirun -n ${NCPUS} -genv OMP_NUM_THREADS=${MY_NTH} ./${EXEC_FLUPS} -np ${MY_NX} ${MY_NY} ${MY_NZ} -res ${MY_SIZE_X} ${MY_SIZE_Y} 1 -L ${L_X} ${L_Y} 0 -nres 1 -k 4 -bc 3 3 4 4 9 9 >> stdout_${PBS_JOBID}
 
 
 ################## 
diff --git a/samples/validation/scripts/test_3D_bcs.py b/samples/validation/scripts/test_3D_bcs.py
index eecfc710..a3cc5ff9 100644
--- a/samples/validation/scripts/test_3D_bcs.py
+++ b/samples/validation/scripts/test_3D_bcs.py
@@ -15,15 +15,21 @@
 n_success = 0
 n_failure = 0
 
+BC2 = BC1.copy()
+BC2.append(["9","9"])
+
 i = 0
 for bcx in BC1 :
     for bcy in BC1 :
-        for bcz in BC1:
+        for bcz in BC2:
             i+=1
             code = bcx[0] + bcx[1] + bcy[0] + bcy[1] + bcz[0] + bcz[1]
 
             #Launching test
-            r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["8"] + ["-bc"] + bcx + bcy + bcz, capture_output=True)
+            if(bcz == ["9","9"]):
+                r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["1"] + ["-bc"] + bcx + bcy + bcz, capture_output=True)
+            else:
+                r = subprocess.run(["./flups_validation_nb"] + ["-res"] + ["8"] + ["8"] + ["8"] + ["-bc"] + bcx + bcy + bcz, capture_output=True)
             
             if r.returncode != 0 :
                 print("test %i (BCs : "%i + code + ") failed with error code ",r.returncode)
diff --git a/samples/validation/scripts/test_3D_kerns.py b/samples/validation/scripts/test_3D_kerns.py
index aaf9bc29..9a50aebd 100644
--- a/samples/validation/scripts/test_3D_kerns.py
+++ b/samples/validation/scripts/test_3D_kerns.py
@@ -8,9 +8,12 @@
         ["4","1","1","4","4","4"],
         ["4","1","4","4","4","4"],
         ["4","0","1","4","4","4"],
-        ["4","0","1","4","4","1"]]
+        ["4","0","1","4","4","1"],
+        ["3","3","3","3","3","3"],
+        ["4","0","1","4","9","9"],
+        ["3","3","3","3","9","9"]]
 
-Kernels = ['0','2','3','4']
+Kernels = ['0','1','2','3','4']
 
 #Running all combinations of bcs and all kernels
 n_success = 0
@@ -27,9 +30,15 @@
 
         code = ''.join(bcs)
 
-        # Launching test
-        #+ ["-oversubscribe"]
-        r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["1"] + ["2"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["16"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True)
+        # if kernel = LGF, we only do the unbounded, if not, we do everything
+        # if ((kern=='1' and (bcs==["4","4","4","4","4","4"] or bcs==["3","3","3","3","9","9"])) or (kern != '1') ):
+            # Launching test
+            #+ ["-oversubscribe"]
+        if(bcs[4:6] == ["9","9"]):
+            # print("kikouuu from "%i + code)
+            r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["2"] + ["1"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["1"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True)
+        else:
+            r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["-np"] + ["1"] + ["2"] + ["1"] + ["-k"] + [kern] + ["-res"] + ["16"] + ["16"] + ["16"] + ["-nres"] + ["1"] + ["-bc"] + bcs, capture_output=True)
         
         if r.returncode != 0 :
             print("test %i (BCs : "%i + code + "with kernel "+kern+") failed with error code ",r.returncode)
@@ -51,7 +60,8 @@
             print("test %i (BCs : "%i + code + " and k="+ kern+ ") failed with wrong values.")
             print("/!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ -- /!\ \n")
             n_failure += 1
-
+        # else:
+        #     print("test %i (BCs : "%i + code + " and k="+ kern+ ") does not apply")
 
 print("%i test succeed out of %i" % (n_success,n_success+n_failure))
 exit(n_failure)
diff --git a/samples/validation/src/main.cpp b/samples/validation/src/main.cpp
index d6d6eea0..c7ab630a 100644
--- a/samples/validation/src/main.cpp
+++ b/samples/validation/src/main.cpp
@@ -51,7 +51,7 @@ static void print_help(){
     printf(" --nresolution, -nres Nr :      Nr is the number of higher resolutions that will be tested, with a resolution (R * 2^[0:Nr-1])\n");
     printf(" --nsolve, -ns Ns :             Ns is the number of times each validation case will be run (for statistics on the profiler) \n");
     printf(" --length, -L Lx Ly Lz :        Lx,Ly,Lz is the dimension of the physical domain \n");
-    printf(" --kernel, -k [0,2-4]:            the Green kernel 0=CHAT2, 2=HEJ2, 3=HEJ4, 4=HEJ6 \n");
+    printf(" --kernel, -k [0-4]:            the Green kernel 0=CHAT2, 1=LGF2, 2=HEJ2, 3=HEJ4, 4=HEJ6 \n");
     printf(" --boundary-conditions, -bc     \n ");
     printf("     Bxl Bxr Byl Byr Bzl Bzr : the boundary conditions in x/y/z on each side l/r. 0=EVEN, 1=ODD, 3=PERiodic, 4=UNBounded \n");
     printf(" --predefined-test, -pt :       runs a predefined validation test with several combination of UNB BCs and all the Green Kernels (excludes -L, -k and -bc) \n ");
@@ -101,8 +101,8 @@ int static parse_args(int argc, char *argv[], int nprocs[3], double L[3], FLUPS_
             for (int j = 0; j<3;j++){
                 if (i + j + 1 < argc) { // Make sure we aren't at the end of argv!
                     L[j] = atof(argv[i+j+1]); 
-                    if(L[j]<=0.0){
-                        fprintf(stderr, "L must be >0\n");
+                    if(L[j]<0.0){
+                        fprintf(stderr, "L must be >=0\n");
                         return 1;
                     }
                 } else { //Missing argument
diff --git a/samples/validation/src/validation_3d.cpp b/samples/validation/src/validation_3d.cpp
index 99046516..4288a411 100644
--- a/samples/validation/src/validation_3d.cpp
+++ b/samples/validation/src/validation_3d.cpp
@@ -248,6 +248,8 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const
             manuRHS[dir] = &d2dx2_fUnbSpietz;
             manuSol[dir] = &fUnbSpietz;
         } else {
+            manuRHS[dir] = &fZero;
+            manuSol[dir] = &fCst;
             // FLUPS_ERROR("I don''t know how to generate an analytical solution for this combination of BC.", LOCATION);
         }
     }
@@ -367,6 +369,14 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const
     double lerr2 = 0.0;
     double lerri = 0.0;
 
+    //determine the volume associated to a mesh
+    double vol = 1.0;
+    for (int id = 0; id < 3; id++) {
+        if (mybc[id][0] != NONE && mybc[id][1] != NONE) {
+            vol *= h[id];
+        }
+    }
+
     {
         const int ax0     = flups_topo_get_axis(topo);
         const int ax1     = (ax0 + 1) % 3;
@@ -379,7 +389,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const
                     const double err = sol[id] - field[id];
 
                     lerri = max(lerri, fabs(err));
-                    lerr2 += (err * err) * h[0] * h[1] * h[2];
+                    lerr2 += (err * err) * vol;
                 }
             }
         }
@@ -407,7 +417,8 @@ void validation_3d(const DomainDescr myCase, const FLUPS_SolverType type, const
             fprintf(myfile, "%d %12.12e %12.12e\n", nglob[0], err2, erri);
             fclose(myfile);
         } else {
-            // FLUPS_CHECK(false, "unable to open file %s", filename, LOCATION);
+            printf("unable to open file %s ! Here is what I would have written:", filename);
+            printf("%d %12.12e %12.12e\n", nglob[0], err2, erri);
         }
     }
 
diff --git a/src/FFTW_plan_dim.cpp b/src/FFTW_plan_dim.cpp
index 440b0c70..eadd5fa6 100644
--- a/src/FFTW_plan_dim.cpp
+++ b/src/FFTW_plan_dim.cpp
@@ -80,6 +80,14 @@ FFTW_plan_dim::FFTW_plan_dim(const int dimID, const double h[3], const double L[
         _volfact  = h[_dimID];
         _kfact    = c_2pi / (2.0 * L[_dimID]);
         _koffset  = 0.0;
+    } else if (mytype == EMPTY) {
+        _type = EMPTY;
+        // chosen to have no influence
+        _normfact   = 1.0;
+        _volfact    = 1.0;
+        _kfact      = 0.0;
+        _koffset    = 0.0;
+        _isSpectral = false;
     } else {
         FLUPS_ERROR("Invalid combination of BCs", LOCATION);
     }
@@ -128,9 +136,11 @@ void FFTW_plan_dim::init(const int size[3], const bool isComplex) {
         _init_mixunbounded(size, isComplex);
     } else if (_type == PERPER) {
         //this is the only transform that could give a R2C on data and being spectral for green
-        _init_periodic(size, isComplex); 
+        _init_periodic(size, isComplex);
     } else if (_type == UNBUNB) {
         _init_unbounded(size, isComplex);
+    } else if (_type == EMPTY) {
+        FLUPS_INFO_1("No plan required for this direction");
     }
     END_FUNC;
 }
@@ -473,6 +483,8 @@ void FFTW_plan_dim::_allocate_plan_real(const Topology *topo, double* data) {
         _plan        = fftw_plan_r2r_1d(_n_in, data, data, _kind, FFTW_FLAG);
 
     } else if (topo->nf() == 2) {
+        // if the topology is complex and I will do a R2R
+        // I do the FFT only on the real part
         _fftw_stride = memsize[_dimID] * topo->nf();
         _plan        = fftw_plan_many_r2r(1, (int*)(&_n_in), 1,
                                    data, NULL, topo->nf(), memsize[_dimID] * topo->nf(),
@@ -565,6 +577,23 @@ void FFTW_plan_dim::_allocate_plan_complex(const Topology *topo, double* data) {
 
     } else {
         FLUPS_CHECK(topo->nf() == 2, "the nf of the input topology has to be 1 = real topo",LOCATION);
+        FLUPS_INFO("------------------------------------------");
+        if (_type == PERPER) {
+            FLUPS_INFO("## C2C plan created for plan periodic-periodic (=%d)", _type);
+        } else if (_type == UNBUNB) {
+            FLUPS_INFO("## C2C plan created for plan unbounded (=%d)", _type);
+        }
+        if (_sign == FLUPS_FORWARD) {
+            FLUPS_INFO("FORWARD transfrom");
+        } else if (_sign == FLUPS_BACKWARD) {
+            FLUPS_INFO("BACKWARD transfrom");
+        }
+        FLUPS_INFO("memsize = %d x %d x %d", memsize[0], memsize[1], memsize[2]);
+        FLUPS_INFO("dimID     = %d", _dimID);
+        FLUPS_INFO("howmany   = %d", _howmany);
+        FLUPS_INFO("fftw stride   = %d", _fftw_stride);
+        FLUPS_INFO("size n    = %d", _n_in);
+        FLUPS_INFO("------------------------------------------");
         _plan = fftw_plan_dft_1d(_n_in, (fftw_complex*)data, (fftw_complex*)data, _sign, FFTW_FLAG);
     }
     END_FUNC;
@@ -591,6 +620,9 @@ void FFTW_plan_dim::execute_plan(const Topology *topo, double* data) const {
         FLUPS_INFO(">> Doing plan periodic-periodic for dim %d", _dimID);
     } else if (_type == UNBUNB) {
         FLUPS_INFO(">> Doing plan unbounded for dim %d", _dimID);
+    } else if (_type == EMPTY) {
+        FLUPS_INFO(">> Doing no plan for dim %d", _dimID);
+        return;
     }
 
     const int howmany = _howmany;
@@ -604,7 +636,16 @@ void FFTW_plan_dim::execute_plan(const Topology *topo, double* data) const {
 #ifndef NDEBUG
     for (int id = 0; id < howmany; id++) {
         // get the memory
-        double* mydata = (double*)data + id * fftw_stride;
+        double* mydata;
+        if (_type == SYMSYM || _type == MIXUNB) {
+            mydata = (double*)data + id * fftw_stride;
+        } else if (_type == PERPER || _type == UNBUNB) {
+            if (_isr2c) {
+                mydata = (double*)data + id * fftw_stride;
+            } else {
+                mydata = (double*)data + id * fftw_stride * 2;
+            }
+        }
         // check the alignment
         FLUPS_CHECK(fftw_alignment_of(mydata) == 0, "data for FFTW have to be aligned on the FFTW alignement! Alignment is %d with id = %d and fftw_stride = %d", fftw_alignment_of(mydata), id, _fftw_stride, LOCATION);
     }
diff --git a/src/FFTW_plan_dim.hpp b/src/FFTW_plan_dim.hpp
index aa112ffc..892a0264 100644
--- a/src/FFTW_plan_dim.hpp
+++ b/src/FFTW_plan_dim.hpp
@@ -35,6 +35,7 @@
  * 
  */
 class FFTW_plan_dim {
+   public:
     /**
      * @brief PlanType is the type of plan considered and is computed as the sum of both BoundaryType variables
      * 
@@ -47,7 +48,8 @@ class FFTW_plan_dim {
         SYMSYM = 2, /**< type real 2 real (DCT / DST) : EE (0) , EO/OE (1) , OO (2) */
         MIXUNB = 5, /**< type unbounded and a symetry condition: UE/EU (4) , UO/OU (5) */
         PERPER = 6, /**< type periodic - periodic: PERPER (6) */
-        UNBUNB = 8  /**< type fully unbounded UU (8) */
+        UNBUNB = 8, /**< type fully unbounded UU (8) */
+        EMPTY  = 18 /**< type empty, i.e. this direction is not used */
     };
 
    protected:
@@ -55,21 +57,21 @@ class FFTW_plan_dim {
     const int  _dimID;   /**< @brief the dimension of the plan in the field reference */
     const int  _sign;    /**< @brief FFT_FORWARD (-1) or FFT_BACKWARD(+1) */
 
-    bool   _ignoreMode = false; /**< @brief do we have to ignore a mode in the output? k=0 if _shiftgreen=1 or k=end if _shiftgreen = 0*/
-    bool   _isr2c      = false; /**< @brief is this plan the one that changes to complex?*/
-    bool   _imult      = false; /**< @brief boolean to determine if we have to multiply by (i=sqrt(-1)) or not*/
-    bool   _isSpectral = false; /**< @brief indicate if the Green's function has to be done spectrally (leading to a helmolz problem) */
+    bool   _ignoreMode  = false; /**< @brief do we have to ignore a mode in the output? k=0 if _shiftgreen=1 or k=end if _shiftgreen = 0*/
+    bool   _isr2c       = false; /**< @brief is this plan the one that changes to complex?*/
+    bool   _imult       = false; /**< @brief boolean to determine if we have to multiply by (i=sqrt(-1)) or not*/
+    bool   _isSpectral  = false; /**< @brief indicate if the Green's function has to be done spectrally (leading to a helmolz problem) */
     int    _fftw_stride = 0;
-    int    _howmany         = 0;
-    int    _fieldstart = 0;     /**< @brief the starting index for the field copy in the direction of the plan*/
-    int    _n_in       = 0;     /**< @brief the number of element in the transform*/
-    int    _n_out      = 0;     /**< @brief the number of element coming out of the transform*/
-    int    _shiftgreen = 0;     /**< @brief the shift to set in the Green's function when doing the convolution*/
-    double _symstart   = 0.0;   /**< @brief the first index to be copied for the symmetry done on the Green's function, set to 0 if no symmetry is needed*/
-    double _normfact   = 0.0;   /**< @brief factor you need to multiply to get the transform on the right scaling*/
-    double _volfact    = 0.0;   /**< @brief volume factor*/
-    double _kfact      = 0.0;   /**< @brief multiplication factor to have the correct k numbers*/
-    double _koffset    = 0.0;   /**< @brief additive factor to have the correct k numbers*/
+    int    _howmany     = 0;
+    int    _fieldstart  = 0;   /**< @brief the starting index for the field copy in the direction of the plan*/
+    int    _n_in        = 1;   /**< @brief the number of element in the transform*/
+    int    _n_out       = 1;   /**< @brief the number of element coming out of the transform*/
+    int    _shiftgreen  = 0;   /**< @brief the shift to set in the Green's function when doing the convolution*/
+    double _symstart    = 0.0; /**< @brief the first index to be copied for the symmetry done on the Green's function, set to 0 if no symmetry is needed*/
+    double _normfact    = 1.0; /**< @brief factor you need to multiply to get the transform on the right scaling*/
+    double _volfact     = 1.0; /**< @brief volume factor*/
+    double _kfact       = 0.0; /**< @brief multiplication factor to have the correct k numbers*/
+    double _koffset     = 0.0; /**< @brief additive factor to have the correct k numbers*/
 
     PlanType     _type;  /**< @brief type of this plan, see #PlanType*/
     BoundaryType _bc[2]; /**< @brief boundary condition [0]=LEFT/MIN - [1]=RIGHT/MAX*/
diff --git a/src/Profiler.cpp b/src/Profiler.cpp
index ad27a0e7..b5e668ae 100644
--- a/src/Profiler.cpp
+++ b/src/Profiler.cpp
@@ -258,7 +258,9 @@ void TimerAgent::disp(FILE* file,const int level, const double totalTime){
         // printf the important information
         if (rank == 0) {
             printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", myname.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth);
-            fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", _name.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth);
+            if (file != NULL) {
+                fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", _name.c_str(), glob_percent, loc_percent, meanTime, selfTime, meanTimePerCount, minTimePerCount, maxTimePerCount, meanCount,meanBandwidth);
+            }
         }
     }
     // recursive call to the childrens
@@ -437,8 +439,13 @@ void Profiler::disp(const std::string ref) {
 
         string filename = folder + "/" + _name + "_parent.csv";
         file            = fopen(filename.c_str(), "w+");
-        _timeMap["root"]->writeParentality(file,0);
-        fclose(file);
+
+        if (file != NULL) {
+            _timeMap["root"]->writeParentality(file,0);
+            fclose(file);
+        } else {
+            printf("unable to open file %s !", filename.c_str());
+        }
     }
     
 
@@ -447,7 +454,7 @@ void Profiler::disp(const std::string ref) {
     //-------------------------------------------------------------------------
     
     if (rank == 0) {
-        string filename = "prof/" + _name + "_time.csv";
+        string filename = "./prof/" + _name + "_time.csv";
         file            = fopen(filename.c_str(), "w+");
     }
     // display the header
@@ -475,6 +482,11 @@ void Profiler::disp(const std::string ref) {
         printf("Max time - the max time / call spend in that timer among the processors\n");
         printf("Mean cnt - the total number of time the timer has been called (averaged among the processors)\n");
         printf("===================================================================================================================================================\n");
-        fclose(file);
+    
+        if (file != NULL) {
+            fclose(file);
+        } else {
+            printf("unable to open file for profiling !");
+        }
     }
 }
diff --git a/src/Solver.cpp b/src/Solver.cpp
index 8ac6eb86..b9c176b0 100644
--- a/src/Solver.cpp
+++ b/src/Solver.cpp
@@ -97,9 +97,12 @@ Solver::Solver(Topology *topo, const BoundaryType mybc[3][2], const double h[3],
     //-------------------------------------------------------------------------
     /** - For each dim, create the plans given the BC and sort them by type */
     //-------------------------------------------------------------------------
-    for (int id = 0; id < 3; id++)
+    for (int id = 0; id < 3; id++){
         _hgrid[id] = h[id];
+    }
 
+    // we allocate 3 plans
+    // some of them might be empty plans, but we keep them since we need the information they hold
     for (int id = 0; id < 3; id++) {
         _plan_forward[id]  = new FFTW_plan_dim(id, h, L, mybc[id], FLUPS_FORWARD, false);
         _plan_backward[id] = new FFTW_plan_dim(id, h, L, mybc[id], FLUPS_BACKWARD, false);
@@ -111,6 +114,16 @@ Solver::Solver(Topology *topo, const BoundaryType mybc[3][2], const double h[3],
     _sort_plans(_plan_green);
     FLUPS_INFO("I will proceed with forward transforms in the following direction order: %d, %d, %d", _plan_forward[0]->dimID(), _plan_forward[1]->dimID(), _plan_forward[2]->dimID());
 
+    //-------------------------------------------------------------------------
+    /** - compute the real problem size using forward plans, i.e. are we 2D or 3D? */
+    //-------------------------------------------------------------------------
+    _ndim = 3;
+    for(int id=0; id<3; id++){
+        if(_plan_forward[id]->type() == FFTW_plan_dim::EMPTY){
+            _ndim --;
+        }
+    }
+
     //-------------------------------------------------------------------------
     /** - Initialise the topos, the plans and the SwitchTopos */
     //-------------------------------------------------------------------------
@@ -170,8 +183,9 @@ double* Solver::setup(const bool changeTopoComm) {
     /** - Precompute the communication graph */
     //-------------------------------------------------------------------------
     // get the communication size
-    int worldsize;
+    int worldsize, rank;
     MPI_Comm_size(_topo_phys->get_comm(), &worldsize);
+    MPI_Comm_rank(_topo_phys->get_comm(), &rank);
     
     // initialize the sources, sources weights, destination and destination weights
     int* sources  = (int*)flups_malloc(worldsize * sizeof(int));
@@ -200,11 +214,11 @@ double* Solver::setup(const bool changeTopoComm) {
     // ranks not having a self block) !
     // if we can change the topology, do it for every swithTopo
     if (changeTopoComm) {
-        for (int i = 0; i < 3; i++) {
+        for (int i = 0; i < _ndim; i++) {
             _switchtopo[i]->add_toGraph(sourcesW, destsW);
         }
     } else {
-        for (int i = 1; i < 3; i++) {
+        for (int i = 1; i < _ndim; i++) {
             _switchtopo[i]->add_toGraph(sourcesW, destsW);
         }
     }
@@ -214,37 +228,30 @@ double* Solver::setup(const bool changeTopoComm) {
     //-------------------------------------------------------------------------
     MPI_Comm graph_comm;
 #ifndef HAVE_METIS
-    MPI_Dist_graph_create_adjacent(_topo_phys->get_comm(), worldsize, sources, sourcesW, \
-                                                    worldsize, dests, destsW, \
-                                                    MPI_INFO_NULL, 1, &graph_comm);
-    
-    flups_free(sources);
-    flups_free(sourcesW);
-    flups_free(dests);
-    flups_free(destsW);
+    MPI_Dist_graph_create_adjacent(_topo_phys->get_comm(), worldsize, sources, sourcesW,
+                                   worldsize, dests, destsW,
+                                   MPI_INFO_NULL, 1, &graph_comm);
 
-    #if defined(VERBOSE) && VERBOSE==2
+#if defined(VERBOSE) && VERBOSE == 2
     int inD, outD, wei;
     MPI_Dist_graph_neighbors_count(graph_comm, &inD, &outD, &wei);
-    printf("[FGRAPH] inD:%d outD:%d wei:%d\n",inD,outD,wei);
+    printf("[FGRAPH] inD:%d outD:%d wei:%d\n", inD, outD, wei);
 
-    int* Sour = (int*) malloc(sizeof(int)*inD);
-    int* SourW = (int*) malloc(sizeof(int)*inD);
-    int* Dest = (int*) malloc(sizeof(int)*outD);
-    int* DestW = (int*) malloc(sizeof(int)*outD);
+    int *Sour  = (int *)malloc(sizeof(int) * inD);
+    int *SourW = (int *)malloc(sizeof(int) * inD);
+    int *Dest  = (int *)malloc(sizeof(int) * outD);
+    int *DestW = (int *)malloc(sizeof(int) * outD);
 
-    MPI_Dist_graph_neighbors(graph_comm, inD,  Sour,      SourW,
-                                        outD, Dest,      DestW);
+    MPI_Dist_graph_neighbors(graph_comm, inD, Sour, SourW,
+                             outD, Dest, DestW);
 
     printf("[FGRAPH] INedges: ");
-    for (int i=0; i<inD; i++)
-    {
-        printf("%d (%d), ",Sour[i],SourW[i]);
+    for (int i = 0; i < inD; i++) {
+        printf("%d (%d), ", Sour[i], SourW[i]);
     }
     printf("\n[FGRAPH] OUTedges: ");
-    for (int i=0; i<outD; i++)
-    {
-        printf("%d (%d), ",Dest[i],DestW[i]);
+    for (int i = 0; i < outD; i++) {
+        printf("%d (%d), ", Dest[i], DestW[i]);
     }
     printf("\n");
 
@@ -252,21 +259,18 @@ double* Solver::setup(const bool changeTopoComm) {
     free(SourW);
     free(Dest);
     free(DestW);
-    #endif
+#endif
 
     //-------------------------------------------------------------------------
     /** - if asked by the user, we overwrite the graph comm by a forced version (for test purpose) */
     //-------------------------------------------------------------------------
 #ifdef DEV_SIMULATE_GRAPHCOMM
-    int rank;
-    MPI_Comm_rank(_topo_phys->get_comm(), &rank);
-
     //switch indices by a random number:
-#ifdef DEV_REORDER_SHIFT
-    int shift = DEV_REORDER_SHIFT;
-#else
-    int shift = worldsize/2;
-#endif
+    #ifdef DEV_REORDER_SHIFT
+        int shift = DEV_REORDER_SHIFT;
+    #else
+        int shift = worldsize/2;
+    #endif
 
     int* outRanks = (int*) flups_malloc(sizeof(int)*worldsize);
     if(rank == 0){
@@ -286,18 +290,26 @@ double* Solver::setup(const bool changeTopoComm) {
 
     flups_free(outRanks);
 #endif
+//end simulate_graph
+
+    #ifdef PROF
+    //writing reordering to console
+    int newrank;
+    MPI_Comm_rank(graph_comm, &newrank);
+    printf("[MPI ORDER] %i : %i \n", rank, newrank);
+    #endif
 
 #else
     //Use METIS to find a smart partition of the graph
     int *order = (int *)flups_malloc(sizeof(int) * worldsize);
-    reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order);
+    _reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order);
     // create a new comm based on the order given by metis
     MPI_Group group_in, group_out;
     MPI_Comm_group(_topo_phys->get_comm(), &group_in);                //get the group of the current comm
     MPI_Group_incl(group_in, worldsize, order, &group_out);           //manually reorder the ranks
     MPI_Comm_create(_topo_phys->get_comm(), group_out, &graph_comm);  // create the new comm
     flups_free(order);
-#endif
+#endif // METIS
 
     flups_free(sources);
     flups_free(sourcesW);
@@ -312,7 +324,7 @@ double* Solver::setup(const bool changeTopoComm) {
     // The first switch topo will serve to redistribute
     // data following the optimized topology on the cluster, with reordered 
     // ranks
-    for(int i=0;i<3;i++){
+    for(int i=0;i<_ndim;i++){
         _topo_hat[i]->change_comm(graph_comm);
         _topo_green[i]->change_comm(graph_comm);
     }
@@ -320,9 +332,9 @@ double* Solver::setup(const bool changeTopoComm) {
         _topo_phys->change_comm(graph_comm);
     }
 
-#ifdef PERF_VERBOSE
+    #ifdef PERF_VERBOSE
     _topo_hat[0]->disp_rank();
-#endif
+    #endif
 
 #endif //REORDER_RANKS
 
@@ -352,7 +364,7 @@ double* Solver::setup(const bool changeTopoComm) {
     // finalize green by replacing some data in full spectral if needed by the kernel,
     // and by doing a last switch to the field topo
     if (_prof != NULL) _prof->start("green_final");
-    _finalizeGreenFunction(_topo_hat[2], _green, _topo_green[2], _plan_green);
+    _finalizeGreenFunction(_topo_hat[_ndim-1], _green, _topo_green[_ndim-1], _plan_green);
     if (_prof != NULL) _prof->stop("green_final");
 
     //-------------------------------------------------------------------------
@@ -384,7 +396,7 @@ double* Solver::setup(const bool changeTopoComm) {
     //-------------------------------------------------------------------------
     /** - Setup the SwitchTopo, this will take the latest comm into account */
     //-------------------------------------------------------------------------
-    _allocate_switchTopo(3, _switchtopo, &_sendBuf, &_recvBuf);
+    _allocate_switchTopo(_ndim, _switchtopo, &_sendBuf, &_recvBuf);
 
     if (_prof != NULL) _prof->stop("setup");
 
@@ -413,7 +425,7 @@ Solver::~Solver() {
 
     // cleanup the communicator if any
 #ifdef REORDER_RANKS
-    MPI_Comm mycomm = _topo_hat[2]->get_comm();
+    MPI_Comm mycomm = _topo_hat[_ndim-1]->get_comm();
     MPI_Comm_free(&mycomm);
 #endif
     _delete_topologies(_topo_hat);
@@ -440,7 +452,7 @@ const Topology* Solver::get_innerTopo_physical() {
  * 
  */
 const Topology* Solver::get_innerTopo_spectral() {
-    return _topo_hat[2];
+    return _topo_hat[_ndim-1];
 }
 
 /**
@@ -578,7 +590,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
     //-------------------------------------------------------------------------
     bool isComplex = false;  //this refers to the "current state" of the data during dry run
     int  nproc[3];
-    for (int ip = 0; ip < 3; ip++) {
+    for (int ip = 0; ip < _ndim; ip++) {
         // initialize the plan (for Green only, using info from _plan_forward)
         planmap[ip]->init(size_tmp, isComplex);
         // update the size_tmp variable and get the complex information
@@ -597,15 +609,12 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
         if (!isGreen && topomap != NULL && switchtopo != NULL) {
             // determines the proc repartition using the previous one if available
             if (ip == 0) {
-                //This was to keep an aspect ratio of the pencils in ax0 close to 1:
-                // pencil_nproc(dimID, nproc, comm_size, size_tmp);
-                //---------
-                //Finally, we opt for the following, which will maximize the total number of subcoms that we will be able to do
-                // over the 3 switchtopos:
+                // for the first switchTopo, we keep the number of proc constant in the 3rd direction
                 const int nproc_hint[3] = {topo->nproc(0), topo->nproc(1), topo->nproc(2)};
                 pencil_nproc_hint(dimID, nproc, comm_size, dimOrder[1], nproc_hint);
             } else {
                 const int nproc_hint[3] = {current_topo->nproc(0), current_topo->nproc(1), current_topo->nproc(2)};
+                // for the other switchTopos, we keep constant the direction that is neither mine nor the previous topo's
                 pencil_nproc_hint(dimID, nproc, comm_size, planmap[ip - 1]->dimID(), nproc_hint);
             }
             // create the new topology corresponding to planmap[ip] in the output layout (size and isComplex)
@@ -660,7 +669,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
     current_topo = NULL;
     // isComplex = false; //Change this for Helmolz: we will always need to fill Green in complex
     if (isGreen && topomap != NULL && switchtopo != NULL) {
-        for (int ip = 2; ip >= 0; ip--) {
+        for (int ip = _ndim-1; ip >= 0; ip--) {
             // get the fastest rotating index
             int dimID = planmap[ip]->dimID();  // store the correspondance of the transposition
 
@@ -670,10 +679,10 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
             }
 
             // get the proc repartition
-            if(ip>1){
+            if(ip>_ndim-2){
                 //it has to be the same as the field in full spectral
                 for(int i = 0;i<3;i++){
-                    nproc[i]=_topo_hat[2]->nproc(i);
+                    nproc[i]=_topo_hat[_ndim-1]->nproc(i);
                 }
             }else{
                 const int nproc_hint[3] = {current_topo->nproc(0), current_topo->nproc(1), current_topo->nproc(2)};
@@ -683,7 +692,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
             // create the new topology in the output layout (size and isComplex)
             topomap[ip] = new Topology(dimID, size_tmp, nproc, isComplex, dimOrder, _fftwalignment, _topo_phys->get_comm());
             //switchmap only to be done for topo0->topo1 and topo1->topo2
-            if (ip < 2) {
+            if (ip < _ndim-1) {
                 // get the fieldstart = the point where the old topo has to begin in the new
                 int fieldstart[3] = {0};
                 // it shouldn't be different from 0 for the moment
@@ -736,7 +745,7 @@ void Solver::_init_plansAndTopos(const Topology *topo, Topology *topomap[3], Swi
     //-------------------------------------------------------------------------
     /** - reset the topologies to real if needed, in order to prepare them for their execution  */
     //-------------------------------------------------------------------------
-    for (int ip = 0; ip < 3; ip++) {
+    for (int ip = 0; ip < _ndim; ip++) {
         if (!isGreen && planmap[ip]->isr2c() && topomap != NULL) {
             topomap[ip]->switch2real();
         }
@@ -801,7 +810,7 @@ void Solver::_deallocate_switchTopo(SwitchTopo **switchtopo, opt_double_ptr *sen
  */
 void Solver::_allocate_plans(const Topology *const topo[3], FFTW_plan_dim *planmap[3], double *data) {
     BEGIN_FUNC;
-    for (int ip = 0; ip < 3; ip++) {
+    for (int ip = 0; ip < _ndim; ip++) {
         planmap[ip]->allocate_plan(topo[ip], data);
     }
     END_FUNC;
@@ -826,7 +835,7 @@ void Solver::_allocate_data(const Topology *const topo[3], const Topology *topo_
     //-------------------------------------------------------------------------
     // the biggest size will be along the pencils
     size_t size_tot = 1;
-    for (int id = 0; id < 3; id++) {
+    for (int id = 0; id < _ndim; id++) {
         size_tot = std::max(topo[id]->memsize(), size_tot);
     }
     if (topo_phys != NULL) {
@@ -867,17 +876,18 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
     //-------------------------------------------------------------------------
     bool isSpectral[3] = {false};
 
-    double hfact[3];    // multiply the index by this factor to obtain the position (1/2/3 corresponds to x/y/z )
-    double kfact[3];    // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z )
-    double koffset[3];  // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z )
-    double symstart[3];
-    double epsilon = _alphaGreen * _hgrid[0]; //the smoothing length scale of the HEJ kernels
+    double hfact[3]    = {0.0, 0.0, 0.0};  // multiply the index by this factor to obtain the position (1/2/3 corresponds to x/y/z )
+    double kfact[3]    = {0.0, 0.0, 0.0};  // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z )
+    double koffset[3]  = {0.0, 0.0, 0.0};  // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z )
+    double symstart[3] = {0.0, 0.0, 0.0};
+    double epsilon     = _alphaGreen * _hgrid[0];  //the smoothing length scale of the HEJ kernels
 
-    if ((_typeGreen == HEJ_2 || _typeGreen == HEJ_4 || _typeGreen == HEJ_6) && (_hgrid[0] != _hgrid[1] || _hgrid[1] != _hgrid[2])) {
-        FLUPS_ERROR("You are trying to use a regularized kernel while not having dx=dy=dz.",LOCATION);
+    if ((_typeGreen == HEJ_2 || _typeGreen == HEJ_4 || _typeGreen == HEJ_6 || _typeGreen == LGF_2) && ((_ndim == 3 && (_hgrid[0] != _hgrid[1] || _hgrid[1] != _hgrid[2])) || (_ndim == 2 && _hgrid[0] != _hgrid[1]))) {
+        FLUPS_ERROR("You are trying to use a regularized kernel or a LGF while not having dx=dy=dz.", LOCATION);
     }
 
-    for (int ip = 0; ip < 3; ip++) {
+    // get the info and determine which Green function to use:
+    for (int ip = 0; ip < _ndim; ip++) {
         const int dimID = planmap[ip]->dimID();
         // get usefull datas
         isSpectral[dimID] = planmap[ip]->isSpectral();
@@ -891,11 +901,15 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
             kfact[dimID]   = planmap[ip]->kfact();
             koffset[dimID] = planmap[ip]->koffset();
         }
+        if (planmap[ip]->type() == FFTW_plan_dim::EMPTY) {
+            // kill the hfact to have no influence in the green's functions
+            hfact[dimID] = 0.0;
+        }
     }
 
-    // count the number of spectral dimensions
+    // count the spectral dimensions; isSpectral is indexed by dimID (0..2) and zero-initialised, so all 3 entries are scanned
     int nbr_spectral = 0;
     for (int id = 0; id < 3; id++) {
         if (isSpectral[id]) {
             nbr_spectral++;
         }
@@ -904,22 +918,21 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
     //-------------------------------------------------------------------------
     /** - get the expression of Green in the full domain*/
     //-------------------------------------------------------------------------
-    if (GREEN_DIM == 3) {
-        if (nbr_spectral == 0) {
-            FLUPS_INFO(">> using Green function type %d on 3 dir unbounded",_typeGreen);
-            cmpt_Green_3D_3dirunbounded_0dirspectral(topo[0], hfact, symstart, green, _typeGreen, epsilon);
-        } else if (nbr_spectral == 1) {
-            FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded - 1 dir spectral",_typeGreen);
-            cmpt_Green_3D_2dirunbounded_1dirspectral(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon);
-        } else if (nbr_spectral == 2) {
-            FLUPS_INFO(">> using Green function of type %d on 1 dir unbounded - 2 dir spectral",_typeGreen);
-            cmpt_Green_3D_1dirunbounded_2dirspectral(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon);
-        } else if (nbr_spectral == 3) {
-            FLUPS_INFO(">> using Green function of type %d on 3 dir spectral",_typeGreen);        
-            cmpt_Green_3D_0dirunbounded_3dirspectral(topo[0], kfact, koffset, symstart, green, _typeGreen, epsilon);
-        }
+    int n_unbounded = _ndim - nbr_spectral;
+    if ((n_unbounded) == 3) {
+        FLUPS_INFO(">> using Green function type %d on 3 dir unbounded", _typeGreen);
+        cmpt_Green_3dirunbounded(topo[0], hfact, symstart, green, _typeGreen, epsilon);
+    } else if ((n_unbounded) == 2) {
+        FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded", _typeGreen);
+        cmpt_Green_2dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon);
+    } else if ((n_unbounded) == 1) {
+        FLUPS_INFO(">> using Green function of type %d on 1 dir unbounded", _typeGreen);
+        cmpt_Green_1dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, _typeGreen, epsilon);
+    } else if ((n_unbounded) == 0) {
+        FLUPS_INFO(">> using Green function of type %d on 3 dir spectral", _typeGreen);
+        cmpt_Green_0dirunbounded(topo[0], _hgrid[0], kfact, koffset, symstart, green, _typeGreen, epsilon);
     } else {
-        FLUPS_ERROR("Sorry, the Green's function for 2D problems are not provided in this version.", LOCATION);
+        FLUPS_ERROR("Sorry, the number of unbounded directions does not match: %d = %d - %d", n_unbounded, _ndim, nbr_spectral, LOCATION);
     }
 
     // dump the green func
@@ -932,7 +945,7 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
     //-------------------------------------------------------------------------
     /** - compute a symmetry and do the forward transform*/
     //-------------------------------------------------------------------------
-    for (int ip = 0; ip < 3; ip++) {
+    for (int ip = 0; ip < _ndim; ip++) {
         const int dimID = planmap[ip]->dimID();
 
         // go to the topology for the plan, if we are not already on it
@@ -955,13 +968,13 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
     //-------------------------------------------------------------------------
     // - Explixitely destroying mode 0 ? no need to do that: we impose Green[0] is 0
     //   in full spectral.
-    _scaleGreenFunction(topo[2], green, false);
+    _scaleGreenFunction(topo[_ndim-1], green, false);
 
     //-------------------------------------------------------------------------
     /** - Complete the Green function in 2dirunbounded regularized case: we rewrite on the whole domain
      *      except the plane where k=0 in the spectral direction, as this was correctly computed. */
     // No need to scale this as that part of the Green function has a volfact = 1
-    if (GREEN_DIM == 3 && nbr_spectral == 1 && (_typeGreen==HEJ_2||_typeGreen==HEJ_4||_typeGreen==HEJ_6)) {
+    if (_ndim == 3 && nbr_spectral == 1 && (_typeGreen==HEJ_2||_typeGreen==HEJ_4||_typeGreen==HEJ_6)) {
         int istart_cstm[3] = {0, 0, 0};  //global
 
         for (int ip = 0; ip < 3; ip++) {
@@ -971,22 +984,9 @@ void Solver::_cmptGreenFunction(Topology *topo[3], double *green, FFTW_plan_dim
             kfact[dimID]       = planmap[ip]->kfact();
             koffset[dimID]    += planmap[ip]->shiftgreen();  //accounts for shifted modes which affect the value of k
         }
-        cmpt_Green_3D_0dirunbounded_3dirspectral(topo[2], kfact, koffset, symstart, green, _typeGreen, epsilon, istart_cstm, NULL);
+        cmpt_Green_0dirunbounded(topo[2], _hgrid[0], kfact, koffset, symstart, green, _typeGreen, epsilon, istart_cstm, NULL);
     }
 
-    //-------------------------------------------------------------------------
-    // This is what you would fo if you had to fill only the first plan:
-    // {
-    //     int iend_cstm[3] = {topo->nloc(0), topo->nloc(1), topo->nloc(2)}; //global
-    //     for (int ip = 0; ip < 3; ip++) {
-    //         const int dimID = planmap[ip]->dimID();
-    //         iend_cstm[ip]      = isSpectral[ip] ? 1 - planmap[ip]->shiftgreen() : topo->nloc(ip);  //selecting only mode 0 = cte (in per and even-even)
-    //         kfact[dimID]       = planmap[ip]->kfact();
-    //         koffset[dimID]    += planmap[ip]->shiftgreen();  //accounts for shifted modes which affect the value of k
-    //     }
-    //     cmpt_Green_3D_0dirunbounded_3dirspectral(topo, kfact, koffset, symstart, green, _typeGreen, epsilon, NULL, iend_cstm);
-    // }
-
 #ifdef DUMP_DBG
-    hdf5_dump(topo[2], "green_h", green);
+    hdf5_dump(topo[_ndim-1], "green_h", green);  // dump on the last active topo: topo[2] is not created for 2D problems
 #endif
@@ -1056,15 +1056,19 @@ void Solver::_finalizeGreenFunction(Topology *topo_field, double *green, const T
     /** - If needed, we create a new switchTopo from the current Green topo to the field one */
 
     //simulate that we have done the transforms
-    if(planmap[0]->isr2c() || planmap[1]->isr2c() || planmap[2]->isr2c()){
+    bool isr2c = false;
+    for(int id=0; id<_ndim; id++){
+        isr2c = isr2c || planmap[id]->isr2c();
+    }
+    if(isr2c){
         topo_field->switch2complex();
     }
 
-    if (planmap[2]->ignoreMode()) {
-        const int dimID = planmap[2]->dimID();
+    if (planmap[_ndim-1]->ignoreMode()) {
+        const int dimID = planmap[_ndim-1]->dimID();
         // get the shift
         int fieldstart[3] = {0};
-        fieldstart[dimID] = -planmap[2]->shiftgreen();
+        fieldstart[dimID] = -planmap[_ndim-1]->shiftgreen();
         // we do the link between topo of Green and the field topo
 #if defined(COMM_NONBLOCK)
         SwitchTopo *switchtopo = new SwitchTopo_nb(topo, topo_field, fieldstart, NULL);
@@ -1092,7 +1096,7 @@ void Solver::_finalizeGreenFunction(Topology *topo_field, double *green, const T
         FLUPS_CHECK(topo->nglob(2) == topo_field->nglob(2), "Topo of Green has to be the same as Topo of field", LOCATION);
     }
     //coming back (only if the last plan was r2c. No need it if was c2c or r2r...)
-    if(planmap[2]->isr2c()){
+    if(planmap[_ndim-1]->isr2c()){
         topo_field->switch2real();
     }
     END_FUNC;
@@ -1144,18 +1148,16 @@ void Solver::solve(double *field, double *rhs, const SolverType type) {
     do_FFT(mydata, FLUPS_FORWARD);
 
 #ifdef DUMP_DBG
-    hdf5_dump(_topo_hat[2], "rhs_h", mydata);
+    hdf5_dump(_topo_hat[_ndim-1], "rhs_h", mydata);
 #endif
     //-------------------------------------------------------------------------
     /** - Perform the magic */
     //-------------------------------------------------------------------------
     do_mult(mydata, type);
 
-    if (_prof != NULL) _prof->stop("domagic");
-
 #ifdef DUMP_DBG
     // io if needed
-    hdf5_dump(_topo_hat[2], "sol_h", mydata);
+    hdf5_dump(_topo_hat[_ndim-1], "sol_h", mydata);
 #endif
     //-------------------------------------------------------------------------
     /** - go back to reals */
@@ -1169,7 +1171,7 @@ void Solver::solve(double *field, double *rhs, const SolverType type) {
 
 #ifdef DUMP_DBG
     // io if needed
-    hdf5_dump(_topo_phys, "sol", myfield);
+    hdf5_dump(_topo_phys, "sol", field);
 #endif
     // stop the whole timer
     if (_prof != NULL) _prof->stop("solve");
@@ -1281,7 +1283,7 @@ void Solver::do_FFT(double *data, const int sign){
     opt_double_ptr  mydata  = data;
 
     if (sign == FLUPS_FORWARD) {
-        for (int ip = 0; ip < 3; ip++) {
+        for (int ip = 0; ip < _ndim; ip++) {
             // go to the correct topo
             _switchtopo[ip]->execute(mydata, FLUPS_FORWARD);
             // run the FFT
@@ -1294,7 +1296,7 @@ void Solver::do_FFT(double *data, const int sign){
             }
         }
     } else {  //FLUPS_BACKWARD
-        for (int ip = 2; ip >= 0; ip--) {
+        for (int ip = _ndim-1; ip >= 0; ip--) {
             if (_prof != NULL) _prof->start("fftw");
             _plan_backward[ip]->execute_plan(_topo_hat[ip], mydata);
             if (_prof != NULL) _prof->stop("fftw");
@@ -1321,7 +1323,7 @@ void Solver::do_mult(double *data, const SolverType type){
     
     if (_prof != NULL) _prof->start("domagic");
     if (type == SRHS) {
-        if (!_topo_hat[2]->isComplex()) {
+        if (!_topo_hat[_ndim-1]->isComplex()) {
             //-> there is only the case of 3dirSYM in which we could stay real for the whole process
             if (_nbr_imult == 0)
                 dothemagic_rhs_real(data);
@@ -1339,19 +1341,22 @@ void Solver::do_mult(double *data, const SolverType type){
     } else {
         FLUPS_CHECK(false, "type of solver %d not implemented", type, LOCATION);
 
-        // - Obtain what's needed to compute k 
-        double kfact[3];    // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z )
-        double koffset[3];  // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z )
+        // - Obtain what's needed to compute k
+        double kfact[3]   = {0.0, 0.0, 0.0};  // multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z )
+        double koffset[3] = {0.0, 0.0, 0.0};  // add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z )
 
-        for (int ip = 0; ip < 3; ip++) {
+        for (int ip = 0; ip < _ndim; ip++) {
             const int dimID = _plan_forward[ip]->dimID();
             kfact[dimID]    = _plan_forward[ip]->kfact();
             koffset[dimID]  = _plan_forward[ip]->koffset() + _plan_forward[ip]->shiftgreen();
         }
         // todo: if topo is not complex, need to handle the fact that we will multiply by i*
 
+        // WARNING: need to adapt the LDA of the topology WARNING
+
         //dothemagic...
     }
+    if (_prof != NULL) _prof->stop("domagic");
     END_FUNC;
 }
 
@@ -1362,10 +1367,11 @@ void Solver::do_mult(double *data, const SolverType type){
  */
 void Solver::dothemagic_rhs_real(double *data) {
     BEGIN_FUNC;
-    FLUPS_CHECK(_topo_hat[2]->nf() == 1, "The topo_hat[2] has to be real", LOCATION);
+    int cdim = _ndim-1; // get current dim
+    FLUPS_CHECK(_topo_hat[cdim]->nf() == 1, "The topo_hat[2] has to be real", LOCATION);
 
     // get the axis
-    const int ax0 = _topo_hat[2]->axis();
+    const int ax0 = _topo_hat[cdim]->axis();
     const int ax1 = (ax0 + 1) % 3;
     const int ax2 = (ax0 + 2) % 3;
     // get the factors
@@ -1375,12 +1381,12 @@ void Solver::dothemagic_rhs_real(double *data) {
     FLUPS_ASSUME_ALIGNED(mydata,FLUPS_ALIGNMENT);
     FLUPS_ASSUME_ALIGNED(mygreen,FLUPS_ALIGNMENT);
     {
-        const size_t onmax   = _topo_hat[2]->nloc(ax1) * _topo_hat[2]->nloc(ax2);
-        const size_t inmax   = _topo_hat[2]->nloc(ax0);
-        const int    nmem[3] = {_topo_hat[2]->nmem(0), _topo_hat[2]->nmem(1), _topo_hat[2]->nmem(2)};
+        const size_t onmax   = _topo_hat[cdim]->nloc(ax1) * _topo_hat[cdim]->nloc(ax2);
+        const size_t inmax   = _topo_hat[cdim]->nloc(ax0);
+        const int    nmem[3] = {_topo_hat[cdim]->nmem(0), _topo_hat[cdim]->nmem(1), _topo_hat[cdim]->nmem(2)};
 
-        FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
-        FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
+        FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
+        FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
 
         // do the loop
 #pragma omp parallel for default(none) proc_bind(close) schedule(static) firstprivate(onmax, inmax, nmem, mydata, mygreen, normfact, ax0)
@@ -1403,9 +1409,10 @@ void Solver::dothemagic_rhs_real(double *data) {
  */
 void Solver::dothemagic_rhs_complex_nmult0(double *data) {
     BEGIN_FUNC;
-    FLUPS_CHECK(_topo_hat[2]->nf() == 2, "The topo_hat[2] (field) has to be complex", LOCATION);
+    int cdim = _ndim-1; // get current dim
+    FLUPS_CHECK(_topo_hat[cdim]->nf() == 2, "The topo_hat[2] (field) has to be complex", LOCATION);
     // get the axis
-    const int ax0 = _topo_hat[2]->axis();
+    const int ax0 = _topo_hat[cdim]->axis();
     const int ax1 = (ax0 + 1) % 3;
     const int ax2 = (ax0 + 2) % 3;
     // get the factors
@@ -1415,12 +1422,12 @@ void Solver::dothemagic_rhs_complex_nmult0(double *data) {
     FLUPS_ASSUME_ALIGNED(mydata,FLUPS_ALIGNMENT);
     FLUPS_ASSUME_ALIGNED(mygreen,FLUPS_ALIGNMENT);
     {
-        const size_t onmax   = _topo_hat[2]->nloc(ax1) * _topo_hat[2]->nloc(ax2);
-        const size_t inmax   = _topo_hat[2]->nloc(ax0);
-        const int    nmem[3] = {_topo_hat[2]->nmem(0), _topo_hat[2]->nmem(1), _topo_hat[2]->nmem(2)};
+        const size_t onmax   = _topo_hat[cdim]->nloc(ax1) * _topo_hat[cdim]->nloc(ax2);
+        const size_t inmax   = _topo_hat[cdim]->nloc(ax0);
+        const int    nmem[3] = {_topo_hat[cdim]->nmem(0), _topo_hat[cdim]->nmem(1), _topo_hat[cdim]->nmem(2)};
 
-        FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
-        FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[2]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
+        FLUPS_CHECK(FLUPS_ISALIGNED(mygreen) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
+        FLUPS_CHECK(FLUPS_ISALIGNED(mydata) && (nmem[ax0] * _topo_hat[cdim]->nf() * sizeof(double)) % FLUPS_ALIGNMENT == 0, "please use FLUPS_ALIGNMENT to align the memory", LOCATION);
 
         // do the loop
 #pragma omp parallel for default(none) proc_bind(close) schedule(static) firstprivate(onmax, inmax, nmem, mydata, mygreen, normfact, ax0)
@@ -1471,3 +1478,288 @@ void Solver::dothemagic_rhs_complex_nmult3(double *data) {
     FLUPS_CHECK(false, "not implemented yet", LOCATION);
     END_FUNC;
 }
+
+
+/**
+ * @brief reorder the MPI-ranks using metis
+ * 
+ * Rank 0 gathers the communication graph, asks metis for one part per node and broadcasts the new ordering.
+ * Without HAVE_METIS, with a single node, or if every partitioning attempt fails, order is the identity.
+ * 
+ * @warning this function assumes an evenly distributed amount of procs on the nodes (TODO confirm: the parts are now weighted by the node size)
+ * @warning the node count presumes that the ranks hosted on a same node are numbered consecutively in comm
+ * 
+ * @param comm the communicator whose ranks are reordered
+ * @param sources not used by this function
+ * @param sourcesW array of size comm_size, the weight of what comes in from each rank
+ * @param dests not used by this function
+ * @param destsW array of size comm_size, the weight of what goes out to each rank
+ * @param order array of size comm_size filled on output, order[i] is the rank value assigned to rank i
+ */
+void Solver::_reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order) {
+    int comm_size;
+    int comm_rank;
+    MPI_Comm_rank(comm, &comm_rank);
+    MPI_Comm_size(comm, &comm_size);
+
+#ifdef HAVE_METIS
+
+    //-------------------------------------------------------------------------
+    /** - get the total number of nodes */
+    //-------------------------------------------------------------------------
+    // create a group where everybody can create a shared memory region
+    // no hint is needed: use MPI_INFO_NULL instead of an MPI_Info object that would have to be freed
+    MPI_Comm nodecomm;
+    MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, comm_rank, MPI_INFO_NULL, &nodecomm);
+    // we store the comm size
+    int local_nodesize;
+    MPI_Comm_size(nodecomm, &local_nodesize);
+
+    // gather on every proc the number of proc per node
+    int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size);
+    MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm);
+
+    // count the number of partitions we'll need: jump from the first rank of a node to the first rank of the next one
+    int n_nodes = 0;
+    int id = 0;
+    while( id < comm_size){
+        id += vec_nodesize[id];
+        n_nodes++;
+    }
+
+#ifdef DEV_SIMULATE_GRAPHCOMM
+    //CHEATING: imposing that there will be 2 groups (there needs to be at least 4 procs)
+    n_nodes = 2;
+    for (int ip = 0; ip<comm_size; ip++ ){
+        vec_nodesize[ip]=comm_size/3;
+    }
+    vec_nodesize[0]=comm_size-vec_nodesize[1];
+#endif
+
+    real_t* tpwgts = (real_t*) flups_malloc(sizeof(real_t)*n_nodes);
+    // deduce the size of each partition, expressed as a fraction of the total number of procs
+    id = 0;
+    for (int ip = 0; ip<n_nodes; ip++ ){
+        tpwgts[ip] = ((real_t)vec_nodesize[id])/((real_t) comm_size);
+        id += vec_nodesize[id];
+    }
+    //______________________________________________
+
+    // free stuffs
+    flups_free(vec_nodesize);
+    MPI_Comm_free(&nodecomm);
+
+    //-------------------------------------------------------------------------
+    /** - get the neighbour list and the associated weights */
+    //-------------------------------------------------------------------------
+    // we count the number of neighbours
+    int n_neighbours = 0;
+    // if we have either one way in or one way out, we have a neighbour
+    for (int i = 0; i < comm_size; ++i) {
+        if ((sourcesW[i] + destsW[i]) > 0 && i != comm_rank) n_neighbours++;
+    }
+    // allocate the number of neighbours and their weights
+    int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours);
+    int *weights    = (int *)flups_malloc(sizeof(int) * n_neighbours);
+    n_neighbours    = 0;
+    for (int i = 0; i < comm_size; ++i) {
+        if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) {
+            neighbours[n_neighbours] = i;
+            weights[n_neighbours]    = sourcesW[i] + destsW[i];
+            n_neighbours++;
+        }
+    }
+
+    //-------------------------------------------------------------------------
+    /** - build the graph on proc 0 and ask for partitioning
+     * The graph structure follows metis rules:
+     * the edges (= id of the destination of the edges) starting from proc k are located
+     * from adj[xadj[k]] to adj[xadj[k+1]-1]
+     * Same structure is used for the weights with the adjw
+     * */
+    //-------------------------------------------------------------------------
+    if (comm_rank == 0) {
+        int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int));
+        int *nadj = (int *)flups_malloc((comm_size) * sizeof(int));
+
+        // get the number of neighbours from everybody
+        MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm);
+        // get the starting indexes of the neighbour description for everybody
+        xadj[0] = 0;
+        for (int i = 0; i < comm_size; ++i) {
+            xadj[i + 1] = xadj[i] + nadj[i];
+        }
+
+        // allocate the adjacency list + weights and fill it with the neighbour list from everybody
+        int *adj  = (int *)flups_malloc(xadj[comm_size] * sizeof(int));
+        int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int));
+        MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm);
+        MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm);
+#ifdef PROF
+        {
+            //writing graph to file, CSR format
+            string filename = "prof/graph.csr";
+            FILE* file      = fopen(filename.c_str(), "w+");
+            if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. Did you create the folder?",LOCATION);}
+            for(int i=0; i<=comm_size; i++){
+                fprintf(file, "%d ",xadj[i]);
+            }
+            fprintf(file,"\n");
+            for(int i=0; i<xadj[comm_size]; i++){
+                fprintf(file, "%d (%d), ",adj[i],adjw[i]);
+            }
+            fprintf(file,"\n");
+            fclose(file);
+
+            //writing graph to file, per node
+            filename = "prof/graph.txt";
+            file     = fopen(filename.c_str(), "w+");
+            for(int i=0; i<comm_size; i++){
+                fprintf(file, "%d: ",i);
+                for(int j = xadj[i]; j<xadj[i+1]; j++){
+                        fprintf(file, "%d (%d), ",adj[j],adjw[j]);
+                }
+                fprintf(file,"\n");
+            }
+            fclose(file);
+        }
+#endif
+
+        //prepare the call to metis
+        int  ncon = 1;  // the number of balancing constraints
+        real_t tol = 1.0001; //tolerance on the constraint 
+        int  objval;
+        int *part = (int *)flups_malloc(comm_size * sizeof(int));
+        int *rids = (int *)flups_malloc(n_nodes * sizeof(int));
+
+        // ask for the partitioning. call metis several times in case the tolerance on the partition size is not exactly respected 
+        int max_iter = 10;
+        if(n_nodes==1){
+            max_iter = 0;
+            FLUPS_WARNING("METIS: you asked only 1 node. I can't do the partitaioning.",LOCATION);
+        }
+        int iter;
+        idx_t options[METIS_NOPTIONS];
+        METIS_SetDefaultOptions(options);
+
+        //METIS options: ncuts and niter seems to have the most effect
+        options[METIS_OPTION_SEED] = 1;
+        options[METIS_OPTION_NCUTS] = 50;
+        options[METIS_OPTION_NITER] = 50;
+        options[METIS_OPTION_UFACTOR] = 1.;
+        // options[METIS_OPTION_IPTYPE] = 
+        //     METIS_IPTYPE_GROW,
+        //     METIS_IPTYPE_RANDOM,
+        //     METIS_IPTYPE_EDGE,
+        //     METIS_IPTYPE_NODE,
+        //     METIS_IPTYPE_METISRB
+        // options[METIS_OPTION_RTYPE] = 
+        //     METIS_RTYPE_FM,
+        //     METIS_RTYPE_GREEDY,
+        //     METIS_RTYPE_SEP2SIDED,
+        //     METIS_RTYPE_SEP1SIDED
+
+        for(iter = 0; iter<max_iter; iter++){
+            FLUPS_INFO("METIS: graph partitioning attempt %d",iter+1);
+            METIS_PartGraphRecursive(&comm_size, &ncon, xadj, adj, NULL, NULL, adjw, &n_nodes, tpwgts, &tol, options, &objval, part);
+            tol=((tol-1.)/2.)+1.;
+            options[METIS_OPTION_NCUTS] +=10;
+            options[METIS_OPTION_NITER] +=10;
+            options[METIS_OPTION_SEED] += 1; 
+            // options[METIS_OPTION_UFACTOR] /= 1.;
+
+            // compute how many proc in each group, resulting from metis partitioning
+            std::memset(rids,0,sizeof(int)*n_nodes);  // reset at every attempt: rids holds the offsets of the previous one
+            for (int i = 0; i < comm_size; ++i) {
+                rids[part[i]]++;
+            }
+            // check that we did respect the constraint on the size of the partitions
+            // the target is rounded: tpwgts[ip] * comm_size may land just below the integer it stands for
+            bool succeed    = true;
+            int  first_rank = 0;  // rank value given to the first proc of the current part
+            for (int ip = 0; ip < n_nodes; ++ip) {
+                const int part_size = rids[ip];
+                const int target    = (int)(tpwgts[ip] * comm_size + 0.5);
+                succeed &= (part_size == target);
+                FLUPS_INFO("METIS:   part %d: size %d (should be %d)",ip, part_size, target);
+                rids[ip]   = first_rank;  // switch from the size of the part to its starting rank value
+                first_rank += part_size;
+            }
+            if(!succeed){
+                FLUPS_INFO("METIS:   attempt failed.");
+            }else{
+                // assign the rank value and redistribute
+                for (int i = 0; i < comm_size; ++i) {
+                    order[i] = rids[part[i]]++ ;
+                }
+                break;
+            }
+        }
+        // check that we did not reach max_iter
+        if(iter>=max_iter){
+            FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION);
+            for (int i = 0; i < comm_size; ++i) {
+                order[i] = i;
+            }
+        }
+
+        // result of the partitioning
+    #ifdef PART_OF_EQUAL_SIZE   
+        FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes);
+    #else            
+        FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes);
+    #endif
+#ifdef PROF
+        //writing the size of the partitions to file
+        string filename = "prof/partitions.txt";
+        FILE* file      = fopen(filename.c_str(), "w+");
+    #ifdef PART_OF_EQUAL_SIZE   
+        fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes);
+    #else
+        fprintf(file,"%d partitions of size:\n",n_nodes);
+        for(int i=0; i<n_nodes; i++){
+            fprintf(file, "part %d with %d elems\n",i,(int)(comm_size*tpwgts[i] + 0.5));
+        }
+    #endif
+        fclose(file);
+#endif        
+
+        flups_free(xadj);
+        flups_free(nadj);
+        flups_free(adj);
+        flups_free(adjw);
+
+        flups_free(part);
+        flups_free(rids);
+    } else {
+        MPI_Gather(&n_neighbours, 1, MPI_INT, NULL, 1, MPI_INT, 0, comm);
+        MPI_Gatherv(neighbours, n_neighbours, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, comm);
+        MPI_Gatherv(weights, n_neighbours, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, comm);
+    }
+    flups_free(neighbours);
+    flups_free(weights);
+    flups_free(tpwgts);  // always allocated above, hence always freed (whatever PART_OF_EQUAL_SIZE)
+
+    //-------------------------------------------------------------------------
+    /** - give the rank info to everybody */
+    //-------------------------------------------------------------------------
+    MPI_Bcast(order, comm_size, MPI_INT, 0, comm);
+#ifdef PROF        
+    if (comm_rank == 3) {
+        //writing reordering to file - NOTE(review): done by rank 3 only, nothing is written with fewer than 4 procs; confirm
+        string filename = "prof/order.txt";
+        FILE* file      = fopen(filename.c_str(), "w+");
+        for (int i = 0; i < comm_size; ++i) {
+            FLUPS_INFO("METIS ORDER %i : %i \n", i, order[i]);
+            printf("%i : %i \n", i, order[i]);
+            fprintf(file,"%i : %i \n", i, order[i]);
+        }
+        fclose(file);
+    }
+#endif
+#else
+    for (int i = 0; i < comm_size; ++i) {
+        order[i] = i;
+    }
+#endif
+}
\ No newline at end of file
diff --git a/src/Solver.hpp b/src/Solver.hpp
index 4e0a2efb..30fa97da 100644
--- a/src/Solver.hpp
+++ b/src/Solver.hpp
@@ -30,7 +30,7 @@
 #include <map>
 #include "FFTW_plan_dim.hpp"
 #include "defines.hpp"
-#include "green_functions_3d.hpp"
+#include "green_functions.hpp"
 #include "hdf5_io.hpp"
 
 #include "SwitchTopo.hpp"
@@ -70,6 +70,7 @@ class Solver {
     // even is the dimension is 2, we allocate arrays of dimension 3
 
    protected:
+    int _ndim          = 3; /**< @brief the dimension of the problem, i.e. 2D or 3D */
     int _fftwalignment = 0; /**< @brief alignement assumed by the FFTW Solver  */
     int _orderdiff     = 0; /**< @brief the order of derivative (spectral = 0)  */
     int _nbr_imult     = 0; /**< @brief the number of time we have applied a DST transform */
@@ -141,6 +142,7 @@ class Solver {
      */
     void _allocate_switchTopo(const int ntopo, SwitchTopo** switchtopo, opt_double_ptr* send_buff, opt_double_ptr* recv_buff);
     void _deallocate_switchTopo(SwitchTopo** switchtopo, opt_double_ptr* send_buff, opt_double_ptr* recv_buff);
+    void _reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order);
     /**@} */
 
     /**
@@ -182,7 +184,7 @@ class Solver {
      */
     size_t get_allocSize() {
         size_t size_tot = 1;
-        for (int id = 0; id < 3; id++) {
+        for (int id = 0; id < _ndim; id++) {
             size_tot = std::max(_topo_hat[id]->memsize(), size_tot);
         }
         return size_tot;
@@ -193,6 +195,7 @@ class Solver {
      * 
      * @param kfact  multiply the index by this factor to obtain the wave number (1/2/3 corresponds to x/y/z )
      * @param koffset  add this to the index to obtain the wave number (1/2/3 corresponds to x/y/z )
+     * @param symstart  returns the first index of the symmetry
      */
     void get_spectralInfo(double kfact[3], double koffset[3], double symstart[3]) {
         for (int ip = 0; ip < 3; ip++) {
@@ -231,61 +234,61 @@ class Solver {
     /**@} */
 };
 
-/**
- * @brief compute the pencil layout given the pencil direction
- * 
- * The pencil layout is computed so as to obtain pencils with an aspect
- * ratio close to 1, i.e. the same number points per proc in the the 2 other directions than id.
- * 
- * @param id the pencil direction
- * @param nproc the number of proc in each direction
- * @param comm_size the total communicator size
- * @param nglob the domain size in each direction
- */
-static inline void pencil_nproc(const int id, int nproc[3], const int comm_size, const int nglob[3]) {
-    int id1 = (id + 1) % 3;
-    int id2 = (id + 2) % 3;
-
-    nproc[id] = 1;
-
-    double       n1       = 1;
-    double       n2       = (double) comm_size;
-    //invert indexes so that id1 is the dimension where nglob is the smallest
-    if( nglob[id1] > nglob[id2]){
-        const int tmp = id2;
-        id2 = id1;
-        id1 = tmp;
-    }
-    double       np1      = (double) nglob[id1];
-    double       np2      = (double) nglob[id2]/ comm_size;
-    const double npsquare = sqrt((double)(nglob[id1] * nglob[id2]) / comm_size);  //target number of points per dimension
-
-    //keep on deviding as long as ncurr/2>nsquare
-    //we want to leave n1=1, and we do not want to reach n2=1
-    while ( (np1 > npsquare) && std::floor(n2*.5) == n2*.5) {
-        n1  *= 2.0;
-        np1 *= 0.5;
-        n2  *= 0.5;
-        np2 *= 2.0;
-    }
-    nproc[id1] = (int)n1;
-    nproc[id2] = (int)n2;
-
-    FLUPS_INFO("my proc repartition is %d %d %d",nproc[0],nproc[1],nproc[2]);
-    if(nproc[0] * nproc[1] * nproc[2] != comm_size){
-        FLUPS_ERROR("the number of proc %d %d %d does not match the comm size %d", nproc[0], nproc[1], nproc[2], comm_size, LOCATION);
-    }
-    if(comm_size>8 && (n1==1||n2==1)){
-        FLUPS_WARNING("A slab decomposition was used instead of a pencil decomposition in direction %d. This may increase communication time.",id, LOCATION);
-        //Loss of performance may originate in slab decompositions, as an actual All2All communication is required, whereas with the pencils,
-        // we manage to do All2All communications in subcoms of size sqrt(comm_size).
-        //We could prevent this to happen by doing something like:
-        // if(n2==1){
-        //     n2*=2;
-        //     n1*=0.5;
-        // }
-    }
-}
+// /**
+//  * @brief compute the pencil layout given the pencil direction
+//  * 
+//  * The pencil layout is computed so as to obtain pencils with an aspect
+//  * ratio close to 1, i.e. the same number of points per proc in the 2 directions other than id.
+//  * 
+//  * @param id the pencil direction
+//  * @param nproc the number of proc in each direction
+//  * @param comm_size the total communicator size
+//  * @param nglob the domain size in each direction
+//  */
+// static inline void pencil_nproc(const int id, int nproc[3], const int comm_size, const int nglob[3]) {
+//     int id1 = (id + 1) % 3;
+//     int id2 = (id + 2) % 3;
+
+//     nproc[id] = 1;
+
+//     double       n1       = 1;
+//     double       n2       = (double) comm_size;
+//     //invert indexes so that id1 is the dimension where nglob is the smallest
+//     if( nglob[id1] > nglob[id2]){
+//         const int tmp = id2;
+//         id2 = id1;
+//         id1 = tmp;
+//     }
+//     double       np1      = (double) nglob[id1];
+//     double       np2      = (double) nglob[id2]/ comm_size;
+//     const double npsquare = sqrt((double)(nglob[id1] * nglob[id2]) / comm_size);  //target number of points per dimension
+
+//     //keep on dividing as long as ncurr/2>nsquare
+//     //we want to leave n1=1, and we do not want to reach n2=1
+//     while ( (np1 > npsquare) && std::floor(n2*.5) == n2*.5) {
+//         n1  *= 2.0;
+//         np1 *= 0.5;
+//         n2  *= 0.5;
+//         np2 *= 2.0;
+//     }
+//     nproc[id1] = (int)n1;
+//     nproc[id2] = (int)n2;
+
+//     FLUPS_INFO("my proc repartition is %d %d %d",nproc[0],nproc[1],nproc[2]);
+//     if(nproc[0] * nproc[1] * nproc[2] != comm_size){
+//         FLUPS_ERROR("the number of proc %d %d %d does not match the comm size %d", nproc[0], nproc[1], nproc[2], comm_size, LOCATION);
+//     }
+//     if(comm_size>8 && (n1==1||n2==1)){
+//         FLUPS_WARNING("A slab decomposition was used instead of a pencil decomposition in direction %d. This may increase communication time.",id, LOCATION);
+//         //Loss of performance may originate in slab decompositions, as an actual All2All communication is required, whereas with the pencils,
+//         // we manage to do All2All communications in subcoms of size sqrt(comm_size).
+//         //We could prevent this to happen by doing something like:
+//         // if(n2==1){
+//         //     n2*=2;
+//         //     n1*=0.5;
+//         // }
+//     }
+// }
 
 /**
  * @brief compute the pencil layout given the pencil direction, compatible with another pencil decoposition given as a hint
@@ -293,8 +296,9 @@ static inline void pencil_nproc(const int id, int nproc[3], const int comm_size,
  * @param id the pencil direction
  * @param nproc the number of proc in each direction
  * @param comm_size the total communicator size
- * @param id_hint the axis of the pencils in another decomposition, which we want this decomposition to be compatible with
+ * @param id_hint the axis where we allow the proc decomposition to change
  * @param nproc_hint the number of procs in the other decomposition we want to be compatible with
+ * 
  */
 static inline void pencil_nproc_hint(const int id, int nproc[3], const int comm_size, const int id_hint, const int nproc_hint[3]) {
     // get the id shared between the hint topo
@@ -317,306 +321,5 @@ static inline void pencil_nproc_hint(const int id, int nproc[3], const int comm_
     }
 }
 
-/**
- * @brief reorder the MPI-ranks using metis
- * 
- * @warning this functions assume an evenly distributed amount of procs on the nodes
- * 
- * @param comm 
- * @param sources 
- * @param sourcesW 
- * @param dests 
- * @param destsW 
- * @param n_nodes 
- * @param order 
- */
-static void reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order) {
-    int comm_size;
-    int comm_rank;
-    MPI_Comm_rank(comm, &comm_rank);
-    MPI_Comm_size(comm, &comm_size);
-
-#ifdef HAVE_METIS
-
-    //-------------------------------------------------------------------------
-    /** - get the total number of nodes */
-    //-------------------------------------------------------------------------
-    // create a group where everybody can create a shared memory region
-    MPI_Comm nodecomm;
-    MPI_Info mpinfo;
-    MPI_Info_create(&mpinfo);
-    MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, comm_rank, mpinfo, &nodecomm);
-    // we store the comm size
-    int local_nodesize;
-    MPI_Comm_size(nodecomm, &local_nodesize);
-
-// #define PART_OF_EQUAL_SIZE
-#ifdef PART_OF_EQUAL_SIZE
-    //_______ OPTION 1 with gcd (suboptimal)________
-    // gather on each proc the gcd
-    int *vec_nodesize = (int  *)flups_malloc(sizeof(int) * comm_size);
-    MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm);
-    // get the Greatest Common Divider among every process
-    int nodesize = comm_size;
-    for (int ip = 0; ip < comm_size; ip++) {
-        nodesize = gcd(nodesize, vec_nodesize[ip]);
-    }
-    // store the number of nodes
-    int n_nodes = comm_size / nodesize;
-    double* tpwgts = NULL;
-#else
-    //_______ OPTION 2 with various size partitions________
-    // gather on proc 1 the number of proc per node
-    int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size);
-    MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm);
-    
-    // count the number of partitions we'll need:
-    int n_nodes = 0;
-    int id = 0;
-    while( id < comm_size){
-        id += vec_nodesize[id];
-        n_nodes++;
-    }
-    
-#ifdef DEV_SIMULATE_GRAPHCOMM
-    //CHEATING: imposing that there will be 2 groups (there needs to be at least 4 procs)
-    n_nodes = 2;
-    for (int ip = 0; ip<comm_size; ip++ ){
-        vec_nodesize[ip]=comm_size/3;
-    }
-    vec_nodesize[0]=comm_size-vec_nodesize[1];
-#endif
-
-    real_t* tpwgts = (real_t*) flups_malloc(sizeof(real_t)*n_nodes);
-    // deduce the size of each partition:
-    id = 0;
-    for (int ip = 0; ip<n_nodes; ip++ ){
-        tpwgts[ip] = ((real_t)vec_nodesize[id])/((real_t) comm_size);
-        id += vec_nodesize[id];
-    }
-    //______________________________________________
-#endif
-
-    // free stuffs
-    flups_free(vec_nodesize);
-    MPI_Comm_free(&nodecomm);
-
-    //-------------------------------------------------------------------------
-    /** - get the neighbour list and the associated weights */
-    //-------------------------------------------------------------------------
-    // we count the number of neighbours
-    int n_neighbours = 0;
-    // if we have either one way in or one way out, we have a neighbour
-    for (int i = 0; i < comm_size; ++i) {
-        if ((sourcesW[i] + destsW[i]) > 0 && i != comm_rank) n_neighbours++;
-    }
-    // allocate the number of neighbours and their weights
-    int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours);
-    int *weights    = (int *)flups_malloc(sizeof(int) * n_neighbours);
-    n_neighbours    = 0;
-    for (int i = 0; i < comm_size; ++i) {
-        if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) {
-            neighbours[n_neighbours] = i;
-            weights[n_neighbours]    = sourcesW[i] + destsW[i];
-            n_neighbours++;
-        }
-    }
-
-    //-------------------------------------------------------------------------
-    /** - build the graph on proc 0 and ask for partioning
-     * The graph structure follows metis rules:
-     * the edges (= id of the destination of the edges) starting from proc k are located
-     * from adj[xadj[k]] to adj[xadj[k+1]-1]
-     * Same structure is used for the weights with the ajdw
-     * */
-    //-------------------------------------------------------------------------
-    if (comm_rank == 0) {
-        int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int));
-        int *nadj = (int *)flups_malloc((comm_size) * sizeof(int));
-
-        // get the number of neighbours from everybody
-        MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm);
-        // get the starting indexes of the neighbour description for everybody
-        xadj[0] = 0;
-        for (int i = 0; i < comm_size; ++i) {
-            xadj[i + 1] = xadj[i] + nadj[i];
-        }
-
-        // allocate the adjency list + weights and fill it with the neighbour list from everybody
-        int *adj  = (int *)flups_malloc(xadj[comm_size] * sizeof(int));
-        int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int));
-        MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm);
-        MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm);
-#ifdef PROF
-        {
-            //writing graph to file, CSR format
-            string filename = "prof/graph.csr";
-            FILE* file      = fopen(filename.c_str(), "w+");
-            if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. Did you create the folder?",LOCATION);}
-            for(int i=0; i<=comm_size; i++){
-                fprintf(file, "%d ",xadj[i]);
-            }
-            fprintf(file,"\n");
-            for(int i=0; i<xadj[comm_size]; i++){
-                fprintf(file, "%d (%d), ",adj[i],adjw[i]);
-            }
-            fprintf(file,"\n");
-            fclose(file);
-
-            //writing graph to file, per node
-            filename = "prof/graph.txt";
-            file     = fopen(filename.c_str(), "w+");
-            for(int i=0; i<comm_size; i++){
-                fprintf(file, "%d: ",i);
-                for(int j = xadj[i]; j<xadj[i+1]; j++){
-                        fprintf(file, "%d (%d), ",adj[j],adjw[j]);
-                }
-                fprintf(file,"\n");
-            }
-            fclose(file);
-        }
-#endif
-
-        //prepare vall to metis
-        int  ncon = 1;  // the number of balancing constraints
-        real_t tol = 1.0001; //tolerance on the constraint 
-        int  objval;
-        int *part = (int *)flups_malloc(comm_size * sizeof(int));
-        int *rids = (int *)flups_malloc(n_nodes * sizeof(int));
-        std::memset(rids,0,sizeof(int)*n_nodes);
-
-        // ask of the partitioning. call metis several times in case the tolerance on the partition size is not exactly respected 
-        int max_iter = 10;
-        if(n_nodes==1){
-            max_iter = 0;
-            FLUPS_WARNING("METIS: you asked only 1 node. I can't do the partitaioning.",LOCATION);
-        }
-        int iter;
-        idx_t options[METIS_NOPTIONS];
-        METIS_SetDefaultOptions(options);
-
-        //METIS options: ncuts and niter seems to have the most effect
-        options[METIS_OPTION_SEED] = 1;
-        options[METIS_OPTION_NCUTS] = 50;
-        options[METIS_OPTION_NITER] = 50;
-        options[METIS_OPTION_UFACTOR] = 1.;
-        // options[METIS_OPTION_IPTYPE] = 
-        //     METIS_IPTYPE_GROW,
-        //     METIS_IPTYPE_RANDOM,
-        //     METIS_IPTYPE_EDGE,
-        //     METIS_IPTYPE_NODE,
-        //     METIS_IPTYPE_METISRB
-        // options[METIS_OPTION_RTYPE] = 
-        //     METIS_RTYPE_FM,
-        //     METIS_RTYPE_GREEDY,
-        //     METIS_RTYPE_SEP2SIDED,
-        //     METIS_RTYPE_SEP1SIDED
-
-        for(iter = 0; iter<max_iter; iter++){
-            FLUPS_INFO("METIS: graph partitioning attempt %d",iter+1);
-            METIS_PartGraphRecursive(&comm_size, &ncon, xadj, adj, NULL, NULL, adjw, &n_nodes, tpwgts, &tol, options, &objval, part);
-            tol=((tol-1.)/2.)+1.;
-            options[METIS_OPTION_NCUTS] +=10;
-            options[METIS_OPTION_NITER] +=10;
-            options[METIS_OPTION_SEED] += 1; 
-            // options[METIS_OPTION_UFACTOR] /= 1.;
-            
-            // compute how many proc in each group, resulting from metis partitioning
-            for (int i = 0; i < comm_size; ++i) {
-                rids[part[i]]++;
-            }
-            // check that we did respect the constraint on the size of the partitions
-            bool succeed = (rids[0] == (int)(tpwgts[0] * comm_size));
-            FLUPS_INFO("METIS:   part %d: size %d (should be %d)",0, rids[0], (int)(tpwgts[0] * comm_size));
-            for (int ip = 1; ip < n_nodes; ++ip) {
-                succeed &= (rids[ip] == (int)(tpwgts[ip] * comm_size));
-                FLUPS_INFO("METIS:   part %d: size %d (should be %d)",ip, rids[ip], (int)(tpwgts[ip] * comm_size));
-                rids[ip] += rids[ip-1]; //switch to cumulative numbering
-            }
-            for (int ip = n_nodes-1; ip > 0; --ip) {
-                rids[ip] = rids[ip-1]; //offset by 1
-            }
-            rids[0] = 0;
-            if(!succeed){
-                FLUPS_INFO("METIS:   attempt failed.");
-            }else{
-                // assign the rank value and redistribute
-                for (int i = 0; i < comm_size; ++i) {
-                    order[i] = rids[part[i]]++ ;
-                }
-                break;
-            }
-        }
-        // check that we did not reach max_iter
-        if(iter>=max_iter){
-            FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION);
-            for (int i = 0; i < comm_size; ++i) {
-                order[i] = i;
-            }
-        }
-
-        // result of the partitioning
-    #ifdef PART_OF_EQUAL_SIZE   
-        FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes);
-    #else            
-        FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes);
-    #endif
-#ifdef PROF
-        //writing graph to file, CSR format
-        string filename = "prof/partitions.txt";
-        FILE* file      = fopen(filename.c_str(), "w+");
-    #ifdef PART_OF_EQUAL_SIZE   
-        fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes);
-    #else
-        fprintf(file,"%d partitions of size:\n",n_nodes);
-        for(int i=0; i<n_nodes; i++){
-            fprintf(file, "part %d with %d elems\n",i,(int)(comm_size*tpwgts[i]));
-        }
-    #endif
-        fclose(file);
-#endif        
-
-        flups_free(xadj);
-        flups_free(nadj);
-        flups_free(adj);
-        flups_free(adjw);
-
-        flups_free(part);
-        flups_free(rids);
-    } else {
-        MPI_Gather(&n_neighbours, 1, MPI_INT, NULL, 1, MPI_INT, 0, comm);
-        MPI_Gatherv(neighbours, n_neighbours, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, comm);
-        MPI_Gatherv(weights, n_neighbours, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, comm);
-    }
-    flups_free(neighbours);
-    flups_free(weights);
-#ifndef PART_OF_EQUAL_SIZE
-    flups_free(tpwgts);
-#endif
-
-    //-------------------------------------------------------------------------
-    /** - give the rank info to everybody */
-    //-------------------------------------------------------------------------
-    MPI_Bcast(order, comm_size, MPI_INT, 0, comm);
-#ifdef PROF        
-    if (comm_rank == 3) {
-        //writing reordering to file
-        string filename = "prof/order.txt";
-        FILE* file      = fopen(filename.c_str(), "w+");
-        for (int i = 0; i < comm_size; ++i) {
-            FLUPS_INFO("METIS ORDER %i : %i \n", i, order[i]);
-            printf("%i : %i \n", i, order[i]);
-            fprintf(file,"%i : %i \n", i, order[i]);
-        }
-        fclose(file);
-    }
-#endif
-#else
-    for (int i = 0; i < comm_size; ++i) {
-        order[i] = i;
-    }
-#endif
-}
-
 
 #endif
diff --git a/src/SwitchTopo.cpp b/src/SwitchTopo.cpp
index 3d4e72bc..9fca36cd 100644
--- a/src/SwitchTopo.cpp
+++ b/src/SwitchTopo.cpp
@@ -32,31 +32,28 @@
 #include "Topology.hpp"
 #include <limits>
 
-void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3],int nByBlock[3]){
+/**
+ * @brief computes nByBlock, the unit block size
+ * 
+ * @param istart the starting indexes on this rank in the input topology
+ * @param iend the end indexes on this rank in the input topology
+ * @param ostart the starting indexes on this rank in the output topology
+ * @param oend the end indexes on this rank in the output topology
+ * @param nByBlock the computed unit block size in each dimension (output)
+ */
+void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3], int nByBlock[3]) {
     BEGIN_FUNC;
 
     int comm_size;
-    MPI_Comm_size(_inComm,&comm_size);
+    MPI_Comm_size(_inComm, &comm_size);
 
     int* onProc = (int*)flups_malloc(comm_size * sizeof(int));
 
     for (int id = 0; id < 3; id++) {
         // get the gcd between send and receive
-        int isend = (iend[id] - istart[id]);
-        int osend = (oend[id] - ostart[id]);
-        // // compute the exchanged size same if from the input or output
-        // MPI_Allreduce(&isend, &_exSize[id], 1, MPI_INT, MPI_SUM, _inComm);
-        // // we have summed the size nproc(id+1)*size nproc(id+2) * size, so we divide
-        // _exSize[id] /= _topo_in->nproc((id+1)%3) * _topo_in->nproc((id+2)%3);
-
-        // // if I am the last one, I decrease the blocksize by one if needed
-        // if (_topo_in->rankd(id) == (_topo_in->nproc(id) - 1)) {
-        //     isend = isend - _exSize[id] % 2;
-        // }
-        // if (_topo_out->rankd(id) == (_topo_out->nproc(id) - 1)) {
-        //     osend = osend - _exSize[id] % 2;
-        // }
-        int npoints = gcd(isend,osend);
+        int isend   = (iend[id] - istart[id]);
+        int osend   = (oend[id] - ostart[id]);
+        int npoints = gcd(isend, osend);
         // gather on each proc the gcd
         MPI_Allgather(&npoints, 1, MPI_INT, onProc, 1, MPI_INT, _inComm);
         // get the Greatest Common Divider among every process
@@ -72,75 +69,7 @@ void SwitchTopo::_cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int o
 }
 
 /**
- * @brief compute the destination rank for every block on the current processor
- * 
- * @param nBlock the number of block on the current proc (012-indexing)
- * @param blockIDStart the global starting id of the block (0,0,0) in the current topo
- * @param topo the destination topology
- * @param nBlockOnProc the number of block on each proc in the destination topology
- * @param destRank the computed destination rank for each block
- */
-void SwitchTopo::_cmpt_blockDestRankAndTag(const int nBlock[3], const int blockIDStart[3], const Topology *topo, const int *startBlockEachProc, const int *nBlockEachProc, int *destRank, int *destTag) {
-    BEGIN_FUNC;
-    int comm_size;
-    MPI_Comm_size(_inComm, &comm_size);
-    // go through each block
-    for (int ib = 0; ib < nBlock[0] * nBlock[1] * nBlock[2]; ib++) {
-        // get the split index
-        int bidv[3];
-        localSplit(ib, nBlock, 0, bidv, 1);
-        // initialize the destrank
-        int global_bid[3] = {0, 0, 0};
-        int destrankd[3] = {0, 0, 0};
-        // determine the dest rank for each dimension
-        for (int id = 0; id < 3; id++) {
-            // we go trough every rank on the given dim
-            global_bid[id] = bidv[id] + blockIDStart[id];
-            for (int ir = 0; ir < topo->nproc(id); ir++) {
-                // update the destination rank
-                destrankd[id] = ir;
-
-                // update the number of block already visited
-                int minBlockLocal = startBlockEachProc[id * comm_size + rankindex(destrankd, topo)];
-                int maxBlockLocal = minBlockLocal + nBlockEachProc[id * comm_size + rankindex(destrankd, topo)];
-
-                // if we have already visited more block than my block id then we have found the destination rank
-                if (global_bid[id] >= minBlockLocal && global_bid[id] < maxBlockLocal) {
-                    break;
-                }
-            }
-        }
-
-        // get the global destination rank
-        const int destrank = rankindex(destrankd, topo);
-        // get the global destination tag
-        destRank[ib] = destrank;
-
-        FLUPS_CHECK(destrank < comm_size, "the destination rank is > than the commsize: %d = %d %d %d vs %d", destrank, destrankd[0], destrankd[1], destrankd[2], comm_size, LOCATION);
-        if (destTag != NULL) {
-            // get the number of block in the destination rank
-            int dest_nBlock[3] = {nBlockEachProc[0 * comm_size + destrank],
-                                  nBlockEachProc[1 * comm_size + destrank],
-                                  nBlockEachProc[2 * comm_size + destrank]};
-            // store the destination tag = local block index in the destination rank
-            // get the number of block in the destination rank
-            int dest_iBlock[3] = {global_bid[0]-startBlockEachProc[0 * comm_size + destrank],
-                                  global_bid[1]-startBlockEachProc[1 * comm_size + destrank],
-                                  global_bid[2]-startBlockEachProc[2 * comm_size + destrank]};
-            // create the tag 
-            destTag[ib] = localIndex(0, dest_iBlock[0], dest_iBlock[1], dest_iBlock[2], 0, dest_nBlock, 1);
-        }
-    }
-
-    //if the communicator of topo is not the same as the reference communicator, we need to adapt the destrank
-    //for now, it has been computed in the comm of topo. We thus change for the reference _inComm.
-    translate_ranks(nBlock[0] * nBlock[1] * nBlock[2], destRank, topo->get_comm(), _inComm);
-
-    END_FUNC;
-}
-
-/**
- * @brief compute the destination rank for every block on the current processor
+ * @brief compute the destination rank for every unit block on the current processor
  * 
  * @param nBlock the number of block on the current proc (012-indexing)
  * @param blockIDStart the global starting id of the block (0,0,0) in the current topo
@@ -164,12 +93,13 @@ void SwitchTopo::_cmpt_blockDestRank(const int nBlock[3],const int nByBlock[3],c
         // determine the dest rank for each dimension
         for (int id = 0; id < 3; id++) {
             // get the global starting index in my current topo = topo_in
-            global_id[id] = bidv[id] * nByBlock[id] + topo_in->nbyproc(id) * topo_in->rankd(id) + istart[id];
+            global_id[id] = bidv[id] * nByBlock[id] + topo_in->cmpt_start_id(id) + istart[id];
             // the (0,0,0) in topo in is located in shift in topo_out
-            FLUPS_INFO_4("block %d starts at %d / %d ",ib,(global_id[id] + shift[id]),topo_out->nbyproc(id));
-            destrankd[id] = (global_id[id] + shift[id]) / topo_out->nbyproc(id);
+            FLUPS_INFO_4("block %d starts at %d ",ib,(global_id[id] + shift[id]));
+            // destrankd[id] = (global_id[id] + shift[id]) / topo_out->nbyproc(id);
+            destrankd[id] = topo_out->cmpt_rank_fromid(global_id[id] + shift[id],id);
             // if the last proc has more data than the other ones, we need to max the destrank
-            destrankd[id] = std::min(destrankd[id],topo_out->nproc(id)-1);
+            // destrankd[id] = std::min(destrankd[id],topo_out->nproc(id)-1);
         }
         destRank[ib] = rankindex(destrankd, topo_out);
         
@@ -183,40 +113,7 @@ void SwitchTopo::_cmpt_blockDestRank(const int nBlock[3],const int nByBlock[3],c
 }
 
 /**
- * @brief compute the size of the blocks inside the given topology
- * 
- * @param nBlock 
- * @param blockIDStart 
- * @param nByBlock 
- * @param topo 
- * @param nBlockSize 
- */
-void SwitchTopo::_cmpt_blockSize(const int nBlock[3], const int blockIDStart[3], const int nByBlock[3], const int istart[3], const int iend[3], int *nBlockSize[3]) {
-    BEGIN_FUNC;
-    // go through each block
-    for (int ib2 = 0; ib2 < nBlock[2]; ib2++) {
-        for (int ib1 = 0; ib1 < nBlock[1]; ib1++) {
-            for (int ib0 = 0; ib0 < nBlock[0]; ib0++) {
-                // get the global block index
-                const int bidv[3] = {ib0, ib1, ib2};
-                const int bid     = localIndex(0, ib0, ib1, ib2, 0, nBlock, 1);
-                // determine the size in each direction
-                for (int id = 0; id < 3; id++) {
-                    //if I am the last block, I forgive a small difference between the blocksizes
-                    if (bidv[id] == (nBlock[id] - 1)) {
-                        nBlockSize[id][bid] = (iend[id] - istart[id]) - bidv[id] * nByBlock[id];
-                    } else {
-                        nBlockSize[id][bid] = nByBlock[id];
-                    }
-                }
-            }
-        }
-    }
-    END_FUNC;
-}
-
-/**
- * @brief given a topology, try to merge the blocks that go to the same destination
+ * @brief given a topology, merges the unit blocks that go to the same destination in order to create one big block for each proc
  * 
  * @param [in] topo the topology
  * @param [in] nByBlock the number of unknowns by blocks
@@ -238,6 +135,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar
     int* nblockToEachProc = (int*)flups_malloc(sizeof(int) * commsize);
     std::memset(nblockToEachProc, 0, sizeof(int) * commsize);
 
+    //-------------------------------------------------------------------------
+    /** - count the number of block going to each proc */
+    //-------------------------------------------------------------------------
     const int old_nBlock = nBlockv[0] * nBlockv[1] * nBlockv[2];
     
     for (int ib = 0; ib < old_nBlock; ib++) {
@@ -251,6 +151,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar
         }
     }
 
+    //-------------------------------------------------------------------------
+    /** - initialize destination rank, block sizes and blockiStart arrays */
+    //-------------------------------------------------------------------------
     // allocate the new arrays: rank, tag, blocksize, block istart
     int* newBlockSize[3]   = {NULL, NULL, NULL};
     int* newblockiStart[3] = {NULL, NULL, NULL};
@@ -284,8 +187,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar
     // free the temp array
     flups_free(nblockToEachProc);
 
-    // recompute the number of blocks on each proc and the starting index
-
+    //-------------------------------------------------------------------------
+    /** - Gathering blocks: recompute the blocksize, the blockiStart and the destination rank */
+    //-------------------------------------------------------------------------
     // loop over the blocks and store the information
     for (int nib = 0; nib < newNBlock; nib++) {
         // FLUPS_INFO(">>> looking for new block %d", nib);
@@ -311,9 +215,6 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar
                 // get the last index of the block
                 int nib_start[3] = {newblockiStart[0][nib], newblockiStart[1][nib], newblockiStart[2][nib]};
                 int nib_end[3]   = {nib_start[0] + newBlockSize[0][nib], nib_start[1] + newBlockSize[1][nib], nib_start[2] + newBlockSize[2][nib]};
-                
-                // FLUPS_INFO(">>> old block lim = %d %d %d -> %d %d %d",ib_start[0],ib_start[1],ib_start[2],ib_end[0],ib_end[1],ib_end[2]);
-                // FLUPS_INFO(">>> new block lim = %d %d %d -> %d %d %d",nib_start[0],nib_start[1],nib_start[2],nib_end[0],nib_end[1],nib_end[2]);
 
                 // get the new starting index (and overwrittes the INT_MAX if any!!)
                 newblockiStart[0][nib] = std::min(nib_start[0], ib_start[0]);
@@ -331,6 +232,9 @@ void SwitchTopo::_gather_blocks(const Topology* topo, int nByBlock[3], int istar
         }
         
     }
+    //-------------------------------------------------------------------------
+    /** - free old arrays and store the new ones */
+    //-------------------------------------------------------------------------
     // store the new block number
     (*nBlock) = newNBlock;
 
@@ -427,25 +331,14 @@ void SwitchTopo::_gather_tags(MPI_Comm comm, const int inBlock, const int onBloc
 }
 
 /**
- * @brief compute the number of blocks, the starting indexes of the block (0,0,0) and the number of block in each proc
- * 
- * This function computes several usefull indexes for the block:
- * - the number of blocks on the current procs
- * - the starting index in the topo of the block (0,0,0)
- * - the number of block on each proc.
+ * @brief compute the number of blocks on each rank
  * 
- * For a given proc, nBlockEachProc[comm_size * id + ip] is the number of proc in the dimension id on the proc ip
- * 
- * @param istart the starting indexes on this proc
- * @param iend the end indexes on this proc
+ * @param istart the starting local indexes on this proc
+ * @param iend the end local indexes on this proc
  * @param nByBlock the number of unkowns in one block (012-indexing)
  * @param topo the current topology
  * @param nBlock the number of block in this proc
- * @param blockIDStart the starting id of the block (0,0,0)
- * @param nBlockEachProc the number of blocks on each proc
  */
-// void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo,
-//                                      int nBlock[3], int blockIDStart[3], int *startBlockEachProc, int *nBlockEachProc) {
 void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo,int nBlock[3]) {
     BEGIN_FUNC;
     int comm_size;
@@ -453,24 +346,8 @@ void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], cons
     for (int id = 0; id < 3; id++) {
         // send/recv number of block on my proc
         nBlock[id] = (iend[id] - istart[id]) / nByBlock[id];
-        // // get the list of number of procs
-        // MPI_Allgather(&(nBlock[id]), 1, MPI_INT, &(nBlockEachProc[comm_size * id]), 1, MPI_INT, topo->get_comm());
-        // // set the starting indexes to 0
-        // blockIDStart[id] = 0;
-        // // compute the starting index
-        // const int myrankd  = topo->rankd(id);
-        // int       rankd[3] = {topo->rankd(0), topo->rankd(1), topo->rankd(2)};
-        // for (int ir = 0; ir < myrankd; ir++) {
-        //     // update the rankd
-        //     rankd[id] = ir;
-        //     // increment the block counter
-        //     blockIDStart[id] += nBlockEachProc[comm_size * id + rankindex(rankd, topo)];
-        // }
         // do some checks
         FLUPS_CHECK(nBlock[id] > 0, "The number of proc in one direction cannot be 0: istart = %d %d %d to iend = %d %d %d ", istart[0], istart[1], istart[2], iend[0], iend[1], iend[2], LOCATION);
-
-        //everybody needs to know the startID of the first block in each proc
-        // MPI_Allgather(&(blockIDStart[id]), 1, MPI_INT, &(startBlockEachProc[comm_size * id]), 1, MPI_INT, topo->get_comm());
     }
     END_FUNC;
 }
@@ -478,8 +355,8 @@ void SwitchTopo::_cmpt_blockIndexes(const int istart[3], const int iend[3], cons
 /**
  * @brief split the _inComm communicator into subcomms
  * 
- * We here find the colors of the call graph, i.e. ranks communicating together have the same color.
- * Once the color are known, we divide the graph into subcomms.
+ * We here find the colors of the comm, i.e. ranks communicating together have the same color.
+ * Once the colors are known, we divide the current communicator into subcomms.
  * 
  * 
  */
@@ -490,7 +367,6 @@ void SwitchTopo::_cmpt_commSplit(){
     MPI_Comm_rank(_inComm,&rank);
     MPI_Comm_size(_inComm,&comm_size);
 
-    
     //-------------------------------------------------------------------------
     /** - Set the starting color and determine who I wish to get in my group */
     //-------------------------------------------------------------------------
@@ -589,7 +465,7 @@ void SwitchTopo::_cmpt_commSplit(){
 }
 
 /**
- * @brief setup the lists according to the master and sub communicators
+ * @brief setup the subcommunicator from the destRank and the _inComm communicator
  * 
  * We setup the following lists:
  * - destRank: transformed from the values in the world comm to the values in the new comm.
@@ -616,10 +492,6 @@ void SwitchTopo::_setup_subComm(const int nBlock,int* blockSize[3], int* destRan
     int* subRanks = (int*)flups_malloc(worldsize * sizeof(int));
     MPI_Allgather(&subrank, 1, MPI_INT, subRanks, 1, MPI_INT, _inComm);
 
-
-    // int* destRank_cpy = (int*) flups_malloc(nBlock[0] * nBlock[1] * nBlock[2] * sizeof(int));
-    // memcpy(destRank,destRank_cpy,nBlock[0] * nBlock[1] * nBlock[2] * sizeof(int));    
-
     // replace the old ranks by the newest ones
     for (int ib = 0; ib < nBlock; ib++) {
         destRank[ib] = subRanks[destRank[ib]];
@@ -637,6 +509,16 @@ void SwitchTopo::_setup_subComm(const int nBlock,int* blockSize[3], int* destRan
     END_FUNC;
 }
 
+/**
+ * @brief compute the start and count arrays needed for the all to all communication
+ * 
+ * @param comm the communicator to use
+ * @param nBlock the number of block
+ * @param blockSize the block sizes
+ * @param destRank the destination rank of each block
+ * @param count the count array
+ * @param start the start array
+ */
 void SwitchTopo::_cmpt_start_and_count(MPI_Comm comm, const int nBlock,int* blockSize[3], int* destRank, int** count, int** start) {
     BEGIN_FUNC;
     const int nf = std::max(_topo_in->nf(),_topo_out->nf());
diff --git a/src/SwitchTopo.hpp b/src/SwitchTopo.hpp
index 17decdc6..c9413e62 100644
--- a/src/SwitchTopo.hpp
+++ b/src/SwitchTopo.hpp
@@ -51,15 +51,8 @@ class SwitchTopo {
     MPI_Comm _inComm = NULL; /**<@brief the reference input communicator */
     MPI_Comm _outComm = NULL; /**<@brief the reference output communicator */
     MPI_Comm _subcomm = NULL; /**<@brief the subcomm for this switchTopo */
-    // int _exSize[3]; /**<@brief exchanged size in each dimension (012-indexing) */
     int _shift[3]; /**<@brief the shift in memory */
 
-    // int _nByBlock[3]; /**<@brief The number of data per blocks in each dim (!same on each process! and 012-indexing)  */
-    // int _istart[3]; /**<@brief the starting index for #_topo_in to be inside #_topo_out  */
-    // int _ostart[3]; /**<@brief the starting index for #_topo_out to be inside #_topo_in  */
-    // int _iend[3];   /**<@brief the ending index for #_topo_in to be inside #_topo_out  */
-    // int _oend[3];   /**<@brief the ending index for #_topo_out to be inside #_topo_in  */
-
     int _inBlock; /**<@brief the local number of block in each dim in the input topology */
     int _onBlock; /**<@brief the local number of block in each dim in the output topology  */
 
@@ -93,31 +86,12 @@ class SwitchTopo {
     virtual void execute(opt_double_ptr v, const int sign) const                            = 0;
     virtual void disp() const                                                               = 0;
 
-    // /**
-    //  * @brief return the memory size of a block (including the padding for odd numbers if needed)
-    //  * 
-    //  * @return size_t 
-    //  */
-    // inline size_t get_blockMemSize() const {
-    //     // get the max block size
-    //     size_t total = 1;
-    //     for (int id = 0; id < 3; id++) {
-    //         // if the block size is 1, no need to pad :)
-    //         total *= (_nByBlock[id] == 1) ? 1 : (size_t)(_nByBlock[id] + _exSize[id] % 2);
-    //     }
-    //     // the nf at the moment of the switchTopo is ALWAYS the one from the output topo!!
-    //     total *= (size_t)_topo_out->nf();
-    //     // add the difference with the alignement to be always aligned
-    //     size_t alignDelta = ((total*sizeof(double))%FLUPS_ALIGNMENT == 0) ? 0 : (FLUPS_ALIGNMENT - (total*sizeof(double))%FLUPS_ALIGNMENT )/sizeof(double);
-    //     // FLUPS_INFO("alignDelta = %d for a total of %d = %d %d %d",alignDelta,total,_nByBlock[0] + _exSize[0] % 2,_nByBlock[1] + _exSize[1] % 2,_nByBlock[2] + _exSize[2] % 2);
-    //     total = total + alignDelta;
-    //     FLUPS_CHECK((total*sizeof(double))%FLUPS_ALIGNMENT == 0 , "The total size of one block HAS to match the alignement size",LOCATION);
-    //     // return the total size
-    //     return total;
-    // };
-
     /**
      * @brief Get the memory size of a block padded to ensure alignment
+     *
+     * @warning
+     * Since we use gathered blocks, it is NOT STRAIGHTFORWARD to impose a common size for every block on every proc.
+     * Therefore, we chose not to do it!!
      * 
      * @param ib the block id
      * @param nf the number of fields inside an element
@@ -174,9 +148,6 @@ class SwitchTopo {
    protected:
     void _cmpt_nByBlock(int istart[3], int iend[3], int ostart[3], int oend[3],int nByBlock[3]);
     void _cmpt_blockDestRank(const int nBlock[3], const int nByBlock[3], const int shift[3], const int istart[3], const Topology* topo_in, const Topology* topo_out, int* destRank);
-    void _cmpt_blockDestRankAndTag(const int nBlock[3], const int blockIDStart[3], const Topology* topo, const int* startBlockEachProc, const int* nBlockEachProc, int* destRank, int* destTag);
-    void _cmpt_blockSize(const int nBlock[3], const int blockIDStart[3], const int nByBlock[3], const int istart[3], const int iend[3], int* nBlockSize[3]);
-    // void _cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology* topo, int nBlock[3], int blockIDStart[3], int* startBlockEachProc, int* nBlockEachProc);
     void _cmpt_blockIndexes(const int istart[3], const int iend[3], const int nByBlock[3], const Topology *topo,int nBlock[3]);
 
     void _cmpt_commSplit();
@@ -201,7 +172,7 @@ static inline int gcd(int a, int b) {
  * @param inComm input communicator
  * @param outComm output communicator
  */
-inline static void  translate_ranks(int size, int* ranks, MPI_Comm inComm, MPI_Comm outComm) {
+inline static void translate_ranks(int size, int* ranks, MPI_Comm inComm, MPI_Comm outComm) {
     BEGIN_FUNC;
 
     int comp;
diff --git a/src/SwitchTopo_a2a.cpp b/src/SwitchTopo_a2a.cpp
index 9701f0e5..267b827e 100644
--- a/src/SwitchTopo_a2a.cpp
+++ b/src/SwitchTopo_a2a.cpp
@@ -130,7 +130,15 @@ SwitchTopo_a2a::SwitchTopo_a2a(const Topology* topo_input, const Topology* topo_
 }
 
 /**
- * @brief initialize the blocks: compute their index, their number, their size and their source/destination
+ * @brief initialize the communication blocks
+ * 
+ * First, we compute nByBlock[3], the smallest size of unknowns that goes from one proc to another.
+ * This small nByBlock is the same across all ranks.
+ * 
+ * Then, for each of these unit blocks (of size nByBlock[3]), we compute the destination rank.
+ * 
+ * Afterwards, using the rank of those unit blocks, we try to gather them by destination rank.
+ * All the unit blocks that have the same destination will be packed together for the communication.
  * 
  */
 void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* topo_out){
@@ -153,14 +161,6 @@ void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* to
     int  oend[3];
     int  nByBlock[3];
 
-    // int  iblockIDStart[3];
-    // int  oblockIDStart[3];
-    // int* inBlockEachProc     = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* onBlockEachProc     = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* istartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* ostartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-
-
     //-------------------------------------------------------------------------
     /** - Compute intersection ids */
     //-------------------------------------------------------------------------
@@ -174,41 +174,22 @@ void SwitchTopo_a2a::_init_blockInfo(const Topology* topo_in, const Topology* to
     //-------------------------------------------------------------------------
     _cmpt_nByBlock(istart,iend,ostart,oend,nByBlock);
 
-    // _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv, iblockIDStart, istartBlockEachProc, inBlockEachProc);
-    // _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv, oblockIDStart, ostartBlockEachProc, onBlockEachProc);
     _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv);
     _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv);
 
-    // // allocte the block size
-    // for (int id = 0; id < 3; id++) {
-    //     _iBlockSize[id] = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int));
-    //     _oBlockSize[id] = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int));
-    // }
-
     // allocate the destination ranks
     _i2o_destRank = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int));
     _o2i_destRank = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int));
 
-    // // get the size of the blocks
-    // _cmpt_blockSize(inBlockv, iblockIDStart, nByBlock, istart, iend, _iBlockSize);
-    // _cmpt_blockSize(onBlockv, oblockIDStart, nByBlock, ostart, oend, _oBlockSize);
-
     // get the ranks
     // shift if the root position of the topo_in in the topo_out
     _cmpt_blockDestRank(inBlockv,nByBlock,_shift,istart,topo_in,topo_out,_i2o_destRank);
     _cmpt_blockDestRank(onBlockv,nByBlock,mshift,ostart,topo_out,topo_in,_o2i_destRank);
-    // _cmpt_blockDestRankAndTag(inBlockv, iblockIDStart, topo_out, ostartBlockEachProc, onBlockEachProc, _i2o_destRank, NULL);
-    // _cmpt_blockDestRankAndTag(onBlockv, oblockIDStart, topo_in, istartBlockEachProc, inBlockEachProc, _o2i_destRank,NULL);
 
     // try to gather blocks together if possible, rewrittes the sizes, the blockistart, the number of blocks, the ranks and the tags
     _gather_blocks(topo_in, nByBlock, istart,iend, inBlockv, _iBlockSize, _iBlockiStart, &_inBlock, &_i2o_destRank);
     _gather_blocks(topo_out, nByBlock, ostart,oend, onBlockv, _oBlockSize, _oBlockiStart, &_onBlock, &_o2i_destRank);
 
-    // free the temp arrays
-    // flups_free(inBlockEachProc);
-    // flups_free(onBlockEachProc);
-    // flups_free(istartBlockEachProc);
-    // flups_free(ostartBlockEachProc);
     END_FUNC;
 }
 
@@ -259,7 +240,8 @@ void SwitchTopo_a2a::setup() {
     int compIn, compOut;
     MPI_Comm_compare(inComm, _inComm, &compIn);
     MPI_Comm_compare(outComm, _outComm, &compOut);
-    if( compIn != MPI_IDENT || compOut != MPI_IDENT){
+    // if the graph communicator has the same numbering as the old comm, we will skip the following
+    if( compIn != MPI_CONGRUENT || compOut != MPI_CONGRUENT){
         if (rank == 0){
             FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. I will recompute the communication scheme.",LOCATION);
         }
@@ -312,6 +294,29 @@ void SwitchTopo_a2a::setup() {
         _is_all2all = _is_all2all && (tmp_size == _o2i_count[ir]);
     }
 
+    //-------------------------------------------------------------------------
+    /** - Check that everybody is in the same communication mode*/
+    //-------------------------------------------------------------------------
+    // determine if every proc is in the all_to_all mode
+    bool global_is_alltoall;
+    MPI_Allreduce(&_is_all2all, &global_is_alltoall, 1, MPI_CXX_BOOL, MPI_LAND, _subcomm);
+    // determine if at least one proc is in the all to all mode
+    bool any_is_alltoall;
+    MPI_Allreduce(&_is_all2all,&any_is_alltoall,1,MPI_CXX_BOOL,MPI_LOR,_subcomm);
+    // generate an error if it is not compatible
+    if (_is_all2all && (!global_is_alltoall)){
+        int rlen;
+        char myname[MPI_MAX_OBJECT_NAME];
+        MPI_Comm_get_name(_subcomm, myname, &rlen);
+        FLUPS_ERROR("communicator %s: at least one process is NOT in the all to all communication scheme",myname,LOCATION);
+    }
+    if((!_is_all2all) && any_is_alltoall){
+        int rlen;
+        char myname[MPI_MAX_OBJECT_NAME];
+        MPI_Comm_get_name(_subcomm, myname, &rlen);
+        FLUPS_ERROR("communicator %s: at least one process is in the all to all communication scheme",myname,LOCATION);
+    }
+    
     // if we are all to all, clean the start array
     if (_is_all2all) {
         if (_i2o_start != NULL) {
diff --git a/src/SwitchTopo_nb.cpp b/src/SwitchTopo_nb.cpp
index 554c93ea..d4e7906c 100644
--- a/src/SwitchTopo_nb.cpp
+++ b/src/SwitchTopo_nb.cpp
@@ -125,7 +125,17 @@ SwitchTopo_nb::SwitchTopo_nb(const Topology* topo_input, const Topology* topo_ou
 }
 
 /**
- * @brief initialize the blocks: compute their index, their number, their size and their source/destination
+ * @brief initialize the communication blocks
+ * 
+ * First, we compute nByBlock[3], the smallest size of unknowns that goes from one proc to another.
+ * This small nByBlock is the same across all ranks.
+ * 
+ * Then, for each of these unit blocks (of size nByBlock[3]), we compute the destination rank.
+ * 
+ * Afterwards, using the rank of those unit blocks, we try to gather them by destination rank.
+ * All the unit blocks that have the same destination will be packed together for the communication.
+ * 
+ * Finally, we compute the destination tag of each block. It is defined as the local block id of the block in the receiving topology.
  * 
  */
 void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* topo_out){
@@ -137,7 +147,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top
 
     FLUPS_CHECK(ocomm_size==comm_size,"In and out communicators must have the same size.",LOCATION);
 
-
     //-------------------------------------------------------------------------
     /** - get the number of blocks and for each block get the size and the destination rank */
     //-------------------------------------------------------------------------
@@ -149,13 +158,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top
     int  oend[3];
     int  nByBlock[3];
 
-    // int  iblockIDStart[3];
-    // int  oblockIDStart[3];
-    // int* inBlockEachProc     = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* onBlockEachProc     = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* istartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-    // int* ostartBlockEachProc = (int*)flups_malloc(comm_size * 3 * sizeof(int));
-
     //-------------------------------------------------------------------------
     /** - Compute intersection ids */
     //-------------------------------------------------------------------------
@@ -169,33 +171,16 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top
     //-------------------------------------------------------------------------
     _cmpt_nByBlock(istart,iend,ostart,oend,nByBlock);
 
-    // _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv, iblockIDStart, istartBlockEachProc, inBlockEachProc);
-    // _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv, oblockIDStart, ostartBlockEachProc, onBlockEachProc);
     _cmpt_blockIndexes(istart, iend, nByBlock, topo_in, inBlockv);
     _cmpt_blockIndexes(ostart, oend, nByBlock, topo_out, onBlockv);
 
-    // // allocte the block size
-    // for (int id = 0; id < 3; id++) {
-    //     _iBlockSize[id] = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int));
-    //     _oBlockSize[id] = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int));
-    // }
-
     // allocate the destination ranks
     _i2o_destRank = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int));
     _o2i_destRank = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int));
-    // // allocate the destination tags
-    // _i2o_destTag = (int*)flups_malloc(inBlockv[0] * inBlockv[1] * inBlockv[2] * sizeof(int));
-    // _o2i_destTag = (int*)flups_malloc(onBlockv[0] * onBlockv[1] * onBlockv[2] * sizeof(int));
-
-    // // get the size of the blocks
-    // _cmpt_blockSize(inBlockv, iblockIDStart, nByBlock, istart, iend, _iBlockSize);
-    // _cmpt_blockSize(onBlockv, oblockIDStart, nByBlock, ostart, oend, _oBlockSize);
 
     // get the ranks
     _cmpt_blockDestRank(inBlockv,nByBlock,_shift,istart,topo_in,topo_out,_i2o_destRank);
     _cmpt_blockDestRank(onBlockv,nByBlock,mshift,ostart,topo_out,topo_in,_o2i_destRank);
-    // _cmpt_blockDestRankAndTag(inBlockv, iblockIDStart, topo_out, ostartBlockEachProc, onBlockEachProc, _i2o_destRank, _i2o_destTag);
-    // _cmpt_blockDestRankAndTag(onBlockv, oblockIDStart, topo_in, istartBlockEachProc, inBlockEachProc, _o2i_destRank,_o2i_destTag);
 
     // try to gather blocks together if possible, rewrittes the sizes, the blockistart, the number of blocks, the ranks and the tags
     _gather_blocks(topo_in, nByBlock, istart, iend, inBlockv, _iBlockSize, _iBlockiStart, &_inBlock, &_i2o_destRank);
@@ -209,12 +194,6 @@ void SwitchTopo_nb::_init_blockInfo(const Topology* topo_in, const Topology* top
     _o2i_sendRequest = (MPI_Request*)flups_malloc(_onBlock * sizeof(MPI_Request));
     _o2i_recvRequest = (MPI_Request*)flups_malloc(_inBlock * sizeof(MPI_Request));
 
-    // free the temp arrays
-    // flups_free(inBlockEachProc);
-    // flups_free(onBlockEachProc);
-    // flups_free(istartBlockEachProc);
-    // flups_free(ostartBlockEachProc);
-
     END_FUNC;
 }
 
@@ -271,8 +250,11 @@ void SwitchTopo_nb::setup(){
     int compIn, compOut;
     MPI_Comm_compare(inComm, _inComm, &compIn);
     MPI_Comm_compare(outComm, _outComm, &compOut);
-    if( compIn != MPI_IDENT || compOut != MPI_IDENT){
-        FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. I will recompute the communication scheme.",LOCATION);
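+    // if the graph communicator has the same numbering as the old comm, we skip the following. NOTE(review): MPI_IDENT also differs from MPI_CONGRUENT and would trigger the recompute - confirm this is intended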
+    if( compIn != MPI_CONGRUENT || compOut != MPI_CONGRUENT){
+        if (rank == 0){
+            FLUPS_WARNING("The inComm and/or outComm have changed since this switchtopo was created. I will recompute the communication scheme.",LOCATION);
+        }
 
         _inComm = inComm;
         _outComm = outComm;
diff --git a/src/SwitchTopo_nb.hpp b/src/SwitchTopo_nb.hpp
index 8150ec24..3838c94b 100644
--- a/src/SwitchTopo_nb.hpp
+++ b/src/SwitchTopo_nb.hpp
@@ -40,7 +40,7 @@
 #include "SwitchTopo.hpp"
 
 /**
- * @brief Takes care of the switch between to different topologies
+ * @brief Switch between two different topologies using non-blocking communications
  * 
  * Reorganize the memory between 2 different topologies, also accounting for a
  * "principal axis" which is aligned with the fast rotating index.
@@ -56,8 +56,8 @@
 class SwitchTopo_nb : public SwitchTopo {
    protected:
     int _selfBlockN=0;
-    int* _iselfBlockID = NULL;
-    int* _oselfBlockID = NULL;
+    int* _iselfBlockID = NULL; /**<@brief The list of the block IDs that stay on the current rank in the input topology (used while going from output to input) */
+    int* _oselfBlockID = NULL; /**<@brief The list of the block IDs that stay on the current rank in the output topology (used while going from input to output) */
 
     int* _i2o_destTag = NULL; /**<@brief The destination rank in the output topo of each block */
     int* _o2i_destTag = NULL; /**<@brief The destination rank in the output topo of each block */
diff --git a/src/Topology.cpp b/src/Topology.cpp
index b21bdcb6..53a33172 100644
--- a/src/Topology.cpp
+++ b/src/Topology.cpp
@@ -96,34 +96,17 @@ Topology::Topology(const int axis, const int nglob[3], const int nproc[3], const
 void Topology::cmpt_sizes() {
     BEGIN_FUNC;
     for (int id = 0; id < 3; id++) {
-        // compute the _nbyproc
-        // number of unknows everywhere except the last one
-        _nbyproc[id] = _nglob[id] / _nproc[id];  // integer division = floor
-        // if we don't change anything
-        int nlastProc = std::max(_nbyproc[id], _nglob[id] - _nbyproc[id] * (_nproc[id] - 1));
-        // if the last proc has too much unknows compare to the other
-        // and we are able to give up some points
-        while((nlastProc - _nbyproc[id]) > 1 && nlastProc >= _nproc[id]){
-            _nbyproc[id] += 1;
-            nlastProc -= (_nproc[id] - 1);
-        }
-        // if we are the last rank in the direction, we take everything what is left
-        if ((_rankd[id] < (_nproc[id] - 1))) {
-            _nloc[id] = _nbyproc[id];
-            // the memory size is the same as the local size
-            _nmem[id] = _nloc[id];
-        } else {
-            // we get the max between the nglob and
-            _nloc[id] = _nglob[id] - _nbyproc[id] * (_nproc[id] - 1);
-            _nmem[id] = _nloc[id];
-            // if we are in the axis, we padd to ensure that every pencil is ok with alignment
-            if (id == _axis) {
-                // compute by how many we are not aligned: the global size in double = nglob * nf
-                const int modulo = (_nglob[id] * _nf * sizeof(double)) % _alignment;
-                // compute the number of points to add (in double indexing)
-                const int delta = (_alignment - modulo) / sizeof(double);
-                _nmem[id] += (modulo == 0) ? 0 : delta / _nf;
-            }
+        // the local size is the number of unknowns owned by the current rank in this direction
+        _nloc[id] = cmpt_nbyproc(id);
+        _nmem[id] = _nloc[id];
+        // if we are in the axis, we pad to ensure that every pencil is ok with alignment
+        // if (id == _axis && _rankd[id] == (_nproc[id] - 1)) {
+        if (id == _axis) {
+            // compute by how many we are not aligned: the local size in double = nloc * nf
+            const int modulo = (_nloc[id] * _nf * sizeof(double)) % _alignment;
+            // compute the number of points to add (in double indexing)
+            const int delta = (_alignment - modulo) / sizeof(double);
+            _nmem[id] += (modulo == 0) ? 0 : delta / _nf;
         }
     }
     END_FUNC;
@@ -182,7 +165,7 @@ void Topology::cmpt_intersect_id(const int shift[3], const Topology* other, int
         // for the input configuration
         for (int i = 0; i < _nloc[id]; ++i) {
             // get the global id in the other topology
-            int oid_global = _rankd[id] * _nbyproc[id] + i + shift[id];
+            int oid_global = cmpt_start_id(id) + i + shift[id];
             if (oid_global <= 0) start[id] = i;
             if (oid_global < onglob) end[id] = i + 1;
         }
@@ -209,7 +192,7 @@ void Topology::disp() const {
     FLUPS_INFO(" - nmem = %d %d %d", _nmem[0], _nmem[1], _nmem[2]);
     FLUPS_INFO(" - nproc = %d %d %d", _nproc[0], _nproc[1], _nproc[2]);
     FLUPS_INFO(" - rankd = %d %d %d", _rankd[0], _rankd[1], _rankd[2]);
-    FLUPS_INFO(" - nbyproc = %d %d %d", _nbyproc[0], _nbyproc[1], _nbyproc[2]);
+    // FLUPS_INFO(" - nbyproc = %d %d %d", _nbyproc[0], _nbyproc[1], _nbyproc[2]);
     FLUPS_INFO(" - axproc = %d %d %d", _axproc[0], _axproc[1], _axproc[2]);
     FLUPS_INFO(" - isComplex = %d", _nf == 2);
     // FLUPS_INFO(" - h = %f %f %f",_h[0],_h[1],_h[2]);
diff --git a/src/Topology.hpp b/src/Topology.hpp
index cd7f301c..eaf54372 100644
--- a/src/Topology.hpp
+++ b/src/Topology.hpp
@@ -36,6 +36,12 @@
  * 
  * A topology describes the layout of the data on the current processor.
  * 
+ * The number of unknowns owned by a rank in each direction splits the ranks into two groups.
+ * First, we compute the integer division, nbyproc, between _nglob and _nproc.
+ * 
+ * The first group, named g0, owns nbyproc+1 unknowns. The group starts at rank 0 and ends at rank mod(_nglob,_nproc)-1, included.
+ * The second group, named g1, owns nbyproc unknowns. The group starts at rank mod(_nglob,_nproc) and ends at rank _nproc-1, included.
+ * 
  */
 class Topology {
    protected:
@@ -47,7 +53,7 @@ class Topology {
     int       _axis;       /**<@brief fastest rotating index in the topology  */
     int       _rankd[3];   /**<@brief rank of the current process per dim (012-indexing)  */
     int       _nglob[3];   /**<@brief number of unknows per dim, global (012-indexing)  */
-    int       _nbyproc[3]; /**<@brief mean number of unkows per dim = nloc except for the last one (012-indexing)  */
+    // int       _nbyproc[3]; /**<@brief mean number of unkows per dim = nloc except for the last one (012-indexing)  */
     const int _alignment;
     MPI_Comm  _comm; /**<@brief the comm associated with the topo, with ranks potentially optimized for switchtopos */
 
@@ -83,10 +89,36 @@ class Topology {
     inline int nmem(const int dim) const { return _nmem[dim]; }
     inline int nproc(const int dim) const { return _nproc[dim]; }
     inline int rankd(const int dim) const { return _rankd[dim]; }
-    inline int nbyproc(const int dim) const { return _nbyproc[dim]; }
-    inline int axproc(const int dim) const { return _axproc[dim]; }
-    inline MPI_Comm get_comm() const {return _comm; }
+    // inline int nbyproc(const int dim) const { return _nbyproc[dim]; }
+    inline int      axproc(const int dim) const { return _axproc[dim]; }
+    inline MPI_Comm get_comm() const { return _comm; }
+
     /**
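+     * @brief compute the number of unknowns owned by the current rank in a given direction
+     * 
+     * @param id the direction of interest
+     * @return int the number of unknowns owned by the current rank in the direction id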
+     */
+    inline int cmpt_nbyproc(const int id) const {
+        return (_nglob[id] / _nproc[id]) + 1 * ((_nglob[id] % _nproc[id]) > _rankd[id]); // integer division, plus one extra unknown if rankd is smaller than the remainder
+    }
+
+    /**
+     * @name Functions to compute the starting index of each topology
+     */
+    inline int cmpt_start_id(const int id) const {
+        return (_rankd[id]) * (_nglob[id] / _nproc[id]) + std::min(_rankd[id], _nglob[id] % _nproc[id]); // rankd * (integer division) + one for each lower rank that owns an extra unknown
+    }
+
+    inline int cmpt_rank_fromid(const int global_id, const int id) const{
+        const int nproc_g0 = _nglob[id]%_nproc[id]; // number of procs that have a +1 in their unknowns
+        const int nbyproc = _nglob[id]/_nproc[id]; // the number of unknowns in the integer division
+        const int global_g0 = nproc_g0*(nbyproc+1); // the number of unknowns in the first group of procs
+        // ids below global_g0 belong to the first group (nbyproc+1 unknowns per rank), the others to the second group (nbyproc unknowns per rank)
+        return (global_id < global_g0)? global_id/(nbyproc+1) : (global_id-global_g0)/nbyproc + nproc_g0;
+    }
+
+     /**
      * @name Functions to compute intersection data with other Topologies
      * 
      * @{
@@ -118,9 +150,9 @@ class Topology {
      * 
      */
     inline void get_istart_glob(int istart[3]) const {
-        istart[0]   = _rankd[0] * _nbyproc[0];
-        istart[1]   = _rankd[1] * _nbyproc[1];
-        istart[2]   = _rankd[2] * _nbyproc[2];
+        istart[0]   = cmpt_start_id(0);
+        istart[1]   = cmpt_start_id(1);
+        istart[2]   = cmpt_start_id(2);
     }
 
     /**
@@ -133,7 +165,7 @@ class Topology {
             _nglob[_axis] /= 2;
             _nloc[_axis] /= 2;
             _nmem[_axis] /= 2;
-            _nbyproc[_axis] /= 2;
+            // _nbyproc[_axis] /= 2;
         }
     }
     /**
@@ -146,7 +178,7 @@ class Topology {
             _nglob[_axis] *= 2;
             _nloc[_axis] *= 2;
             _nmem[_axis] *= 2;
-            _nbyproc[_axis] *= 2;
+            // _nbyproc[_axis] *= 2;
         }
     }
 
@@ -331,22 +363,6 @@ static inline void localSplit(const size_t id, const int size[3], const int axtr
     (*id2) = id / (size0 * size[ax1]);
 }
 
-/**
- * @brief Get the istart in global indexing
- * 
- * @param istart start index along the ax0 direction (fast rotating index in current topo), ax1 and ax2
- * @param topo 
- */
-inline static void get_istart_glob(int istart[3], const Topology *topo) {
-    const int ax0 = topo->axis();
-    const int ax1 = (ax0 + 1) % 3;
-    const int ax2 = (ax0 + 2) % 3;
-
-    istart[ax0] = topo->rankd(ax0) * topo->nbyproc(ax0);
-    istart[ax1] = topo->rankd(ax1) * topo->nbyproc(ax1);
-    istart[ax2] = topo->rankd(ax2) * topo->nbyproc(ax2);
-}
-
 /**
  * @brief compute the global symmetrized index of a given point.
  * 
@@ -360,6 +376,19 @@ inline static void get_istart_glob(int istart[3], const Topology *topo) {
  * @param symstart 
  * @param axtrg 
  * @param is 
+ * 
+ * Symmetry computation:
+ * We have to take the symmetry around symstart.
+ * E.g. in X direction:
+ *      `symstart[0] - (ix - symstart[0]) = 2 symstart[0] - ix`
+ * In some cases when we have an R2C transform, it asks for 2 additional doubles.
+ * The value is meaningless but we would like to avoid segfault and nan's.
+ * To do so, we use 2 tricks:
+ * - The `abs` is used to stay on the positive side and hence avoid negative memory access
+ * - The `max` is used to prevent the computation of the value in 0, which is never used in the symmetry.
+ * 
+ * The final formula is then ( in the X direction):
+ *      `max( abs(2.0 symstart[0] - ix) , 1)`
  */
 inline static void cmpt_symID(const int axsrc, const int i0, const int i1, const int i2, const int istart[3], const double symstart[3], const int axtrg, int is[3]) {
     // get the global indexes in the axsrc configuration
diff --git a/src/defines.hpp b/src/defines.hpp
index 3c97f410..41897af4 100644
--- a/src/defines.hpp
+++ b/src/defines.hpp
@@ -35,8 +35,6 @@
 #include "mpi.h"
 #include "flups.h"
 
-#define GREEN_DIM 3
-
 //=============================================================================
 // LOCATORS
 //=============================================================================
@@ -429,8 +427,6 @@ static inline void FLUPS_CHECK(bool a, std::string b, T1 c, T2 d, T3 e, T4 f, T5
     // CONSTANTS AND OTHERS
     //=============================================================================
 
-#define GAMMA 0.5772156649015328606
-
 template <typename T>
 static inline bool FLUPS_ISALIGNED(T a) {
     return ((uintptr_t)(const void*)a) % FLUPS_ALIGNMENT == 0;
diff --git a/src/expint.hpp b/src/expint.hpp
index f0d630c2..0cae7a1d 100644
--- a/src/expint.hpp
+++ b/src/expint.hpp
@@ -16,7 +16,7 @@
 
 #include <math.h>
 
-static const double c_gamma = 0.577215664901532861;
+static const double c_gamma = 0.5772156649015328606;
 
 static double expint1(double x);
 static double expint2(double x);
diff --git a/src/flups.h b/src/flups.h
index e53af561..f10d46cc 100644
--- a/src/flups.h
+++ b/src/flups.h
@@ -42,7 +42,7 @@ extern "C" {
 
 //=============================================================================
 /**
- * @name Common definitions
+ * @name STRUCTURES AND DEFINITIONS
  * @{
  */
 //=============================================================================
@@ -56,7 +56,8 @@ enum FLUPS_BoundaryType {
     EVEN = 0, /**< EVEN boundary condition = zero flux  */
     ODD  = 1, /**< ODD boundary condition = zero value */
     PER  = 3, /**< PERiodic boundary conditions */
-    UNB  = 4  /**< UNBounded boundary condition */
+    UNB  = 4,  /**< UNBounded boundary condition */
+    NONE = 9 /**< No boundary condition = dimension not used */
 };
 
 /**
@@ -83,19 +84,19 @@ enum FLUPS_SolverType {
 };
 
 /**
- * @brief to be used as "sign" for all of the FORARD tranform
+ * @brief to be used as "sign" for all of the FORWARD transforms
  * 
  */
-#define FLUPS_FORWARD -1  // = FFTW_FORWARD
+#define FLUPS_FORWARD -1  // equivalent to FFTW_FORWARD
 
 /**
  * @brief to be used as "sign" for all of the BACKWARD tranform
  * 
  */
-#define FLUPS_BACKWARD 1  // = FFTW_BACKWARD
+#define FLUPS_BACKWARD 1  // equivalent to FFTW_BACKWARD
 
 /**
- * @brief Memory alignment constant in bytes.
+ * @brief Memory alignment in bytes.
  * 
  */
 #define FLUPS_ALIGNMENT 16
@@ -129,12 +130,14 @@ typedef enum FLUPS_SolverType   FLUPS_SolverType;
  * 
  * @param size the data to be allocated
  */
-void * flups_malloc(size_t size);
+void* flups_malloc(size_t size);
 
 /**
  * 
  * @brief Free the memory allocated with flups_malloc
  * 
+ * @warning Memory allocated with flups_malloc must be freed using this function.
+ * 
  * @param data the data to be freed
  */
 void flups_free(void* data);
@@ -143,19 +146,26 @@ void flups_free(void* data);
  * @brief compute the memory local index for a point (i0,i1,i2) in axsrc-indexing in a memory.
  * The returned value is in the axtrg-indexing
  * 
- * For example if going through a topology following the standard indexing:
+ * For example if going through a complex topology following the standard indexing:
  * @code{.cpp}
- *  const int ax0     = flups_topo_get_axis(topo);
+    // the topology is complex
+    const int nf = 2;
+    // get the topology indexing
+    const int ax0     = flups_topo_get_axis(topo);
+    // the memory size is given in the 012 order
     const int nmem[3] = {flups_topo_get_nmem(topo,0),flups_topo_get_nmem(topo,1), flups_topo_get_nmem(topo,2)};
     for (int i2 = 0; i2 < flups_topo_get_nloc(topo,2); i2++) {
         for (int i1 = 0; i1 < flups_topo_get_nloc(topo,1); i1++) {
             for (int i0 = 0; i0 < flups_topo_get_nloc(topo,0); i0++) {
-                const size_t id = flups_locID(0, i0, i1, i2, ax0, nmem, 1);
- *                  
- *              data[id] = ...;
- *          }
- *      }
- *  }
+                // the i0, i1 and i2 are given in a 0-indexing
+                // the id is aimed for an array in the ax0-indexing
+                const size_t id = flups_locID(0, i0, i1, i2, ax0, nmem, nf);
+                    
+                data[id+0] = ...;
+                data[id+1] = ...;
+            }
+        }
+    }
  * @endcode
  * 
  * @param axsrc the FRI, reference axis aligned with index i0
@@ -163,7 +173,7 @@ void flups_free(void* data);
  * @param i1 the index in the (axsrc+1)%3 direction
  * @param i2 the index in the (axsrc+2)%3 direction
  * @param axtrg the topology FRI, i.e. the way the memory is aligned in the current topology
- * @param size the size of the memory (012-indexing)
+ * @param size the size of the memory (given in the 012-order)
  * @param nf the number of unknows in one element
  * @return size_t 
  */
@@ -179,16 +189,17 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co
 }
 
 /**
- * @brief compute the local k-index in spectral coordinates for a point (i0,i1,i2) in axsrc-indexing.
+ * @brief compute the symmetrized local index for a point (i0,i1,i2) in axsrc-indexing in an extended topology (e.g. spectral topologies).
  * The returned value is in the axtrg-indexing.
  * 
- * For example if going through a topology following the standard indexing:
+ * For example if going through a complex topology following the standard indexing, one can get the spectral indexing:
  * @code{.cpp}
- *  const int ax0     = flups_topo_get_axis(topoSpec);
+    const int ax0     = flups_topo_get_axis(topoSpec);
     const int ax1     = (ax0 + 1) % 3;
     const int ax2     = (ax0 + 2) % 3;
     const int nf      = 2; //topo is complex
     
+    // get the memory size of the spectral array
     int nmemSpec[3];
     for(int i=0;i<3;i++){
         nmemSpec[i] = flups_topo_get_nmem(topoSpec,i);
@@ -200,6 +211,7 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co
             const size_t id = flups_locID(ax0, 0, i1, i2, ax0, nmemSpec,nf);
             for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec,ax0); i0++) {
                 int is[3];
+                // get the symmetrized ID
                 flups_symID(ax0, i0, i1, i2, istartSpec, symstart, 0, is);
 
                 // the (symmetrized) wave numbers:
@@ -218,8 +230,8 @@ static inline size_t flups_locID(const int axsrc, const int i0, const int i1, co
  * @param i0 the index in the axsrc direction
  * @param i1 the index in the (axsrc+1)%3 direction
  * @param i2 the index in the (axsrc+2)%3 direction
- * @param istart start index of the local block (as provided by @flups_get_istartGlob)
- * @param symstart indexes where the symmetry starts (as provided by @flups_get_spectralInfo)
+ * @param istart start index of the local block (as provided by @ref flups_get_istartGlob)
+ * @param symstart indexes where the symmetry starts, i.e. the first index which is symmetrized (as provided by @ref flups_get_spectralInfo)
  * @param axtrg the FRI of the target topology, i.e. the way the memory is aligned in the current topology
  * @param is the spectral index
  */
@@ -248,7 +260,11 @@ static inline void flups_symID(const int axsrc, const int i0, const int i1, cons
 //=============================================================================
 
 /**
- * @brief Create and returns a topology.
+ * @brief Creates and returns a topology.
+ * 
+ * @warning Once specified, the fastest rotating index defines the memory layout.
+ * We assume fortran memory layout, i.e. if the FRI is 2, the next dimension is 0 and the last one is 1.
+ * This is opposed to the C indexing: when the FRI is 2, the next dimension is 1 and the last one is 0.
  * 
  * @param axis The direction which is aligned with the fastest rotating index
  * @param nglob The global number of points in each direction of the domain
@@ -292,7 +308,10 @@ int  flups_topo_get_axis(const FLUPS_Topology* t);
  */
 int  flups_topo_get_nglob(const FLUPS_Topology* t, const int dim);
 /**
- * @brief Determines the local number of points in the domain (on this process) in a given direction
+ * @brief Determines the local number of points in the domain (on this rank) in a given direction
+ * 
+ * @warning due to some memory padding to ensure memory alignment for the FFTs, @ref flups_topo_get_nloc may
+ * not return the same result as @ref flups_topo_get_nmem
  * 
  * @param t 
  * @param dim 
@@ -300,7 +319,10 @@ int  flups_topo_get_nglob(const FLUPS_Topology* t, const int dim);
  */
 int  flups_topo_get_nloc(const FLUPS_Topology* t, const int dim);
 /**
- * @brief Determines the local memory usage per direction
+ * @brief Determines the local memory size per direction
+ * 
+ * @warning due to some memory padding to ensure memory alignment for the FFTs, @ref flups_topo_get_nloc may
+ * not return the same result as @ref flups_topo_get_nmem
  * 
  * @param t 
  * @param dim 
@@ -324,21 +346,21 @@ int  flups_topo_get_nproc(const FLUPS_Topology* t, const int dim);
 void flups_topo_get_istartGlob(const FLUPS_Topology* t, int istart[3]);
 
 /**
- * @brief returns the local size of on this proc
+ * @brief returns the local size on this rank, i.e. the number of unknowns on this rank
  * 
  * @return long 
  */
 size_t flups_topo_get_locsize(const FLUPS_Topology* t);
 
 /**
- * @brief returns the memory size of on this proc
+ * @brief returns the memory size on this proc, i.e. the number of bytes on this proc, including padded memory
  * 
  * @return long 
  */
 size_t flups_topo_get_memsize(const FLUPS_Topology* t);
 
 /**
- * @brief returns the communicator of the topology
+ * @brief returns the MPI-communicator of the topology
  * 
  * @param t the Topology of interest
  * @param comm the communicator
@@ -364,21 +386,22 @@ MPI_Comm flups_topo_get_comm(FLUPS_Topology* t);
  */
 FLUPS_Solver* flups_init(FLUPS_Topology* t, const FLUPS_BoundaryType bc[3][2], const double h[3], const double L[3]);
 /**
- * @brief Same as @ref flups_init, with a profiler for the timing of the code (if compiled with PROF)
+ * @brief Same as @ref flups_init, with a profiler for the timing of the code (if compiled with PROF, if not, it will not use the profiler).
  * 
  * @param prof 
  */
 FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, const FLUPS_BoundaryType bc[3][2], const double h[3], const double L[3],FLUPS_Profiler* prof);
 
 /**
- * @brief must be called before execution terminates
+ * @brief must be called before execution terminates as it frees the memory used by the solver
  * 
  * @param s 
  */
 void flups_cleanup(FLUPS_Solver* s);
 
 /**
- * @brief 
+ * @brief sets the type of the Green's function used by the solver
+ * 
  * @warning must be done before @ref flups_setup
  * 
  * @param s 
@@ -387,14 +410,14 @@ void flups_cleanup(FLUPS_Solver* s);
 void    flups_set_greenType(FLUPS_Solver* s, const FLUPS_GreenType type);
 
 /**
- * @brief setup the solver
+ * @brief setup the solver and do the memory allocation
  * 
- * @warning after this call the solver cannot change anymore!
+ * @warning after this call the solver cannot be changed anymore!
  * 
  * @warning if changeComm is true, you need to update MPI rank based on the new communicator that is provided by @ref flups_topo_get_comm
  * 
  * @param s 
- * @param changeComm indicate if FLUPS is allowed to change the communicator of the Topology used to initialize the solver (only if compiled with RORDER_RANKS)
+ * @param changeComm indicate if FLUPS is allowed to change the communicator of the Topology used to initialize the solver (only valid if compiled with RORDER_RANKS)
  * @return double* 
  */
 double* flups_setup(FLUPS_Solver* s,const bool changeComm);
@@ -425,23 +448,80 @@ void flups_solve(FLUPS_Solver* s, double* field, double* rhs, const FLUPS_Solver
  */
 
 /**
- * @brief get the total amount of memory allocated by FLUPS
+ * @brief get the maximum amount of memory required by FLUPS
  * 
  * @param s 
  * @return size_t 
  */
 size_t flups_get_allocSize(FLUPS_Solver* s);
 
+/**
+ * @brief get information required to compute the spectral mode associated with each spectral field entry
+ * 
+ * The spectral mode in direction i is given by (index[i] + koffset[i])*kfact[i]
+ * 
+ * @param s the FLUPS solver
+ * @param kfact returns the multiplication factor used to get the spectral mode from the index (see the formula above)
+ * @param koffset returns the spectral offset given the type of boundary condition used
+ * @param symstart the first point which is symmetrized, to use with @ref flups_symID
+ */
 void flups_get_spectralInfo(FLUPS_Solver* s, double kfact[3], double koffset[3], double symstart[3]);
 
+/**
+ * @brief while using Hejlesen kernels, set the alpha factor, i.e. the number of grid points in the smoothing Gaussian
+ * 
+ * @param s 
+ * @param alpha 
+ */
 void flups_set_alpha(FLUPS_Solver* s, const double alpha);   //must be done before setup
+
+/**
+ * @brief sets the order of derivative while using divergence or rotational formulation
+ * 
+ * @param s 
+ * @param order 
+ */
 void flups_set_OrderDiff(FLUPS_Solver* s, const int order);  //must be done before setup
 
+/**
+ * @brief returns the physical topology, i.e. the one used for rhs and solution
+ * 
+ * @param s 
+ * @return const FLUPS_Topology* 
+ */
 const FLUPS_Topology* flups_get_innerTopo_physical(FLUPS_Solver* s);
+/**
+ * @brief returns the spectral topology, i.e. the one which is fully spectral
+ * 
+ * @param s 
+ * @return const FLUPS_Topology* 
+ */
 const FLUPS_Topology* flups_get_innerTopo_spectral(FLUPS_Solver* s);
 
+/**
+ * @brief do the copy from the data provided by the user to FLUPS owned data arrays
+ * 
+ * @param s 
+ * @param topo 
+ * @param data 
+ * @param sign 
+ */
 void flups_do_copy(FLUPS_Solver* s, const FLUPS_Topology* topo, double* data, const int sign);
+/**
+ * @brief compute the FFT, go from the physical space to the spectral one
+ * 
+ * @param s 
+ * @param data 
+ * @param sign 
+ */
 void flups_do_FFT(FLUPS_Solver* s, double* data, const int sign);
+/**
+ * @brief compute the multiplication between the Green's function and the field
+ * 
+ * @param s 
+ * @param data 
+ * @param type 
+ */
 void flups_do_mult(FLUPS_Solver* s, double* data, const FLUPS_SolverType type);
 
 /**@} */
@@ -453,10 +533,37 @@ void flups_do_mult(FLUPS_Solver* s, double* data, const FLUPS_SolverType type);
  * @{
  */
 
+/**
+ * @brief create a timer using the default name "default".
+ * 
+ * @return FLUPS_Profiler* 
+ */
 FLUPS_Profiler* flups_profiler_new();
+/**
+ * @brief create a timer with a name "name"
+ * 
+ * @param name 
+ * @return FLUPS_Profiler* 
+ */
 FLUPS_Profiler* flups_profiler_new_n(const char name[]);
+/**
+ * @brief free the profiler created
+ * 
+ * @param p 
+ */
 void            flups_profiler_free(FLUPS_Profiler* p);
+/**
+ * @brief display the profiler using the "root" as a reference
+ * 
+ * @param p 
+ */
 void            flups_profiler_disp_root(FLUPS_Profiler* p);
+/**
+ * @brief display the profiler using "name" as reference
+ * 
+ * @param p 
+ * @param name 
+ */
 void            flups_profiler_disp(FLUPS_Profiler* p,const char name[]);
 
 /**@} */
diff --git a/src/green_functions_3d.cpp b/src/green_functions.cpp
similarity index 56%
rename from src/green_functions_3d.cpp
rename to src/green_functions.cpp
index 7aa8b966..f81f778c 100644
--- a/src/green_functions_3d.cpp
+++ b/src/green_functions.cpp
@@ -1,5 +1,5 @@
 /**
- * @file green_functions_3d.cpp
+ * @file green_functions.cpp
  * @author Thomas Gillis and Denis-Gabriel Caprace
  * @copyright Copyright © UCLouvain 2019
  * 
@@ -23,59 +23,17 @@
  * 
  */
 
-#include "green_functions_3d.hpp"
-
-// **Symmetry computation:**
-// 
-// We have to take the symmetry around symstart. e.g. in X direction: `symstart[0] - (ix - symstart[0]) = 2 symstart[0] - ix`
-// 
-// In some cases when we have an R2C transform, it ask for 2 additional doubles.
-// The value is meaningless but we would like to avoid segfault and nan's.
-// To do so, we use 2 tricks:
-// - The `abs` is used to stay on the positivie side and hence avoid negative memory access
-// - The `max` is used to prevent the computation of the value in 0, which is never used in the symmetry.
-// 
-// As an example, the final formula is then ( in the X direction):
-// `max( abs(2 symstart[0] - ix) , 1)`
+#include "green_functions.hpp"
+#include "green_kernels.hpp"
 
 /**
  * @brief generic type for Green kernel, takes a table of parameters that can be used depending on the kernel
  * 
  */
-typedef double (*GreenKernel)(const void* );
-
-
-/**
- * @name 3 directions unbounded - 0 direction spectral
- * 
- * @{
- */
-// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
-//notice that these function will likely not be inlined as we have a pointer to them...
-static inline double _hej_2_3unb0spe(const void* params) {
-    double r   = ((double*)params) [0];
-    double eps = ((double*)params) [1];
-    return c_1o4pi / r * (erf(r / eps * c_1osqrt2));
-}
-static inline double _hej_4_3unb0spe(const void* params) {
-    double r   = ((double*)params) [0];
-    double eps = ((double*)params) [1];
-    double rho = r / eps;
-    return c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (rho)*exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
-}
-static inline double _hej_6_3unb0spe(const void* params) {
-    double r   = ((double*)params) [0];
-    double eps = ((double*)params) [1];
-    double rho = r / eps;
-    return c_1o4pi / r * (c_1osqrt2 * c_1osqrtpi * (c_7o4 * rho - c_1o4 * pow(rho, 3)) * exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
-}
-static inline double _chat_2_3unb0spe(const void* params) {
-    double r   = ((double*)params) [0];
-    return c_1o4pi / r ;
-}
+typedef double (*GreenKernel)(const void*,const double*);
 
 /**
- * @brief Compute the Green function for 3dirunbounded
+ * @brief Compute the Green function for 0 dir spectral (i.e. 3 dir unbounded or 2 dir unbounded)
  * 
  * @param topo the topology associated to the Green's function
  * @param hfact the h multiplication factors
@@ -83,9 +41,8 @@ static inline double _chat_2_3unb0spe(const void* params) {
  * @param green the Green function array
  * @param typeGreen the type of Green function 
  * @param eps the smoothing length (only used for HEJ kernels)
- * 
  */
-void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){
+void cmpt_Green_3dirunbounded(const Topology *topo, const double hfact[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){
     BEGIN_FUNC;
 
     FLUPS_CHECK(!(topo->isComplex()),"Green topology cannot been complex with 0 dir spectral", LOCATION);
@@ -95,29 +52,37 @@ void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double
     FLUPS_CHECK(hfact[1] != 0.0, "grid spacing cannot be 0", LOCATION);
     FLUPS_CHECK(hfact[2] != 0.0, "grid spacing cannot be 0", LOCATION);
 
-    double      G0;  //value of G in 0
     GreenKernel G;
 
+    double  G0;  //value of G in 0
+    int     GN    = 0;
+    double *Gdata = NULL;
+
+    //==========================    3D  =================================
     switch (typeGreen) {
         case HEJ_2:
             G  = &_hej_2_3unb0spe;
-            G0 =       M_SQRT2 / (4.0 * eps * sqrt(M_PI * M_PI * M_PI));
+            G0 = - M_SQRT2 / (4.0 * eps * sqrt(M_PI * M_PI * M_PI));
             break;
         case HEJ_4:
             G  = &_hej_4_3unb0spe;
-            G0 = 3.0 * M_SQRT2 / (8.0 * eps * sqrt(M_PI * M_PI * M_PI));
+            G0 = - 3.0 * M_SQRT2 / (8.0 * eps * sqrt(M_PI * M_PI * M_PI));
             break;
         case HEJ_6:
             G  = &_hej_6_3unb0spe;
-            G0 = 15.0 * M_SQRT2 / (32.0 * eps * sqrt(M_PI * M_PI * M_PI));
+            G0 = - 15.0 * M_SQRT2 / (32.0 * eps * sqrt(M_PI * M_PI * M_PI));
             break;
         case CHAT_2:
             G  = &_chat_2_3unb0spe;
-            G0 = .5 * pow(1.5 * c_1o2pi * hfact[0] * hfact[1] * hfact[2], 2. / 3.);
+            G0 = - 0.5 * pow(1.5 * c_1o2pi * hfact[0] * hfact[1] * hfact[2], 2. / 3.);
             break;
         case LGF_2:
-            FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION);
-            //please add the parameters you need to params
+            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs", LOCATION);
+            FLUPS_CHECK(hfact[1] == hfact[2], "the grid has to be isotropic to use the LGFs", LOCATION);
+            // read the LGF data and store it
+            _lgf_readfile(3,&GN, &Gdata);
+            // associate the Green's function
+            G = &_lgf_2_3unb0spe;
             break;
         default:
             FLUPS_ERROR("Green Function type unknow.", LOCATION);
@@ -149,95 +114,26 @@ void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double
                 const double r2 = x0 * x0 + x1 * x1 + x2 * x2;
                 const double r  = sqrt(r2);
 
-                const double tmp[2] = {r, eps};
-                green[id + i0 * nf] = -G(tmp);
+                // the first two arguments are used by the standard kernels, the two zeros keep the layout compatible with the 2dirunbounded function,
+                // and the last five entries (the three indexes, GN and the grid spacing) are intended for the LGFs only
+                const double tmp[9] = {r, eps, 0, 0, is[ax0], is[ax1], is[ax2], GN, hfact[ax0]};
+                green[id + i0 * nf] = G(tmp,Gdata);
             }
         }
     }
-    // reset the value in 0.0
-    if (istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) {
-        green[0] = -G0;
+    // reset the value in 0.0 but not for LGF's since we have already pre-computed its value
+    if (typeGreen != LGF_2 && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) {
+        green[0] = G0;
+    }
+    // free Gdata if needed
+    if (Gdata != NULL) {
+        flups_free(Gdata);
     }
-    END_FUNC;
-}
-/**@} */
-
-
-/**
- * @name 2 directions unbounded - 1 direction spectral
- * 
- * @{
- */
-// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
-static inline double _hej_2_2unb1spe_k0(const void* params) {
-    const double r   = ((double*)params)[0];
-    const double sig = ((double*)params)[2];
-
-    const double rho = r/sig;
-    const double rho2 = rho*rho;
-    // return -c_1o2pi * (log(r) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
-    return -c_1o2pi * (log(r) + .5 * expint_ei(rho2 / 2));
-    // return -c_1o2pi * (.5*log(rho*.5) + .5 * expint_ei(rho2 / 2));
-}
-static inline double _hej_2_2unb1spe_r0(const void* params) {
-    const double sig = ((double*)params)[2];
-
-    return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig));
-}
-
-static inline double _hej_4_2unb1spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double sig = ((double*)params) [2];
-
-    const double rho  = r/sig;
-    const double rho2 = rho*rho;
-    // return -c_1o2pi * (log(r) - (1 - .5 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
-    return -c_1o2pi * (log(r) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
-    // return -c_1o2pi * (.5*log(rho2*.5) - exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
-}
-static inline double _hej_4_2unb1spe_r0(const void* params) {
-    const double sig = ((double*)params)[2];
-
-    return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .5);
-}
-
-static inline double _hej_6_2unb1spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double rho2 = rho*rho;
-    // return -c_1o2pi * (log(r) - (1 - rho2 + .125 * rho2 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2)); //mistaken coefs in [Spietz2018]
-    return -c_1o2pi * (log(r) - (.75 - .125 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
-    // return -c_1o2pi * (.5*log(rho2*.5) - (.75 - .125 * rho2) * exp(-rho2 / 2) + .5 * expint_ei(rho2 / 2));
-}
-static inline double _hej_6_2unb1spe_r0(const void* params) {
-    const double sig = ((double*)params)[2];
 
-    return c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sig) + .75);
-}
-static inline double _zero(const void* params) {   
-    return - 0.0;
+    END_FUNC;
 }
 
-static inline double _chat_2_2unb1spe(const void* params) {
-    const double r      = ((double*)params) [0];
-    const double k      = ((double*)params) [1];
 
-    return c_1o2pi * besselk0(fabs(k) * r);
-}
-static inline double _chat_2_2unb1spe_r0(const void* params) {
-    const double k      = ((double*)params) [1];
-    const double r_eq2D = ((double*)params) [3];
-
-    return (1.0 - k * r_eq2D * besselk1(k * r_eq2D)) * c_1opi / ((k * r_eq2D) * (k * r_eq2D));
-}
-static inline double _chat_2_2unb1spe_k0(const void* params) {
-    const double r      = ((double*)params) [0];
-    // const double sig = ((double*)params)[2];
-    
-    return  - c_1o2pi * log(r) ; //caution: mistake on the sign in [Chatelain2010]
-}
 
 /**
  * @brief Compute the Green function for 2dirunbounded and 1dirspectral
@@ -252,47 +148,52 @@ static inline double _chat_2_2unb1spe_k0(const void* params) {
  * @param green the Green function array
  * @param typeGreen the type of Green function 
  * @param eps the smoothing length (only used for HEJ kernels)
+ * 
+ * @warning For 3D kernels: According to [Spietz2018], we can obtain the **approximate** Green kernel by using the 2D unbounded kernel 
+            for mode 0 in the spectral direction, and the rest of the Green kernel is the same as in full spectral.
+            Here we fill most of the Green data with zeros: we are only interested in doing the FFT
+            of _hej_*_2unb1spe_k0 in the 2 remaining spatial directions. We will complete the Green function with the
+            full spectral part afterwards, while going through Solver::_cmptGreenFunction.
+ * 
  */
-void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) {
+void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) {
     BEGIN_FUNC;
     
     // assert that the green spacing and dk is not 0.0 - this is also a way to check that ax0 will be spectral, and the others are still to be transformed
     FLUPS_CHECK(kfact[0] != hfact[0], "grid spacing[0] cannot be = to dk[0]", LOCATION);
     FLUPS_CHECK(kfact[1] != hfact[1], "grid spacing[1] cannot be = to dk[1]", LOCATION);
-    FLUPS_CHECK(kfact[2] != hfact[2], "grid spacing[2] cannot be = to dk[2]", LOCATION);
+    // check that if hfact or kfact != 0, they are not the same
+    FLUPS_CHECK(!(kfact[2] == hfact[2] && (kfact[2]!= 0.0 || hfact[2] != 0.0)), "grid spacing[2] cannot be = to dk[2]", LOCATION);
 
     // @Todo For Helmolz, we need Green to be complex 
     // FLUPS_CHECK(topo->isComplex(), "I can't fill a non complex topo with a complex green function.", LOCATION);
     // opt_double_ptr mygreen = green; //casting of the Green function to be able to access real and complex part
     //Implementation note: if you want to do Helmolz, you need Hankel functions (3rd order Bessel) which are not implemented in stdC. Consider the use of boost lib.
     //notice that bessel_k has been introduced in c++17
-    
+
     GreenKernel G;    // the Green kernel (general expression in the whole domain)
     GreenKernel Gk0;  // the Green kernel (particular expression in k=0)
     GreenKernel Gr0;  // the Green kernel (particular expression in r=0)
 
+    int     GN    = 0;
+    double *Gdata = NULL;
+
     switch (typeGreen) {
         case HEJ_2:
-            FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation.", LOCATION);
-            
-            // Note: 
-            // According to [Spietz2018], we can obtain the **approximate** Green kernel by using the 2D unbounded kernel 
-            // for mode 0 in the spectral direction, and the rest of the Green kernel is the same as in full spectral.
-            // We here fill with zero the greatest part of Green: we are actually interested only in doing the FFT
-            // of _hej_*_2unb1spe_k0 in the 2 remaining spatial directions. We will complete the Green function with the
-            // full spectral part afterwards. 
+            FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION);
+            // see warning in the function description
             G   = &_zero;
             Gk0 = &_hej_2_2unb1spe_k0;
             Gr0 = &_hej_2_2unb1spe_r0;
             break;
         case HEJ_4:
-            // FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation.");
+            FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION);
             G   = &_zero;
             Gk0 = &_hej_4_2unb1spe_k0;
             Gr0 = &_hej_4_2unb1spe_r0;
             break;
         case HEJ_6:
-            // FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail a approximation.");
+            FLUPS_WARNING("HEJ kernels in 2dirunbounded 1dirspectral entail an approximation.", LOCATION);
             G   = &_zero;
             Gk0 = &_hej_6_2unb1spe_k0;
             Gr0 = &_hej_6_2unb1spe_r0;
@@ -304,7 +205,13 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double
             // caution: the value of G in k=r=0 is specified at the end of this routine
             break;
         case LGF_2:
-            FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION);
+            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs", LOCATION);
+            // read the LGF data and store it
+            _lgf_readfile(2,&GN, &Gdata);
+            // associate the Green's function
+            G   = &_zero;
+            Gk0 = &_lgf_2_2unb0spe;
+            Gr0 = &_lgf_2_2unb0spe;
             break;
         default:
             FLUPS_ERROR("Green Function type unknow.", LOCATION);
@@ -323,19 +230,18 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double
     for (int i2 = 0; i2 < topo->nloc(ax2); i2++) {
         for (int i1 = 0; i1 < topo->nloc(ax1); i1++) {
             //local indexes start
-            const size_t id = localIndex(ax0,0, i1, i2, ax0, nmem,nf);
-        
+            const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, nf);
+
             for (int i0 = 0; i0 < topo->nloc(ax0); i0++) {
-                
                 // global indexes
                 int is[3];
-                cmpt_symID(ax0,i0,i1,i2,istart,symstart,0,is);
+                cmpt_symID(ax0, i0, i1, i2, istart, symstart, 0, is);
 
                 // (symmetrized) wave number : only one kfact is non-zero
                 const double k0 = (is[ax0] + koffset[ax0]) * kfact[ax0];
                 const double k1 = (is[ax1] + koffset[ax1]) * kfact[ax1];
                 const double k2 = (is[ax2] + koffset[ax2]) * kfact[ax2];
-                const double k = k0 + k1 + k2;
+                const double k  = k0 + k1 + k2;
 
                 //(symmetrized) position : only one hfact is zero
                 const double x0 = (is[ax0]) * hfact[ax0];
@@ -343,110 +249,32 @@ void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double
                 const double x2 = (is[ax2]) * hfact[ax2];
                 const double r  = sqrt(x0 * x0 + x1 * x1 + x2 * x2);
 
-                const double tmp[4] = {r, k, eps, r_eq2D};
+                const double tmp[9] = {r, k, eps, r_eq2D, is[ax0], is[ax1], is[ax2], GN, hfact[ax0]};
 
                 // green function value
                 // Implementation note: having a 'if' in a loop is highly discouraged... however, this is the init so we prefer having a
                 // this routine with a high readability and lower efficency than the opposite.
                 if (r <= (hfact[ax0] + hfact[ax1] + hfact[ax2]) * .2) {
-                    green[id + i0 * topo->nf()] = -Gr0(tmp);
+                    // we should enter this case for 2d and 3d cases
+                    green[id + i0 * topo->nf()] = Gr0(tmp, Gdata);
                 } else if (k <= (kfact[ax0] + kfact[ax1] + kfact[ax2]) * 0.2) {
-                    green[id + i0 * topo->nf()] = -Gk0(tmp);
+                    // we should always enter this routine for 2d case and sometimes for 3d cases
+                    green[id + i0 * topo->nf()] = Gk0(tmp, Gdata);
                 } else {
-                    green[id + i0 * topo->nf()] = -G(tmp);
+                    green[id + i0 * topo->nf()] = G(tmp, Gdata);
                 }
             }
         }
     }
     
-    // reset the value in x=y=0.0 and k=0
-    if (typeGreen == CHAT_2 && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) {
+    // reset the value in x=y=0.0 and k=0 for singular expressions
+    if ((typeGreen == CHAT_2) && istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0) {
         // green[0] = -2.0 * log(1 + sqrt(2)) * c_1opiE3o2 / r_eq2D;
-        green[0] = .25 * c_1o2pi * (M_PI - 6.0 + 2. * log(.5 * M_PI * r_eq2D));  //caution: mistake in [Chatelain2010]
+        green[0] = - 0.25 * c_1o2pi * (M_PI - 6.0 + 2.0 * log(0.5 * M_PI * r_eq2D));  //caution: mistake in [Chatelain2010]
     }
     END_FUNC;
 }
-/**@} */
-
-
-/**
- * @name 1 direction unbounded - 2 directions spectral
- * 
- * @{
- */
-// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
-static inline double _hej_2_1unb2spe(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double k   = ((double*)params) [1];
-    const double sig = ((double*)params) [2];
 
-    const double rho = r/sig;
-    const double s   = k*sig;
-
-    const double subfun = s * rho > 100. ? 0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho));
-    return .25 * sig / s * subfun ;
-}
-static inline double _hej_2_1unb2spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double rosqrt2 = r*c_1osqrt2;
-    // return -.5* (r * erf(rosqrt2/sig) + (exp(-r*r/(2*sig*sig)) - 1.)*sig*M_SQRT2*c_1osqrtpi) ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
-    return -.5* r * erf(rosqrt2/sig) + (1.-exp(-rho*rho*.5)) *sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
-}
-
-static inline double _hej_4_1unb2spe(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double k   = ((double*)params) [1];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double s   = k*sig;
-    const double subfun = s * rho > 100. ? 0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho));
-    return .25 * sig / s * subfun + \
-           sig * M_SQRT2 * c_1osqrtpi * .25 * exp(-.5 * (s * s + rho * rho));
-}
-static inline double _hej_4_1unb2spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double rosqrt2 = r*c_1osqrt2;
-    return -.5* r * erf(rosqrt2/sig) + (1.-exp(-rho*rho*.5)) *.5*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
-}
-
-static inline double _hej_6_1unb2spe(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double k   = ((double*)params) [1];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double s   = k*sig;
-    const double subfun = s * rho > 100. ? 0 : ((1 - erf(c_1osqrt2 * (s - rho))) * exp(-s * rho) + (1 - erf(c_1osqrt2 * (s + rho))) * exp(s * rho));
-    return .25 * sig / s * subfun + \
-           sig * M_SQRT2 * c_1osqrtpi * (c_5o16 + c_1o16 * (s * s - rho * rho)) * exp(-.5 * (s * s + rho * rho));
-}
-static inline double _hej_6_1unb2spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double sig = ((double*)params) [2];
-
-    const double rho = r/sig;
-    const double rosqrt2 = r*c_1osqrt2;
-    return -.5* r * erf(rosqrt2/sig) + (3.-exp(-rho*rho*.5) * (rho*rho+3.) ) *.125*sig*c_1osqrt2*c_1osqrtpi ; //mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
-}
-
-static inline double _chat_2_1unb2spe(const void* params) {
-    const double r   = ((double*)params) [0];
-    const double k   = ((double*)params) [1];
-
-    return .5 * exp(-k * r) / k;
-}
-static inline double _chat_2_1unb2spe_k0(const void* params) {
-    const double r   = ((double*)params) [0];
-
-    return -.5 * fabs(r);
-}
 
 
 /**
@@ -463,13 +291,14 @@ static inline double _chat_2_1unb2spe_k0(const void* params) {
  * @param typeGreen the type of Green function 
  * @param eps the smoothing length (only used for HEJ kernels)
  */
-void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) {
+void cmpt_Green_1dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) {
     BEGIN_FUNC;
 
     // assert that the green spacing and dk is not 0.0 - this is also a way to check that ax0 will be spectral, and the others are still to be transformed
     FLUPS_CHECK(kfact[0] != hfact[0], "grid spacing[0] cannot be = to dk[0]", LOCATION);
     FLUPS_CHECK(kfact[1] != hfact[1], "grid spacing[1] cannot be = to dk[1]", LOCATION);
-    FLUPS_CHECK(kfact[2] != hfact[2], "grid spacing[2] cannot be = to dk[2]", LOCATION);
+    // check that if hfact or kfact != 0, they are not the same
+    FLUPS_CHECK(!(kfact[2] == hfact[2] && (kfact[2]!= 0.0 || hfact[2] != 0.0)), "grid spacing[2] cannot be = to dk[2]", LOCATION);
 
     // @Todo For Helmolz, we need Green to be complex 
     // FLUPS_CHECK(topo->isComplex(), "I can't fill a non complex topo with a complex green function.", LOCATION);
@@ -537,10 +366,10 @@ void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double
                 // Implementation note: having a 'if' in a loop is highly discouraged... however, this is the init so we prefer having a
                 // this routine with a high readability and lower efficency than the opposite.
                 if (k <= (kfact[ax0] + kfact[ax1] + kfact[ax2]) * 0.2) {
-                    green[id + i0 * nf] = -G0(tmp);
+                    green[id + i0 * nf] = G0(tmp,NULL);
                 }
                 else{
-                    green[id + i0 * nf] = -G(tmp);
+                    green[id + i0 * nf] = G(tmp,NULL);
                 }
             }
         }
@@ -548,43 +377,6 @@ void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double
     END_FUNC;
 }
 
-/**@} */
-
-
-/**
- * @name 3 directions spectral
- * 
- * @{
- */
-// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
-static inline double _hej_2_0unb3spe(const void* params) {
-    const double ksqr = ((double*)params)[0];
-    const double sig  = ((double*)params)[1];
-
-    const double ssqr = ksqr * (sig * sig);
-    return exp(-ssqr / 2) / (ksqr); 
-}
-static inline double _hej_4_0unb3spe(const void* params) {
-    const double ksqr = ((double*)params)[0];
-    const double sig  = ((double*)params)[1];
-
-    const double ssqr = ksqr * (sig * sig);
-    return (1 + ssqr / 2) * exp(-ssqr / 2) / (ksqr);
-}
-static inline double _hej_6_0unb3spe(const void* params) {
-    const double ksqr = ((double*)params)[0];
-    const double sig  = ((double*)params)[1];
-
-    const double ssqr = ksqr * (sig * sig);
-    return (1 + ssqr / 2 + ssqr * ssqr / 8) * exp(-ssqr / 2) / (ksqr);
-}
-
-static inline double _chat_2_0unb3spe(const void* params) {
-    const double ksqr   = ((double*)params) [0];
-
-    return 1 / ksqr;
-}
-
 /**
  * @brief Compute the Green function for 3dirspectral (in the whole spectral domain)
  * 
@@ -597,6 +389,7 @@ static inline double _chat_2_0unb3spe(const void* params) {
  * The wave number in each direction is obtained as k_i = (i_s + koffset_i) * kfact_i, where is the global (potentially symmetric) index.
  * 
  * @param topo the topology associated to the Green's function
+ * @param hgrid the grid spacing h = hx = hy = hz, used only for the LGF
  * @param kfact the k multiplicative factor
  * @param koffset the k additive factor
  * @param symstart index of the symmetry in each direction
@@ -604,11 +397,10 @@ static inline double _chat_2_0unb3spe(const void* params) {
  * @param typeGreen the type of Green function 
  * @param eps the smoothing length (only used for HEJ kernels)
  */
-void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps){
-    cmpt_Green_3D_0dirunbounded_3dirspectral(topo, kfact, koffset, symstart, green, typeGreen, eps, NULL, NULL);
+void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps) {
+    cmpt_Green_0dirunbounded(topo, hgrid, kfact, koffset, symstart, green, typeGreen, eps, NULL, NULL);  // NULL custom bounds: fill from the beginning to the end of the spectral space
 }
 
-
 /**
  * @brief Compute the Green function for 3dirspectral (in a portion of the spectral domain)
  * 
@@ -627,13 +419,13 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double
  * @param istart_custom global index where we start to fill data, in each dir. If NULL, we start at the beginning of the spectral space.
  * @param iend_custom global index where we end to fill data, in each dir. If NULL, we end at the end of the spectral space.
  */
-void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]){
+void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid, const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]) {
     BEGIN_FUNC;
 
     // assert that the green spacing is not 0.0 everywhere
     FLUPS_CHECK(kfact[0] != 0.0, "dk cannot be 0", LOCATION);
     FLUPS_CHECK(kfact[1] != 0.0, "dk cannot be 0", LOCATION);
-    FLUPS_CHECK(kfact[2] != 0.0, "dk cannot be 0", LOCATION);
+    // FLUPS_CHECK(kfact[2] != 0.0, "dk cannot be 0", LOCATION);
 
     GreenKernel G;   // the Green kernel (general expression in the whole domain)
 
@@ -651,7 +443,7 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double
             G = &_chat_2_0unb3spe;
             break;
         case LGF_2:
-            FLUPS_ERROR("Lattice Green Function not implemented yet.", LOCATION);
+            G = &_lgf_2_0unb3spe;
             break;
         default:
             FLUPS_ERROR("Green Function type unknow.", LOCATION);
@@ -708,17 +500,18 @@ void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo, const double
                 // green function value
                 const double ksqr = k0 * k0 + k1 * k1 + k2 * k2;
 
-                const double tmp[2] = {ksqr, eps};
+                // const double tmp[2] = {ksqr, eps};
+                const double tmp[6] = {ksqr, eps, k0, k1, k2, hgrid};
 
-                green[id + i0 * nf] = -G(tmp);
+                green[id + i0 * nf] = G(tmp,NULL);
             }
         }
     }
     // reset the value in 0.0
     if (istart[ax0] == 0 && istart[ax1] == 0 && istart[ax2] == 0 \
         && koffset[0]+koffset[1]+koffset[2]<0.2 ) {
-        green[0] = -0.0;
+        green[0] = 0.0;
     }
     END_FUNC;
 }
-/**@} */
+
diff --git a/src/green_functions.hpp b/src/green_functions.hpp
new file mode 100644
index 00000000..85c276aa
--- /dev/null
+++ b/src/green_functions.hpp
@@ -0,0 +1,83 @@
+/**
+ * @file green_functions.hpp
+ * @author Thomas Gillis and Denis-Gabriel Caprace
+ * @copyright Copyright © UCLouvain 2019
+ * 
+ * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers.
+ * 
+ * Copyright (C) <2019> <Universite catholique de Louvain (UCLouvain), Belgique>
+ * 
+ * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file.
+ * 
+ * This program (FLUPS) is free software: 
+ * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program (see COPYING file).  If not, 
+ * see <http://www.gnu.org/licenses/>.
+ * 
+ */
+
+#include "defines.hpp"
+#include "Topology.hpp"
+#include "bessel.hpp"
+#include "expint.hpp"
+
+// define macros to stringify: both are required so that the argument is macro-expanded before being turned into a string
+#define STR(a) ZSTR(a)
+#define ZSTR(a) #a
+
+
+void cmpt_Green_3dirunbounded(const Topology *topo, const double hfact[3],                                                 const double symstart[3], double *green, GreenType typeGreen, const double eps);
+void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
+void cmpt_Green_1dirunbounded(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
+void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid   , const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
+void cmpt_Green_0dirunbounded(const Topology *topo, const double hgrid   , const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]);
+
+/**
+ * @brief read the pre-computed LGF kernel file stored in the KERNEL_PATH folder
+ * 
+ * @param [in] greendim the dimension of the Green function to use: 2 (N is set to 32) or 3 (N is set to 64)
+ * @param [out] N the size above which we switch to the approximation, i.e. the size of the pre-stored kernel is N^3
+ * @param [out] data the buffer of N^3 doubles, allocated here with flups_malloc, where we store the values read from the file
+ */
+static void _lgf_readfile(const int greendim, int* N, double** data) {
+    BEGIN_FUNC;
+
+    // the kernel size N and the file name are fixed by the dimension
+    char lgfname[512];
+    char path[] = STR(KERNEL_PATH);
+    if (greendim == 3) {
+        (*N) = 64;
+        snprintf(lgfname, sizeof(lgfname), "%s/LGF_3d_sym_acc12_%d.ker", path, (*N));
+    } else if (greendim == 2) {
+        (*N) = 32;
+        snprintf(lgfname, sizeof(lgfname), "%s/LGF_2d_sym_acc12_%d.ker", path, (*N));
+    } else {
+        FLUPS_ERROR("Greendim = %d is not available in this version", greendim, LOCATION);
+    }
+
+    // open the file in binary mode: it is read as raw doubles and a text-mode stream may alter the bytes on some platforms
+    FILE *lgf_file = fopen(lgfname, "rb");
+    // display the information to the user
+    FLUPS_INFO_1("loading the LGF kernel function %s", lgfname);
+
+    // start to read the file: data stays NULL if the file cannot be opened
+    (*data) = NULL;
+    if (lgf_file != NULL) {
+        // allocate the data, fill it from the file and close the file
+        const int size = (*N) * (*N) * (*N);
+        (*data) = (double *)flups_malloc(sizeof(double) * size);
+        const size_t nread = fread((*data), sizeof(double), size, lgf_file);
+        fclose(lgf_file);
+        if (nread == 0) { FLUPS_ERROR("unable to read data from file %s", lgfname, LOCATION); }  // do not silently continue with an unfilled kernel
+    } else {
+        FLUPS_ERROR("unable to read file %s", lgfname, LOCATION);
+    }
+    END_FUNC;
+}
\ No newline at end of file
diff --git a/src/green_functions_3d.hpp b/src/green_functions_3d.hpp
deleted file mode 100644
index 3619e38a..00000000
--- a/src/green_functions_3d.hpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * @file green_functions_3d.hpp
- * @author Thomas Gillis and Denis-Gabriel Caprace
- * @copyright Copyright © UCLouvain 2019
- * 
- * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers.
- * 
- * Copyright (C) <2019> <Universite catholique de Louvain (UCLouvain), Belgique>
- * 
- * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file.
- * 
- * This program (FLUPS) is free software: 
- * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program (see COPYING file).  If not, 
- * see <http://www.gnu.org/licenses/>.
- * 
- */
-
-#include "defines.hpp"
-#include "Topology.hpp"
-#include "bessel.hpp"
-#include "expint.hpp"
-
-void cmpt_Green_3D_3dirunbounded_0dirspectral(const Topology *topo, const double hfact[3],                                                 const double symstart[3], double *green, GreenType typeGreen, const double eps);
-void cmpt_Green_3D_2dirunbounded_1dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
-void cmpt_Green_3D_1dirunbounded_2dirspectral(const Topology *topo, const double hfact[3], const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
-void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo,                        const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps);
-void cmpt_Green_3D_0dirunbounded_3dirspectral(const Topology *topo,                        const double kfact[3], const double koffset[3], const double symstart[3], double *green, GreenType typeGreen, const double eps, const int istart_custom[3], const int iend_custom[3]);
diff --git a/src/green_kernels.hpp b/src/green_kernels.hpp
new file mode 100644
index 00000000..af99bd0f
--- /dev/null
+++ b/src/green_kernels.hpp
@@ -0,0 +1,368 @@
+/**
+ * @file green_kernels.hpp
+ * @author Thomas Gillis and Denis-Gabriel Caprace
+ * @brief defines the 3D Green functions kernels
+ * @version
+ * @date 2019-11-20
+ * 
+ * @copyright Copyright © UCLouvain 2019
+ * 
+ * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers.
+ * 
+ * Copyright (C) <2019> <Université catholique de Louvain (UCLouvain), Belgique>
+ * 
+ * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE file.
+ * 
+ * This program (FLUPS) is free software: 
+ * you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program (see COPYING file).  If not, 
+ * see <http://www.gnu.org/licenses/>.
+ * 
+ */
+
+#include "defines.hpp"
+#include "expint.hpp"
+
+/**
+ * @name 3 directions unbounded - 0 direction spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- 3D - KERNELS ----------------------------------------------------------
+// note that these functions will likely not be inlined, as we have pointers to them...
+// params = {r, eps, ...}, data unused: returns -c_1o4pi / r * erf(r / eps * c_1osqrt2)
+static inline double _hej_2_3unb0spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    return -c_1o4pi / args[0] * (erf(args[0] / args[1] * c_1osqrt2));
+}
+// params = {r, eps, ...}, data unused; the expression is evaluated with rho = r / eps
+static inline double _hej_4_3unb0spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist = args[0], rho = dist / args[1];
+    return -c_1o4pi / dist * (c_1osqrt2 * c_1osqrtpi * (rho)*exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
+}
+// params = {r, eps, ...}, data unused; the expression is evaluated with rho = r / eps
+static inline double _hej_6_3unb0spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist = args[0], rho = dist / args[1];
+    return -c_1o4pi / dist * (c_1osqrt2 * c_1osqrtpi * (c_7o4 * rho - c_1o4 * pow(rho, 3)) * exp(-rho * rho * .5 ) + erf(rho * c_1osqrt2));
+}
+// params[0] = r, data unused: returns -c_1o4pi / r
+static inline double _chat_2_3unb0spe(const void* params,const double* data) {
+    return -c_1o4pi / static_cast<const double*>(params)[0];
+}
+/**
+ * @brief LGF 3D: tabulated value when ix, iy and iz are all below N, otherwise an expansion in inverse powers of rho = sqrt(ix^2 + iy^2 + iz^2)
+ * 
+ * @param params array of doubles: entries [4], [5], [6] are truncated to the int indices ix, iy, iz; [7] is truncated to the int N; [8] is h
+ * @param data table read at ix + iy * N + iz * N * N (with a sign flip) when ix, iy and iz are all < N
+ * @return double the selected value divided by h
+ */
+static inline double _lgf_2_3unb0spe(const void* params,const double* data) {
+    int    ix = (int)((double*)params)[4];
+    int    iy = (int)((double*)params)[5];
+    int    iz = (int)((double*)params)[6];
+    int    N  = (int)((double*)params)[7];
+    double h  = ((double*)params)[8];
+
+    // if the point is close enough, it is already precomputed in data; NOTE(review): negative indices also pass this test - presumably callers pass non-negative ix, iy, iz, confirm
+    double green;
+    if (ix < N && iy < N && iz < N) {
+        green = - data[ix + iy * N + iz * N * N];
+
+    } else {  // if not, we use the extrapolation: a leading -c_1o4pi / rho term plus three correction terms
+        const double rho     = sqrt(ix * ix + iy * iy + iz * iz);
+        const double rho_2   = rho * rho;
+        const double oorho_6 = 1.0 / std::pow(rho, 6.0);
+        const double oorho_7 = 1.0 / std::pow(rho, 7.0);
+        // even powers of ix
+        const double ix_2  = std::pow(ix, 2.0);
+        const double ix_4  = std::pow(ix, 4.0);
+        const double ix_6  = std::pow(ix, 6.0);
+        const double ix_8  = std::pow(ix, 8.0);
+        const double ix_10 = std::pow(ix, 10.0);
+        const double ix_12 = std::pow(ix, 12.0);
+        // even powers of iy
+        const double iy_2  = std::pow(iy, 2.0);
+        const double iy_4  = std::pow(iy, 4.0);
+        const double iy_6  = std::pow(iy, 6.0);
+        const double iy_8  = std::pow(iy, 8.0);
+        const double iy_10 = std::pow(iy, 10.0);
+        const double iy_12 = std::pow(iy, 12.0);
+        // even powers of iz
+        const double iz_2  = std::pow(iz, 2.0);
+        const double iz_4  = std::pow(iz, 4.0);
+        const double iz_6  = std::pow(iz, 6.0);
+        const double iz_8  = std::pow(iz, 8.0);
+        const double iz_10 = std::pow(iz, 10.0);
+        const double iz_12 = std::pow(iz, 12.0);
+
+        green = - c_1o4pi / rho \
+            - 1.0/(  16.0 * M_PI) * (ix_4 + iy_4 + iz_4 - 3.0 * (ix_2 * iy_2 + iy_2 * iz_2 + ix_2 * iz_2)) * oorho_7 \
+            - 1.0/( 128.0 * M_PI) * (23.0 * (ix_8 + iy_8 + iz_8) - 244.0 * (ix_6 * (iy_2 + iz_2) + iy_6 * (ix_2 + iz_2) + iz_6 * (ix_2 + iy_2)) - 228.0 * ix_2 * iy_2 * iz_2 * rho_2 + 621.0 * (ix_4 * iy_4 + ix_4 * iz_4 + iy_4 * iz_4)) * oorho_7 * oorho_6 \
+            - 1.0/(2048.0 * M_PI) * (2588.0 * (ix_12 + iy_12 + iz_12) - 65676.0 * (ix_10 * iy_2 + ix_10 * iz_2 + ix_2 * iy_10 + iy_10 * iz_2 + ix_2 * iz_10 + iy_2 * iz_10) + 426144.0 * (ix_8 * iy_4 + ix_4 * iy_8 + ix_8 * iz_4 + iy_8 * iz_4 + ix_4 * iz_8 + iy_4 * iz_8) - 712884.0 * (ix_6 * iy_6 + iy_6 * iz_6 + ix_6 * iz_6) - 62892.0 * (ix_8 * iy_2 * iz_2 + ix_2 * iy_8 * iz_2 + ix_2 * iy_2 * iz_8) - 297876.0 * (ix_6 * iy_4 * iz_2 + ix_4 * iy_6 * iz_2 + ix_4 * iy_2 * iz_6 + ix_2 * iy_4 * iz_6 + ix_6 * iy_2 * iz_4 + ix_2 * iy_6 * iz_4) + 2507340.0 * ix_4 * iy_4 * iz_4) * oorho_7 * oorho_6 * oorho_6;
+    }
+    
+    return green/(h);
+}
+/**
+ * @brief LGF 2D: tabulated value when ix, iy and iz are all below N, otherwise an expansion in rho = sqrt(ix^2 + iy^2)
+ * 
+ * @param params array of doubles: entries [4], [5], [6] are truncated to the int indices ix, iy, iz; [7] is truncated to the int N
+ * @param data table read at ix + iy * N (with a sign flip) when ix, iy and iz are all < N
+ * @return double the selected value, returned as is (params[8] is not read here)
+ */
+static inline double _lgf_2_2unb0spe(const void* params,const double* data) {
+    int    ix = (int)((double*)params)[4];
+    int    iy = (int)((double*)params)[5];
+    int    iz = (int)((double*)params)[6];
+    int    N  = (int)((double*)params)[7];
+
+    // if the point is close enough, it is already precomputed in data; iz only enters this range test (presumably 0 for 2D use - confirm against the caller)
+    double green;
+    if (ix < N && iy < N && iz < N) {
+        green = - data[ix + iy * N];
+
+    } else {  // if not, we use the extrapolation: a logarithmic term plus four correction terms scaled by powers of 1/rho^6
+        const double rho     = sqrt(ix * ix + iy * iy);
+        const double oorho_6 = 1.0 / std::pow(rho, 6.0);
+        // even powers of ix
+        const double ix_2  = std::pow(ix, 2.0);
+        const double ix_4  = std::pow(ix, 4.0);
+        const double ix_6  = std::pow(ix, 6.0);
+        const double ix_8  = std::pow(ix, 8.0);
+        const double ix_10 = std::pow(ix, 10.0);
+        const double ix_12 = std::pow(ix, 12.0);
+        const double ix_14 = std::pow(ix, 14.0);
+        const double ix_16 = std::pow(ix, 16.0);
+        // even powers of iy
+        const double iy_2  = std::pow(iy, 2.0);
+        const double iy_4  = std::pow(iy, 4.0);
+        const double iy_6  = std::pow(iy, 6.0);
+        const double iy_8  = std::pow(iy, 8.0);
+        const double iy_10 = std::pow(iy, 10.0);
+        const double iy_12 = std::pow(iy, 12.0);
+        const double iy_14 = std::pow(iy, 14.0);
+        const double iy_16 = std::pow(iy, 16.0);
+
+        green =   1.0 / (   2.0 * M_PI) * (log(rho) + c_gamma + log(8.0) * c_1o2)\
+                - 1.0 / (  24.0 * M_PI) * (ix_4 - 6.0 * ix_2 * iy_2 + iy_4) * oorho_6\
+                - 1.0 / ( 480.0 * M_PI) * (43.0 * (ix_8 + iy_8) - 772.0 * (ix_6 * iy_2 + ix_2 * iy_6) + 1570.0 * ix_4 * iy_4) * oorho_6 * oorho_6\
+                - 1.0 / (2016.0 * M_PI) * (609.0 * (ix_12 + iy_12) - 24234.0 * (ix_10 * iy_2 + ix_2 * iy_10) + 109935.0 * (ix_8 * iy_4 + ix_4 * iy_8) - 160524.0 * ix_6 * iy_6) * oorho_6 * oorho_6 * oorho_6\
+                - 1.0 / (2880.0 * M_PI) * (63139.0 * (ix_16 + iy_16) - 4467336.0 * (ix_14 * iy_2 + ix_2 * iy_14) + 38334996.0 * (ix_12 * iy_4 + ix_4 * iy_12) - 98512568.0 * (ix_10 * iy_6 + ix_6 * iy_10) + 122747922.0 * ix_8 * iy_8) * oorho_6 * oorho_6 * oorho_6 * oorho_6;
+    }
+    return green;
+}
+/**@} */
+
+
+/**
+ * @name 2 directions unbounded - 1 direction spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
+// params = {r, ., sig, ...}, data unused; the original note flags the coefficients printed in [Spietz2018] as mistaken
+static inline double _hej_2_2unb1spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+
+    const double rho    = dist / sigma;
+    const double rho_sq = rho * rho;
+    // logarithm of the radius plus half the exponential integral evaluated at rho^2 / 2
+    return c_1o2pi * (log(dist) + 0.5 * expint_ei(rho_sq * 0.5));
+}
+// params[2] = sig, data unused: returns -c_1o2pi * (c_gamma / 2 - log(M_SQRT2 * sig))
+static inline double _hej_2_2unb1spe_r0(const void* params,const double* data) {
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * static_cast<const double*>(params)[2]));
+}
+
+// params = {r, ., sig, ...}, data unused; the original note flags the coefficients printed in [Spietz2018] as mistaken
+static inline double _hej_4_2unb1spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+
+    const double rho    = dist / sigma;
+    const double rho_sq = rho * rho;
+    // logarithm of the radius, a gaussian correction and the exponential integral, the last two evaluated at rho^2 / 2
+    return c_1o2pi * (log(dist) - 0.5 * exp(-rho_sq * 0.5) + 0.5 * expint_ei(rho_sq * 0.5));
+}
+// params[2] = sig, data unused: returns -c_1o2pi * (c_gamma / 2 - log(M_SQRT2 * sig) + 1/2)
+static inline double _hej_4_2unb1spe_r0(const void* params,const double* data) {
+    const double sigma = static_cast<const double*>(params)[2];
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sigma) + .5);
+}
+
+// params = {r, ., sig, ...}, data unused; the original note flags the coefficients printed in [Spietz2018] as mistaken
+static inline double _hej_6_2unb1spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+
+    const double rho    = dist / sigma;
+    const double rho_sq = rho * rho;
+    return c_1o2pi * (log(dist) - (0.75 - 0.125 * rho_sq) * exp(-rho_sq * 0.5) + 0.5 * expint_ei(rho_sq * 0.5));
+}
+// params[2] = sig, data unused: returns -c_1o2pi * (c_gamma / 2 - log(M_SQRT2 * sig) + 3/4)
+static inline double _hej_6_2unb1spe_r0(const void* params,const double* data) {
+    const double sigma = static_cast<const double*>(params)[2];
+    return -c_1o2pi * (c_gamma * .5 - log(M_SQRT2 * sigma) + .75);
+}
+// constant kernel: both arguments are ignored
+// and the returned value is always zero
+static inline double _zero(const void* params,const double* data) { return 0.0; }
+
+// params = {r, k, ...}, data unused: returns -c_1o2pi * besselk0(|k| * r)
+static inline double _chat_2_2unb1spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist = args[0];
+    return -c_1o2pi * besselk0(fabs(args[1]) * dist);
+}
+// params = {., k, ., r_eq2D}, data unused; the expression only depends on the product k * r_eq2D
+static inline double _chat_2_2unb1spe_r0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double kr = args[1] * args[3];
+    return -(1.0 - kr * besselk1(kr)) * c_1opi / (kr * kr);
+}
+// params[0] = r, data unused: returns c_1o2pi * log(r)
+// caution (from the original note): there is a mistake on the sign in [Chatelain2010]
+static inline double _chat_2_2unb1spe_k0(const void* params,const double* data) {
+    const double dist = static_cast<const double*>(params)[0];
+    return c_1o2pi * log(dist);
+}
+
+/**@} */
+
+/**
+ * @name 1 direction unbounded - 2 directions spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
+// params = {r, k, sig, ...}, data unused; divides by s = k * sig, so k = 0 must not be passed to this function
+static inline double _hej_2_1unb2spe(const void* params,const double* data) {
+    const double r   = ((double*)params) [0];
+    const double k   = ((double*)params) [1];
+    const double sig = ((double*)params) [2];
+    const double rho = r/sig;
+    const double s   = k*sig;
+    // erfc(x) instead of 1 - erf(x): the subtraction cancels to exactly 0 once erf(x) rounds to 1 while exp(s * rho) is large; for s * rho > 100 the bracket is set to 0
+    const double subfun = s * rho > 100. ? 0.0 : (erfc(c_1osqrt2 * (s - rho)) * exp(-s * rho) + erfc(c_1osqrt2 * (s + rho)) * exp(s * rho));
+    return - .25 * sig / s * subfun ;
+}
+// params = {r, ., sig, ...}, data unused; the original note records that this expression is mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
+static inline double _hej_2_1unb2spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+
+    const double rho = dist / sigma;
+    return 0.5* dist * erf((dist * c_1osqrt2) / sigma) - (1.-exp(-rho*rho*.5)) *sigma*c_1osqrt2*c_1osqrtpi ;
+}
+
+// params = {r, k, sig, ...}, data unused; divides by s = k * sig, so k = 0 must not be passed to this function
+static inline double _hej_4_1unb2spe(const void* params,const double* data) {
+    const double r   = ((double*)params) [0];
+    const double k   = ((double*)params) [1];
+    const double sig = ((double*)params) [2];
+    const double rho = r/sig, s = k*sig;
+    // erfc(x) instead of 1 - erf(x): the subtraction cancels to exactly 0 once erf(x) rounds to 1 while exp(s * rho) is large; for s * rho > 100 the bracket is set to 0
+    const double subfun = s * rho > 100. ? 0.0 : (erfc(c_1osqrt2 * (s - rho)) * exp(-s * rho) + erfc(c_1osqrt2 * (s + rho)) * exp(s * rho));
+    return - 0.25 * sig / s * subfun \
+           - sig * M_SQRT2 * c_1osqrtpi * .25 * exp(-.5 * (s * s + rho * rho));
+}
+// params = {r, ., sig, ...}, data unused; the original note records that this expression is mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
+static inline double _hej_4_1unb2spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+    const double rho   = dist / sigma;
+    return 0.5* dist * erf((dist * c_1osqrt2) / sigma) - (1.-exp(-rho*rho*.5)) *.5*sigma*c_1osqrt2*c_1osqrtpi ;
+}
+
+// params = {r, k, sig, ...}, data unused; divides by s = k * sig, so k = 0 must not be passed to this function
+static inline double _hej_6_1unb2spe(const void* params,const double* data) {
+    const double r   = ((double*)params) [0];
+    const double k   = ((double*)params) [1];
+    const double sig = ((double*)params) [2];
+    const double rho = r/sig, s = k*sig;
+    // erfc(x) instead of 1 - erf(x): the subtraction cancels to exactly 0 once erf(x) rounds to 1 while exp(s * rho) is large; for s * rho > 100 the bracket is set to 0
+    const double subfun = s * rho > 100. ? 0.0 : (erfc(c_1osqrt2 * (s - rho)) * exp(-s * rho) + erfc(c_1osqrt2 * (s + rho)) * exp(s * rho));
+    return - 0.25 * sig / s * subfun \
+           - sig * M_SQRT2 * c_1osqrtpi * (c_5o16 + c_1o16 * (s * s - rho * rho)) * exp(-.5 * (s * s + rho * rho));
+}
+// params = {r, ., sig, ...}, data unused; the original note records that this expression is mistakenly 0.0 in [Hejlesen:2013] and [Spietz:2018]
+static inline double _hej_6_1unb2spe_k0(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist  = args[0];
+    const double sigma = args[2];
+    const double rho   = dist / sigma;
+    return 0.5* dist * erf((dist * c_1osqrt2) / sigma) - (3.-exp(-rho*rho*.5) * (rho*rho+3.) ) *.125*sigma*c_1osqrt2*c_1osqrtpi ;
+}
+
+// params = {r, k, ...}, data unused: returns -0.5 * exp(-k * r) / k
+static inline double _chat_2_1unb2spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double dist = args[0], wavenum = args[1];
+    return -0.5 * exp(-wavenum * dist) / wavenum;
+}
+// params[0] = r, data unused: returns |r| / 2
+static inline double _chat_2_1unb2spe_k0(const void* params,const double* data) {
+    const double dist = static_cast<const double*>(params)[0];
+    return 0.5 * fabs(dist);
+}
+
+/**@} */
+
+
+/**
+ * @name 3 directions spectral
+ * 
+ * @{
+ */
+// ----------------------------------------------------------- KERNELS ----------------------------------------------------------
+// params = {ksqr, sig, ...}, data unused: returns -exp(-ksqr * sig^2 / 2) / ksqr
+static inline double _hej_2_0unb3spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double k2 = args[0], sigma = args[1];
+    const double s2 = k2 * (sigma * sigma);
+    return - exp(-s2 / 2.0) / (k2);
+}
+// params = {ksqr, sig, ...}, data unused; with ssqr = ksqr * sig^2 returns -(1 + ssqr / 2) * exp(-ssqr / 2) / ksqr
+static inline double _hej_4_0unb3spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double k2 = args[0], sigma = args[1];
+    const double s2 = k2 * (sigma * sigma);
+    return - (1.0 + s2 / 2.0) * exp(-s2 / 2.0) / (k2);
+}
+// params = {ksqr, sig, ...}, data unused; with ssqr = ksqr * sig^2 returns -(1 + ssqr / 2 + ssqr^2 / 8) * exp(-ssqr / 2) / ksqr
+static inline double _hej_6_0unb3spe(const void* params,const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double k2 = args[0], sigma = args[1];
+    const double s2 = k2 * (sigma * sigma);
+    return - (1.0 + s2 / 2.0 + s2 * s2 / 8.0) * exp(-s2 / 2.0) / (k2);
+}
+
+// params[0] = ksqr, data unused:
+// returns -1 / ksqr
+static inline double _chat_2_0unb3spe(const void* params,const double* data) {
+    return - 1.0 / static_cast<const double*>(params)[0];
+}
+// params[2], [3], [4] = kx, ky, kz and params[5] = h; data unused
+static inline double _lgf_2_0unb3spe(const void* params, const double* data) {
+    const double* const args = static_cast<const double*>(params);
+    const double step = args[5];
+    const double sx = sin(args[2] * step / 2.0), sy = sin(args[3] * step / 2.0), sz = sin(args[4] * step / 2.0);
+    // -h^2 over the sum of 4 * sin^2(k_i * h / 2) in the three directions
+    return - step * step / (4.0 * pow(sx, 2.0) + 4.0 * pow(sy, 2.0) + 4.0 * pow(sz, 2.0));
+}
+/**@} */
\ No newline at end of file