From b5c8668de0c94b99cc5cbfad1ad1757274211f1f Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 13 Apr 2024 14:45:52 +0200
Subject: [PATCH 01/32] FEL-IR-a works

---
 CMakeLists.txt                    |  114 +-
 src/Expression/IpplOperations.h   |    2 +-
 src/MaxwellSolvers/CMakeLists.txt |    1 +
 src/MaxwellSolvers/FDTD.h         |   51 +
 src/Particle/ParticleAttrib.hpp   |   24 +-
 src/Types/Vector.h                |  306 +++--
 test/solver/CMakeLists.txt        |    6 +
 test/solver/TestFDTDSolver.cpp    | 1774 +++++++++++++++++++++++++++++
 8 files changed, 2169 insertions(+), 109 deletions(-)
 create mode 100644 src/MaxwellSolvers/FDTD.h
 create mode 100644 test/solver/TestFDTDSolver.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9483cc6cf..46a2ac866 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,19 +1,33 @@
 cmake_minimum_required (VERSION 3.20.3)
 project (IPPL CXX)
 set (IPPL_VERSION_MAJOR 3)
-set (IPPL_VERSION_MINOR 0.1)
-
+set (IPPL_VERSION_MINOR 0.2)
+include(FetchContent)
 set (IPPL_VERSION_NAME "V${IPPL_VERSION_MAJOR}.${IPPL_VERSION_MINOR}")
-
+option(DOWNLOAD_EXTRACT_TIMESTAMP ON FORCE)
 set (IPPL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
+if(NOT WIN32)
+  string(ASCII 27 Esc)
+  set(ColourReset "${Esc}[m")
+  set(Red         "${Esc}[31m")
+  set(Green       "${Esc}[32m")
+  set(Yellow      "${Esc}[1;33m")
+endif()
+
 if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
+
     set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
         "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
         FORCE
-        )
-endif ()
-
+    )
+    message(STATUS "${Yellow}CMAKE_BUILD_TYPE not set, defaulting to ${CMAKE_BUILD_TYPE} ${ColourReset}")
+else ()
+    message (STATUS "${Green}Build type is: ${CMAKE_BUILD_TYPE}${ColourReset}")
+endif()
+if(POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+endif()
 if (${CMAKE_BUILD_TYPE} STREQUAL "Release")
     add_definitions (-DNOPAssert)
 endif ()
@@ -21,27 +35,85 @@ endif ()
 add_compile_options (-Wall)
 add_compile_options (-Wunused)
 add_compile_options (-Wextra)
-add_compile_options (-Werror)
+#add_compile_options (-Werror)
 
 # allow deprecated functions
 add_compile_options (-Wno-deprecated-declarations)
-
+file(
+    DOWNLOAD
+    https://github.com/nlohmann/json/releases/download/v3.11.3/json.hpp
+    "${CMAKE_BINARY_DIR}/downloaded_headers/json.hpp"
+)
+file(
+    DOWNLOAD
+    https://raw.githubusercontent.com/nothings/stb/master/stb_image_write.h
+    "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp"
+)
+include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/")
 option (USE_STATIC_LIBRARIES "Link with static libraries if available" ON)
 
-message (STATUS "Build type is: " ${CMAKE_BUILD_TYPE})
+if(NOT IPPL_PLATFORMS)
+    message(STATUS "${Yellow}IPPL_PLATFORMS flag is not set, defaulting to serial backend${ColourReset}")
+    set(IPPL_PLATFORMS "SERIAL")
+endif()
 
-find_package(Kokkos 4.0.0 REQUIRED)
+if(IPPL_PLATFORMS)
+    #message(STATUS "IPPL_PLATFORMS flag is set to: ${IPPL_PLATFORMS}")
+    string(TOUPPER "${IPPL_PLATFORMS}" IPPL_PLATFORMS)
+    # Perform case distinction
+    if("${IPPL_PLATFORMS}" STREQUAL "CUDA")
+        message(STATUS "${Green}CUDA backend enabled${ColourReset}")
+        set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable Kokkos CUDA Backend")
+        set(Heffte_ENABLE_CUDA ON CACHE BOOL "Set Heffte CUDA Backend")
+    elseif("${IPPL_PLATFORMS}" STREQUAL "OPENMP")
+        message(STATUS "${Green}OpenMP backend enabled${ColourReset}")
+        if(NOT Heffte_ENABLE_FFTW)
+            message(STATUS "${Yellow}Because Heffte_ENABLE_FFTW is not set defaulting to Stock with AVX${ColourReset}")
+            set(Heffte_ENABLE_AVX2 ON CACHE BOOL "Enable Heffte Stock backend lol")
+        else()
+            message(STATUS "${Green}Enabled FFTW${ColourReset}")
+        endif()
+        set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Enable Kokkos OpenMP Backend")
+    elseif("${IPPL_PLATFORMS}" STREQUAL "SERIAL")
+
+        message(STATUS "${Green}Serial backend enabled${ColourReset}")
+        if(NOT Heffte_ENABLE_FFTW)
+            message(STATUS "${Yellow}Because Heffte_ENABLE_FFTW is not set defaulting to Stock with AVX${ColourReset}")
+            set(Heffte_ENABLE_AVX2 ON CACHE BOOL "Enable Heffte Stock backend lol")
+        endif()
+
+    elseif("${IPPL_PLATFORMS}" STREQUAL "CUDA;OPENMP" OR "${IPPL_PLATFORMS}" STREQUAL "OPENMP;CUDA")
+        message(STATUS "${Green}CUDA and OpenMP backend enabled${ColourReset}")
+        set(Heffte_ENABLE_CUDA ON CACHE BOOL "Set Heffte CUDA Backend")
+        set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Enable Kokkos OpenMP Backend")
+        set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable Kokkos CUDA Backend") 
+    else()
+        message(FATAL_ERROR "Unknown or unsupported platform: ${IPPL_PLATFORMS}")
+        # Handle unknown or unsupported platform
+    endif()
+endif()
 
+find_package(Kokkos 4.0.0 QUIET)
+if(NOT Kokkos_FOUND)
+    FetchContent_Declare(
+        #DOWNLOAD_EXTRACT_TIMESTAMP
+        kokko
+        URL https://github.com/kokkos/kokkos/archive/refs/tags/4.1.00.tar.gz                
+    )
+    FetchContent_MakeAvailable(kokko)
+endif()
 # Select flags.
 set (CMAKE_CXX_STANDARD 20)
+set (CMAKE_CUDA_EXTENSIONS OFF)
+set (CMAKE_CXX_EXTENSIONS OFF)
 set (CMAKE_CXX_STANDARD_REQUIRED ON)
-set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g ")
-set (CMAKE_CXX_FLAGS_RELEASE "-O3")
-set (CMAKE_CXX_FLAGS_DEBUG  "-O0 -g")
+set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ")
+set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+#set (CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g")
 
 if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
     # Use sanitizer flags for GCC or Clang only
-    set (CMAKE_CXX_FLAGS_DEBUG  "-O0 -g -fsanitize=address,undefined")
+    set (CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address,undefined")
 endif ()
 
 if ("${Kokkos_DEVICES}" MATCHES "CUDA")
@@ -73,7 +145,16 @@ endif()
 option (ENABLE_FFT "Enable FFT transform" OFF)
 if (ENABLE_FFT)
     add_definitions (-DENABLE_FFT)
-    find_package (Heffte 2.2.0 REQUIRED)
+    find_package (Heffte 2.2.0 QUIET)
+    if(NOT Heffte_FOUND)
+        message(STATUS "Using FetchContent for Heffte")
+        FetchContent_Declare(
+             Heffte
+             #DOWNLOAD_EXTRACT_TIMESTAMP
+             GIT_REPOSITORY https://github.com/icl-utk-edu/heffte 
+        )
+        FetchContent_MakeAvailable(Heffte)
+    endif()
     message (STATUS "Found Heffte_DIR: ${Heffte_DIR}")
 endif ()
 
@@ -100,6 +181,9 @@ endif ()
 
 option (ENABLE_UNIT_TESTS "Enable unit tests" OFF)
 if (ENABLE_UNIT_TESTS)
+    if (NOT ENABLE_FFT)
+        message(WARNING "Enabling unit tests even though FFT not enabled (-DENABLE_FFT=ON)!")
+    endif ()
     include(FetchContent)
     include(GoogleTest)
 
diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h
index dad442901..198f20b44 100644
--- a/src/Expression/IpplOperations.h
+++ b/src/Expression/IpplOperations.h
@@ -282,7 +282,7 @@ namespace ippl {
              */
             template <typename... Args>
             KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const {
-                return dot(u_m(args...), v_m(args...)).apply();
+                return dot(u_m(args...), v_m(args...));
             }
 
         private:
diff --git a/src/MaxwellSolvers/CMakeLists.txt b/src/MaxwellSolvers/CMakeLists.txt
index 62d93dd36..d961415ea 100644
--- a/src/MaxwellSolvers/CMakeLists.txt
+++ b/src/MaxwellSolvers/CMakeLists.txt
@@ -3,6 +3,7 @@ set (_SRCS
 
 set (_HDRS
     Maxwell.h
+    FDTD.h
 )
 
 include_DIRECTORIES (
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
new file mode 100644
index 000000000..f4a1d5b9a
--- /dev/null
+++ b/src/MaxwellSolvers/FDTD.h
@@ -0,0 +1,51 @@
+#ifndef IPPL_FDTD_H
+#define IPPL_FDTD_H
+#include "Types/Vector.h"
+
+#include "Field/Field.h"
+
+#include "FieldLayout/FieldLayout.h"
+#include "Meshes/UniformCartesian.h"
+constexpr double sqrt_4pi = 3.54490770181103205459;
+constexpr double alpha_scaling_factor = 1e30;
+constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
+constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi;
+constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor;
+constexpr double electron_mass_in_kg = 9.1093837015e-31;
+constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor;
+constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg;
+constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters;
+constexpr double electron_charge_in_coulombs = 1.602176634e-19;
+constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs;
+
+constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges;
+constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds;
+constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses;
+constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+
+constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges;
+constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges;
+constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges;
+constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds;
+constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes;
+constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters;
+constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters);
+constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters);
+constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters;
+constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor);
+constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts;
+constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters);
+constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes);
+constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds);
+constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G;
+constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons;
+
+namespace ippl {
+    //template <typename scalar>
+    //struct FDTDFieldState{
+    //    
+    //};
+}
+
+#endif
\ No newline at end of file
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 3b37f920a..9f22b72be 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -121,7 +121,7 @@ namespace ippl {
 
         const vector_type& dx     = mesh.getMeshSpacing();
         const vector_type& origin = mesh.getOrigin();
-        const vector_type invdx   = 1.0 / dx;
+        const vector_type invdx   = vector_type(1.0) / dx;
 
         const FieldLayout<Dim>& layout = f.getLayout();
         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
@@ -132,12 +132,12 @@ namespace ippl {
             "ParticleAttrib::scatter", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
-                Vector<int, Field::dim> index        = l;
-                Vector<PositionType, Field::dim> whi = l - index;
-                Vector<PositionType, Field::dim> wlo = 1.0 - whi;
+                vector_type l                        = (pp(idx) - origin) * invdx + vector_type(1.0); // Terribile;
+                Vector<int, Field::dim> index        = l.template cast<int>();
+                Vector<PositionType, Field::dim> whi = l - index.template cast<PositionType>();
+                Vector<PositionType, Field::dim> wlo = vector_type(1.0) - whi;
 
-                Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
+                Vector<size_t, Field::dim> args = (index - lDom.first() + nghost).template cast<size_t>();
 
                 // scatter
                 const value_type& val = dview_m(idx);
@@ -175,7 +175,7 @@ namespace ippl {
 
         const vector_type& dx     = mesh.getMeshSpacing();
         const vector_type& origin = mesh.getOrigin();
-        const vector_type invdx   = 1.0 / dx;
+        const vector_type invdx   = vector_type(1.0) / dx;
 
         const FieldLayout<Dim>& layout = f.getLayout();
         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
@@ -186,12 +186,12 @@ namespace ippl {
             "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
-                Vector<int, Field::dim> index        = l;
-                Vector<PositionType, Field::dim> whi = l - index;
-                Vector<PositionType, Field::dim> wlo = 1.0 - whi;
+                vector_type l                        = (pp(idx) - origin) * invdx + vector_type(1.0); //terribile
+                Vector<int, Field::dim> index        = l.template cast<int>();
+                Vector<PositionType, Field::dim> whi = l - index.template cast<PositionType>();
+                Vector<PositionType, Field::dim> wlo = vector_type(1.0) - whi;
 
-                Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
+                Vector<size_t, Field::dim> args = (index - lDom.first() + nghost).template cast<size_t>();
 
                 // gather
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
diff --git a/src/Types/Vector.h b/src/Types/Vector.h
index 4380fc8cf..56963f864 100644
--- a/src/Types/Vector.h
+++ b/src/Types/Vector.h
@@ -20,95 +20,239 @@ namespace ippl {
      * @tparam Dim vector dimension
      */
     template <typename T, unsigned Dim>
-    class Vector : public detail::Expression<Vector<T, Dim>, sizeof(T) * Dim> {
-    public:
-        typedef T value_type;
-        static constexpr unsigned dim = Dim;
+    struct Vector {
+        using value_type = T;
 
-        KOKKOS_FUNCTION
-        Vector()
-            : Vector(value_type(0)) {}
-
-        template <typename... Args,
-                  typename std::enable_if<sizeof...(Args) == Dim, bool>::type = true>
-        explicit KOKKOS_FUNCTION Vector(const Args&... args);
-
-        template <typename E, size_t N>
-        KOKKOS_FUNCTION Vector(const detail::Expression<E, N>& expr);
-
-        KOKKOS_DEFAULTED_FUNCTION
-        Vector(const Vector<T, Dim>& v) = default;
-
-        KOKKOS_FUNCTION
-        Vector(const T& val);
-        
-        
-        Vector(const std::array<T, Dim>& a);
-
-	    Vector(const std::array<std::vector<T>, Dim>& a);
-
-        /*!
-         * @param list of values
-         */
-        KOKKOS_FUNCTION
-        Vector(const std::initializer_list<T>& list);
-
-        KOKKOS_FUNCTION
-        ~Vector() {}
-
-        // Get and Set Operations
-        KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i);
-
-        KOKKOS_INLINE_FUNCTION value_type operator[](unsigned int i) const;
-
-        KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i);
-
-        KOKKOS_INLINE_FUNCTION value_type operator()(unsigned int i) const;
-
-        // Assignment Operators
-        template <typename E, size_t N>
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator=(const detail::Expression<E, N>& expr);
-
-        template <typename E, size_t N>
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator+=(const detail::Expression<E, N>& expr);
-
-        template <typename E, size_t N>
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator-=(const detail::Expression<E, N>& expr);
-
-        template <typename E, size_t N>
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator*=(const detail::Expression<E, N>& expr);
-
-        template <typename E, size_t N>
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator/=(const detail::Expression<E, N>& expr);
-
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator+=(const T& val);
-
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator-=(const T& val);
-
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator*=(const T& val);
-
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator/=(const T& val);
+        constexpr static unsigned dim = Dim;
+        T data[Dim];
+        KOKKOS_INLINE_FUNCTION constexpr Vector(const std::initializer_list<T>& list) {
+            // PAssert(list.size() == Dim);
+            unsigned int i = 0;
+            for (auto& l : list) {
+                data[i] = l;
+                ++i;
+            }
+        }
+        Vector() = default;
+        constexpr KOKKOS_INLINE_FUNCTION Vector(T v) { fill(v); }
+        KOKKOS_INLINE_FUNCTION value_type dot(const Vector<T, Dim>& v) const noexcept {
+            value_type ret = 0;
+            for (unsigned i = 0; i < dim; i++) {
+                ret += data[i] * v[i];
+            }
+            return ret;
+        }
+        template <unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> tail() const noexcept {
+            Vector<T, N> ret;
+            static_assert(N <= Dim, "N must be smaller than Dim");
+            constexpr unsigned diff = Dim - N;
+            for (unsigned i = 0; i < N; i++) {
+                ret[i] = (*this)[i + diff];
+            }
+            return ret;
+        }
+        template <unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> head() const noexcept {
+            Vector<T, N> ret;
+            static_assert(N <= Dim, "N must be smaller than Dim");
+            for (unsigned i = 0; i < N; i++) {
+                ret[i] = (*this)[i];
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION value_type squaredNorm() const noexcept { return dot(*this); }
+        KOKKOS_INLINE_FUNCTION value_type norm() {
+#ifndef __CUDA_ARCH__
+            using std::sqrt;
+#endif
+            return sqrt(squaredNorm());
+        }
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim> normalized() const noexcept { return *this / norm(); }
+        KOKKOS_INLINE_FUNCTION value_type sum() const noexcept {
+            value_type ret = 0;
+            for (unsigned i = 0; i < dim; i++) {
+                ret += data[i];
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION value_type average() const noexcept {
+            value_type ret = 0;
+            for (unsigned i = 0; i < dim; i++) {
+                ret += data[i];
+            }
+            return ret / dim;
+        }
+        KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const Vector<T, Dim>& v) const noexcept
+            requires(Dim == 3)
+        {
+            Vector<T, 3> ret(0);
+            ret[0] = data[1] * v[2] - data[2] * v[1];
+            ret[1] = data[2] * v[0] - data[0] * v[2];
+            ret[2] = data[0] * v[1] - data[1] * v[0];
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION bool operator==(const Vector<T, dim>& o) const noexcept {
+            for (unsigned i = 0; i < dim; i++) {
+                if (data[i] != o[i])
+                    return false;
+            }
+            return true;
+        }
+        KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i) noexcept {
+            assert(i < dim);
+            return data[i];
+        }
+        KOKKOS_INLINE_FUNCTION T* begin() noexcept { return data; }
+        KOKKOS_INLINE_FUNCTION T* end() noexcept { return data + dim; }
+        KOKKOS_INLINE_FUNCTION const T* begin() const noexcept { return data; }
+        KOKKOS_INLINE_FUNCTION const T* end() const noexcept { return data + dim; }
+        KOKKOS_INLINE_FUNCTION constexpr void fill(value_type x) {
+            for (unsigned i = 0; i < dim; i++) {
+                data[i] = value_type(x);
+            }
+        }
+        KOKKOS_INLINE_FUNCTION const value_type& operator[](unsigned int i) const noexcept {
+            assert(i < dim);
+            return data[i];
+        }
 
-        using iterator       = T*;
-        using const_iterator = const T*;
-        KOKKOS_INLINE_FUNCTION constexpr iterator begin();
-        KOKKOS_INLINE_FUNCTION constexpr iterator end();
-        KOKKOS_INLINE_FUNCTION constexpr const_iterator begin() const;
-        KOKKOS_INLINE_FUNCTION constexpr const_iterator end() const;
+        KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i) noexcept {
+            assert(i < dim);
+            return data[i];
+        }
 
-        KOKKOS_INLINE_FUNCTION T dot(const Vector<T, Dim>& rhs) const;
+        KOKKOS_INLINE_FUNCTION const value_type& operator()(unsigned int i) const noexcept {
+            assert(i < dim);
+            return data[i];
+        }
+        KOKKOS_INLINE_FUNCTION Vector operator-() const noexcept {
+            Vector ret;
+            for (unsigned i = 0; i < dim; i++) {
+                ret[i] = -(*this)[i];
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION Vector<T, dim> decompose(Vector<int, dim>* integral) {
+#ifndef __CUDA_ARCH__
+            using std::modf;
+#endif
+            Vector<T, dim> ret;
+            for (unsigned i = 0; i < dim; i++) {
+                if constexpr (std::is_same_v<T, float>) {
+                    float tmp;
+                    ret[i]         = modff((*this)[i], &tmp);
+                    (*integral)[i] = (int)tmp;
+                } else if constexpr (std::is_same_v<T, double>) {
+                    double tmp;
+                    ret[i]         = modf((*this)[i], &tmp);
+                    (*integral)[i] = (int)tmp;
+                }
+            }
+            return ret;
+        }
+        template <typename O>
+        constexpr KOKKOS_INLINE_FUNCTION Vector<O, dim> cast() const noexcept {
+            Vector<O, dim> ret;
+            for (unsigned i = 0; i < dim; i++) {
+                ret.data[i] = (O)(data[i]);
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION Vector operator*(const value_type& o) const noexcept {
+            Vector ret;
+            for (unsigned i = 0; i < dim; i++) {
+                ret[i] = (*this)[i] * o;
+            }
+            return ret;
+        }
+#define defop_kf(OP)                                                                  \
+    constexpr KOKKOS_INLINE_FUNCTION Vector<T, Dim> operator OP(const Vector<T, Dim>& o) const noexcept { \
+        Vector<T, Dim> ret;                                                           \
+        for (unsigned i = 0; i < dim; i++) {                                          \
+            ret.data[i] = data[i] OP o.data[i];                                       \
+        }                                                                             \
+        return ret;                                                                   \
+    }
+        defop_kf(+)
+        defop_kf(-)
+        defop_kf(*)
+        defop_kf(/)
 
-    //Needs to be public to be a standard-layout type
-    //private:
-        T data_m[Dim];
+#define def_aop_kf(OP)                                                 \
+    KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator OP(const Vector<T, Dim>& o) noexcept { \
+        for (unsigned i = 0; i < dim; i++) {                           \
+            (*this)[i] OP o[i];                                        \
+        }                                                              \
+        return *this;                                                  \
+    }
+            def_aop_kf(+=)
+            def_aop_kf(-=)
+            def_aop_kf(*=)
+            def_aop_kf(/=)
+            template <typename stream_t>
+            friend stream_t& operator<<(stream_t& str, const Vector<T, Dim>& v) {
+            // tr << "{";
+            for (unsigned i = 0; i < Dim; i++) {
+                str << v[i];
+                if (i < Dim - 1)
+                    str << ", ";
+            }
+            // str << "}";
+            return str;
+        }
+        // defop_kf(*)
+        // defop_kf(/)
     };
-    
+
+    //template <typename T, unsigned Dim>
+    //KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
+    //template <typename T, unsigned Dim>
+    //KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
     template<typename T, unsigned Dim>
-    KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
+    KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b){
+        using Kokkos::min;
+        Vector<T, Dim> ret;
+        for(unsigned d = 0; d < Dim;d++){
+            ret[d] = min(a[d], b[d]);
+        }
+        return ret;
+    }
     template<typename T, unsigned Dim>
-    KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
+    KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b){
+        using Kokkos::max;
+        Vector<T, Dim> ret;
+        for(unsigned d = 0; d < Dim;d++){
+            ret[d] = max(a[d], b[d]);
+        }
+        return ret;
+    }
+    template<typename O, typename T, unsigned N>
+    KOKKOS_INLINE_FUNCTION ippl::Vector<T, N> operator *(const O &o, const ippl::Vector<T, N>& v) noexcept {
+        ippl::Vector<T, N> ret;
+        for (unsigned i = 0; i < N; i++) {
+            ret[i] = v[i] * o;
+        }
+        return ret;
+    }
+    template <typename T>
+    KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const ippl::Vector<T, 3>& a, const ippl::Vector<T, 3>& b) {
+        ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
+        ret[0] = a[1] * b[2] - a[2] * b[1];
+        ret[1] = a[2] * b[0] - a[0] * b[2];
+        ret[2] = a[0] * b[1] - a[1] * b[0];
+        return ret;
+    }
+    template <typename T, unsigned dim>
+    KOKKOS_INLINE_FUNCTION T dot(const ippl::Vector<T, dim>& a, const ippl::Vector<T, dim>& b) {
+        T ret = 0;
+        for(unsigned k = 0;k < dim;k++){
+            ret += a[k] * b[k];
+        }
+        return ret;
+    }
 }  // namespace ippl
 
-#include "Vector.hpp"
+//#include "Vector.hpp"
 
 #endif
diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt
index 358c475cf..dda292fff 100644
--- a/test/solver/CMakeLists.txt
+++ b/test/solver/CMakeLists.txt
@@ -25,6 +25,12 @@ target_link_libraries (
     ${IPPL_LIBS}
     ${MPI_CXX_LIBRARIES}
 )
+add_executable (TestFDTDSolver TestFDTDSolver.cpp)
+target_link_libraries (
+    TestFDTDSolver
+    ${IPPL_LIBS}
+    ${MPI_CXX_LIBRARIES}
+)
 
 if (ENABLE_FFT)
     add_executable (TestGaussian_convergence TestGaussian_convergence.cpp)
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
new file mode 100644
index 000000000..65446b03f
--- /dev/null
+++ b/test/solver/TestFDTDSolver.cpp
@@ -0,0 +1,1774 @@
+#include "Ippl.h"
+
+#include "Types/Vector.h"
+
+#include "Field/Field.h"
+
+#include "MaxwellSolvers/FDTD.h"
+#include <json.hpp>
+#include <fstream>
+#include <list>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.hpp>
+//CONFIG
+KOKKOS_INLINE_FUNCTION bool isNaN(float x){
+    #ifdef __CUDA_ARCH__
+    return isnan(x);
+    #else
+    return std::isnan(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(float x){
+    #ifdef __CUDA_ARCH__
+    return isinf(x);
+    #else
+    return std::isinf(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isNaN(double x){
+    #ifdef __CUDA_ARCH__
+    return isnan(x);
+    #else
+    return std::isnan(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(double x){
+    #ifdef __CUDA_ARCH__
+    return isinf(x);
+    #else
+    return std::isinf(x);
+    #endif
+}
+#define assert_isreal(X) assert(!isNaN(X) && !isINF(X))
+
+
+
+
+
+
+struct config {
+    using scalar = double;
+
+    //using length_unit = funits::length<scalar, funits::planck_base>;
+    //using duration_unit = funits::time<scalar, funits::planck_base>;
+    // GRID PARAMETERS
+    ippl::Vector<uint32_t, 3> resolution;
+
+    ippl::Vector<scalar, 3> extents;
+    scalar total_time;
+    scalar timestep_ratio;
+
+    scalar length_scale_in_jobfile, temporal_scale_in_jobfile;
+
+        // All in unit_charge, or unit_mass
+    scalar charge, mass;
+
+    uint64_t num_particles;
+    bool space_charge;
+
+        // BUNCH PARAMETERS
+    ippl::Vector<scalar, 3> mean_position;
+    ippl::Vector<scalar, 3> sigma_position;
+    ippl::Vector<scalar, 3> position_truncations;
+    ippl::Vector<scalar, 3> sigma_momentum;
+    scalar bunch_gamma;
+
+    scalar undulator_K;
+    scalar undulator_period;
+    scalar undulator_length;
+
+    uint32_t output_rhythm;
+    std::unordered_map<std::string, double> experiment_options;
+};
+template<typename scalar, unsigned Dim>
+ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j){
+    if(j.is_array()){
+        assert(j.size() == Dim);
+        ippl::Vector<scalar, Dim> ret;
+        for(unsigned i = 0;i < Dim;i++)
+            ret[i] = (scalar)j[i];
+        return ret;
+    }
+    else{
+        std::cerr << "Warning: Obtaining Vector from scalar json\n";
+        ippl::Vector<scalar, Dim> ret;
+        ret.fill((scalar)j);
+        return ret;
+    }
+}
+template<size_t N, typename T>
+struct DefaultedStringLiteral {
+    constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) {
+        std::copy_n(str, N, key);
+    }
+    
+    T value;
+    char key[N];
+};
+template<size_t N>
+struct StringLiteral {
+    constexpr StringLiteral(const char (&str)[N]) {
+        std::copy_n(str, N, value);
+    }
+    
+    char value[N];
+    constexpr DefaultedStringLiteral<N, int> operator>>(int t)const noexcept{
+        return DefaultedStringLiteral<N, int>(value, t);
+    }
+    constexpr size_t size()const noexcept{return N - 1;}
+};
+template<StringLiteral lit>
+constexpr size_t chash(){
+    size_t hash = 5381;
+    int c;
+
+    for(size_t i = 0;i < lit.size();i++){
+        c = lit.value[i];
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const char* val) {
+    size_t hash = 5381;
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const std::string& _val) {
+    size_t hash = 5381;
+    const char* val = _val.c_str();
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+std::string lowercase_singular(std::string str) {
+    // Convert string to lowercase
+    std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+
+    // Check if the string ends with "s" and remove it if it does
+    if (!str.empty() && str.back() == 's') {
+        str.pop_back();
+    }
+
+    return str;
+}
+double get_time_multiplier(const nlohmann::json& j){
+    std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]);
+    double time_factor = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-time">():
+        case chash<"plancktime">():
+        case chash<"pt">():
+        case chash<"natural">():
+            time_factor = unit_time_in_seconds;
+        break;
+        case chash<"picosecond">():
+            time_factor = 1e-12;
+        break;
+        case chash<"nanosecond">():
+            time_factor = 1e-9;
+        break;
+        case chash<"microsecond">():
+            time_factor = 1e-6;
+        break;
+        case chash<"millisecond">():
+            time_factor = 1e-3;
+        break;
+        case chash<"second">():
+            time_factor = 1.0;
+        break;
+        default:
+            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n";
+        break;
+    }
+    return time_factor;
+}
+double get_length_multiplier(const nlohmann::json& options){
+    std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]);
+    double length_factor = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-length">():
+        case chash<"plancklength">():
+        case chash<"pl">():
+        case chash<"natural">():
+            length_factor = unit_length_in_meters;
+        break;
+        case chash<"picometer">():
+            length_factor = 1e-12;
+        break;
+        case chash<"nanometer">():
+            length_factor = 1e-9;
+        break;
+        case chash<"micrometer">():
+            length_factor = 1e-6;
+        break;
+        case chash<"millimeter">():
+            length_factor = 1e-3;
+        break;
+        case chash<"meter">():
+            length_factor = 1.0;
+        break;
+        default:
+            std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n";
+        break;
+    }
+    return length_factor;
+}
+config read_config(const char *filepath){
+    std::ifstream cfile(filepath);
+    nlohmann::json j;
+    cfile >> j;
+    config::scalar lmult = get_length_multiplier(j);
+    config::scalar tmult = get_time_multiplier(j);
+    config ret;
+
+    ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters;
+    ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters;
+    ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters;
+    ret.resolution = getVector<uint32_t, 3>(j["mesh"]["resolution"]);
+
+    //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
+    //std::cerr << "Tmult: " << tmult << "\n";
+    if(j.contains("timestep-ratio")){
+        ret.timestep_ratio = (config::scalar)j["timestep-ratio"];
+    }
+
+    else{
+        ret.timestep_ratio = 1;
+    }
+    ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
+    ret.space_charge = (bool)(j["mesh"]["space-charge"]);
+    ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]);
+    if(ret.bunch_gamma < config::scalar(1)){
+        std::cerr << "Gamma must be >= 1\n";
+        exit(1);
+    }
+    assert(j.contains("undulator"));
+    assert(j["undulator"].contains("static-undulator"));
+
+    ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"];
+    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters;
+    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters;
+    assert(!std::isnan(ret.undulator_length));
+    assert(!std::isnan(ret.undulator_period));
+    assert(!std::isnan(ret.extents[0]));
+    assert(!std::isnan(ret.extents[1]));
+    assert(!std::isnan(ret.extents[2]));
+    assert(!std::isnan(ret.total_time));
+    ret.length_scale_in_jobfile = get_length_multiplier(j);
+    ret.temporal_scale_in_jobfile = get_time_multiplier(j);
+    ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
+    ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
+    ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"];
+    ret.mean_position  = getVector<config::scalar, 3>(j["bunch"]["position"])                       * lmult / unit_length_in_meters;
+    ret.sigma_position = getVector<config::scalar, 3>(j["bunch"]["sigma-position"])                 * lmult / unit_length_in_meters;
+    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters;
+    ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
+    ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
+    if(j.contains("experimentation")){
+        nlohmann::json je = j["experimentation"];
+        for(auto it = je.begin(); it!= je.end();it++){
+            ret.experiment_options[it.key()] = double(it.value());
+        }
+    }
+    return ret;
+}
+template<typename scalar>
+using FieldVector = ippl::Vector<scalar, 3>;
+template<typename scalar>
+struct BunchInitialize {
+
+    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is
+     * manual the charge at points of the position vector will be produced.    				*/
+    // std::string     			bunchType_;
+
+    /* Type of the distributions (transverse or longitudinal) in the bunch.				*/
+    std::string distribution_;
+
+    /* Type of the generator for creating the bunch distribution.					*/
+    std::string generator_;
+
+    /* Total number of macroparticles in the bunch.                                                     */
+    unsigned int numberOfParticles_;
+
+    /* Total charge of the bunch in pC.                                                                 */
+    scalar cloudCharge_;
+
+    /* Initial energy of the bunch in MeV.                                                              */
+    scalar initialGamma_;
+
+    /* Initial normalized speed of the bunch.                                                           */
+    scalar initialBeta_;
+
+    /* Initial movement direction of the bunch, which is a unit vector.                                 */
+    FieldVector<scalar> initialDirection_;
+
+    /* Position of the center of the bunch in the unit of length scale.                           	*/
+    // std::vector<FieldVector<scalar> >	position_;
+    FieldVector<scalar> position_;
+
+    /* Number of macroparticles in each direction for 3Dcrystal type.                                   */
+    FieldVector<unsigned int> numbers_;
+
+    /* Lattice constant in x, y, and z directions for 3D crystal type.                                  */
+    FieldVector<scalar> latticeConstants_;
+
+    /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal
+     * type, it will be the spread in position for each micro-bunch of the crystal.			*/
+    FieldVector<scalar> sigmaPosition_;
+
+    /* Spread in energy in each direction.                                                              */
+    FieldVector<scalar> sigmaGammaBeta_;
+
+    /* Store the truncation transverse distance for the electron generation.				*/
+    scalar tranTrun_;
+
+    /* Store the truncation longitudinal distance for the electron generation.				*/
+    scalar longTrun_;
+
+    /* Name of the file for reading the electrons distribution from.					*/
+    std::string fileName_;
+
+    /* The radiation wavelength corresponding to the bunch length outside the undulator			*/
+    scalar lambda_;
+
+    /* Bunching factor for the initialization of the bunch.						*/
+    scalar bF_;
+
+    /* Phase of the bunching factor for the initialization of the bunch.				*/
+    scalar bFP_;
+
+    /* Boolean flag determining the activation of shot-noise.						*/
+    bool shotNoise_;
+
+    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.	*/
+    FieldVector<scalar> betaVector_;
+
+    /* Initialize the parameters for the bunch initialization to some first values.                     */
+    // BunchInitialize ();
+};
+
+
+
+
+
+//END CONFIG
+
+//LORENTZ FRAME AND UNDULATOR
+template<typename T, unsigned axis = 2>
+struct UniaxialLorentzframe{
+    constexpr static T c = 1.0;
+    using scalar = T;
+    using Vector3 = ippl::Vector<T, 3>;
+    scalar beta_m;
+    scalar gammaBeta_m;
+    scalar gamma_m;
+    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){
+        gammaBeta_m = gammaBeta;
+        beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta));
+        gamma_m = sqrt(1 + gammaBeta * (gammaBeta));
+    }
+    KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{
+        arg[axis] = gamma_m * (arg[axis] + beta_m * time); 
+    }
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB)const noexcept{
+        
+        Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+        ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
+        ret.first  = ippl::Vector<T, 3>(unprimedEB.first  + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+        ret.second = ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+        ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
+        ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
+        return ret;
+    }
+};
+template<typename scalar>
+BunchInitialize<scalar> generate_mithra_config(const config& cfg, const UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
+    using vec3 = ippl::Vector<scalar, 3>;
+    scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K);
+    BunchInitialize<scalar> init;
+    init.generator_ = "random";
+    init.distribution_ = "uniform";
+    init.initialDirection_ = vec3{0, 0, 1};
+    init.initialGamma_ = cfg.bunch_gamma;
+    init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
+    init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast<scalar>();
+    init.sigmaPosition_ = cfg.sigma_position.template cast<scalar>();
+
+    // TODO: Initial bunching factor huh
+    init.bF_ = 0.01;
+    init.bFP_ = 0;
+    init.shotNoise_ = false;
+    init.cloudCharge_ = cfg.charge;
+    init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
+    init.longTrun_ = cfg.position_truncations[2];
+    init.tranTrun_ = cfg.position_truncations[0];
+    init.position_ = cfg.mean_position.cast<scalar>();
+    init.betaVector_ = ippl::Vector<scalar, 3>{0,0,init.initialBeta_};
+    init.numberOfParticles_ = cfg.num_particles;
+
+    init.numbers_ = 0;                      // UNUSED
+    init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED
+
+    return init;
+}
+template <typename Double>
+struct Charge {
+
+    Double q;                     /* Charge of the point in the unit of electron charge.	*/
+    FieldVector<Double> rnp, rnm; /* Position vector of the charge.			*/
+    FieldVector<Double> gb;       /* Normalized velocity vector of the charge.		*/
+
+    /* Double flag determining if the particle is passing the entrance point of the undulator. This flag
+     * can be used for better boosting the bunch to the moving frame. We need to consider it to be double,
+     * because this flag needs to be communicated during bunch update.					*/
+    Double e;
+
+    // Charge();
+};
+template <typename scalar>
+using ChargeVector = std::list<Charge<scalar>>;
+template<typename Double>
+void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<Double> & chargeVector, int rank, int size, int ia){
+    /* Correct the number of particles if it is not a multiple of four.					*/
+    if ( bunchInit.numberOfParticles_ % 4 != 0 )
+      {
+        unsigned int n = bunchInit.numberOfParticles_ % 4;
+        bunchInit.numberOfParticles_ += 4 - n;
+        //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") +
+        //    std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
+    }
+
+    /* Save the initially given number of particles.							*/
+    unsigned int	Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
+
+    /* Declare the required parameters for the initialization of charge vectors.                      	*/
+    Charge<Double>         	charge; charge.q  = bunchInit.cloudCharge_ / Np;
+    FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
+    FieldVector<Double> r  (0.0);
+    FieldVector<Double> t  (0.0);
+    Double            	t0, g;
+    Double		zmin = 1e100;
+    Double		Ne, bF, bFi;
+    unsigned int	bmi;
+    std::vector<Double>	randomNumbers;
+
+    /* The initialization in group of four particles should only be done if there exists an undulator in
+     * the interaction.											*/
+    unsigned int	ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4;
+
+    /* Check the bunching factor.                                                                     	*/
+    if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 )
+      {
+        //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") );
+        //exit(1);
+      }
+
+    /* If the generator is random we should make sure that different processors do not produce the same
+     * random numbers.											*/
+    if 	( bunchInit.generator_ == "random" )
+      {
+        /* Initialize the random number generator.								*/
+        srand ( time(NULL) );
+        /* Np / ng * 20 is the maximum number of particles.							*/
+        randomNumbers.resize( Np / ng * 20, 0.0);
+        for ( unsigned int ri = 0; ri < Np / ng * 20; ri++)
+          randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX));
+      }
+
+    /* Declare the generator function depending on the input.						*/
+    auto generate = [&] (unsigned int n, unsigned int m) {
+      //if 	( bunchInit.generator_ == "random" )
+        return  ( randomNumbers.at( n * 2 * Np/ng + m ) );
+      //else
+      //  return  ( randomNumbers[ n * 2 * Np/ng + m ] );
+    //TODO: Return halton properly
+        //return 	( halton(n,m) );
+    };
+
+    /* Declare the function for injecting the shot noise.						*/
+    auto insertCharge = [&] (Charge<Double> q) {
+
+      for ( unsigned int ii = 0; ii < ng; ii++ )
+        {
+          /* The random modulation is introduced depending on the shot-noise being activated.		*/
+          if ( bunchInit.shotNoise_ )
+            {
+              /* Obtain the number of beamlet.								*/
+              bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ );
+
+              /* Obtain the phase and amplitude of the modulation.					*/
+              bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) );
+
+              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+              q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) );
+            }
+          else if ( bunchInit.lambda_ != 0.0)
+            {
+              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+              q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 );
+            }
+
+          /* Set this charge into the charge vector.							*/
+          chargeVector.push_back(q);
+        }
+    };
+
+    /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to
+     * calculate the FEL bucket number.									*/
+    if ( bunchInit.shotNoise_ )
+      {
+        for (i = 0; i < Np / ng; i++)
+          {
+            if ( bunchInit.distribution_ == "uniform" )
+              zmin = std::min(   Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin );
+            else if ( bunchInit.distribution_ == "gaussian" )
+              zmin = std::min(  (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin );
+            else
+              {
+                //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
+                exit(1);
+              }
+          }
+
+        if ( bunchInit.distribution_ == "uniform" )
+          for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++)
+            {
+              t0  = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) );
+              t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
+
+              zmin = std::min(   t0 , zmin );
+            }
+
+        //zmin = zmin + bunchInit.position_[ia][2];
+        zmin = zmin + bunchInit.position_[2];
+
+        /* Obtain the average number of electrons per FEL beamlet.					*/
+        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] );
+
+        /* Set the bunching factor level for the shot noise depending on the given values.		*/
+        bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
+
+        //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) );
+      }
+
+    /* Determine the properties of each charge point and add them to the charge vector.               	*/
+    for (i = rank; i < Np / ng; i += size)
+      {
+        /* Determine the transverse coordinate.								*/
+        r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
+        r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
+
+        /* Determine the longitudinal coordinate.							*/
+        if ( bunchInit.distribution_ == "uniform" )
+          r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2];
+        else if ( bunchInit.distribution_ == "gaussian" )
+          r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
+        else
+          {
+            //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
+            exit(1);
+          }
+
+        /* Determine the transverse momentum.								*/
+        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
+        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
+        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
+
+        if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
+          {
+            /* Shift the generated charge to the center position and momentum space.			*/
+            //charge.rnp    = bunchInit.position_[ia];
+            charge.rnp    = bunchInit.position_;
+            charge.rnp   += r;
+
+            charge.gb   = gb;
+            charge.gb  += t;
+            //std::cout << gb << "\n";
+            if(std::isinf(gb[2])){
+                std::cerr << "it klonked here\n";
+            }
+
+            /* Insert this charge and the mirrored ones into the charge vector.				*/
+            insertCharge(charge);
+          }
+      }
+
+    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the
+     * CSE from the tail of the bunch.									*/
+    if ( bunchInit.distribution_ == "uniform" ){
+      for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size)
+        {
+            
+          r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
+          r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
+
+          /* Determine the longitudinal coordinate.							*/
+          r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
+          r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
+
+          /* Determine the transverse momentum.								*/
+          t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
+          t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
+          t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
+          //std::cerr << "DOING UNIFORM tapering!!!\n";
+          if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
+            {
+                //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n";
+              /* Shift the generated charge to the center position and momentum space.			*/
+              charge.rnp   = bunchInit.position_[ia];
+              charge.rnp  += r;
+
+              charge.gb  = gb;
+              
+              charge.gb += t;
+              //std::cout << gb[0] << "\n";
+              //if(std::isinf(gb.squaredNorm())){
+              //    std::cerr << "it klonked here\n";
+              //}
+              /* Insert this charge and the mirrored ones into the charge vector.			*/
+              insertCharge(charge);
+            }
+        }
+    }
+
+    /* Reset the value for the number of particle variable according to the installed number of
+     * macro-particles and perform the corresponding changes.                                         	*/
+    bunchInit.numberOfParticles_ = chargeVector.size();
+}
+template<typename Double>
+void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma){
+    Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
+    Double zmaxL = -1.0e100, zmaxG;
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
+      {
+        Double g  	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        if(std::isinf(g)){
+            std::cerr << __FILE__  << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n";
+            abort();
+        }
+        Double bz 	= iterQ->gb[2] / g;
+        iterQ->rnp[2]  *= frame_gamma;
+        
+        iterQ->gb[2] 	= frame_gamma * g * ( bz - frame_beta );
+        
+        zmaxL 		= std::max( zmaxL , iterQ->rnp[2] );
+      }
+    zmaxG = zmaxL;
+    struct {
+       Double zu_;
+       Double beta_;
+    } bunch_;
+    bunch_.zu_ 		= zmaxG;
+    bunch_.beta_ 	= frame_beta;
+
+    /****************************************************************************************************/
+
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
+      {
+        Double g	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        iterQ->rnp[0]  += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        iterQ->rnp[1]  += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        iterQ->rnp[2]  += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        if(std::isnan(iterQ->rnp[2])){
+            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_  << ", " <<  frame_beta << "\n";
+            std::cerr << __FILE__  << ": " << __LINE__ << "   OOOOOF\n";
+            abort();
+        }
+      }
+}
+
+
+
+
+template<typename bunch_type, typename scalar>
+size_t initialize_bunch_mithra(
+    bunch_type& bunch,
+    const BunchInitialize<scalar>& bunchInit,
+    scalar frame_gamma){
+
+    ChargeVector<scalar> oof;
+    initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0);
+    for(auto& c : oof){
+        if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+        if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+    }
+    boost_bunch(oof, frame_gamma);
+    std::cout << "boost gamma" << frame_gamma << "\n";
+    for(auto& c : oof){
+         if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){
+            std::cout << "Pos after boost: " << c.rnp << "\n";
+            break;
+        }
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", oof.size());
+    auto iterQ = oof.begin();
+    for (size_t i = 0; i < oof.size(); i++) {
+        assert_isreal(iterQ->gb[0]);
+        assert_isreal(iterQ->gb[1]);
+        assert_isreal(iterQ->gb[2]);
+        assert(iterQ->gb[2] != 0.0f);
+        scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        assert_isreal(g);
+        scalar bz = iterQ->gb[2] / g;
+        assert_isreal(bz);
+        (void)bz;
+        positions(i) = iterQ->rnp;
+        gammabetas(i) = iterQ->gb;
+        ++iterQ;
+    }
+    if(oof.size() > bunch.getLocalNum()){
+        bunch.create(oof.size() - bunch.getLocalNum());
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions ("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", oof.size());
+    
+    Kokkos::deep_copy(dpositions, positions);
+    Kokkos::deep_copy(dgammabetas, gammabetas);
+    Kokkos::deep_copy(bunch.R_nm1.getView(), positions);
+    Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas);
+    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();;
+    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
+        rview(i) = dpositions(i);
+        rm1view(i) = dpositions(i);
+        gbview(i) = dgammabetas(i);
+    });
+    Kokkos::fence();
+    ippl::Vector<scalar, 3> meanpos = bunch.R.sum() * (1.0 / oof.size());
+    
+    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
+        rview(i) -= meanpos;
+        rm1view(i) -= meanpos;
+    });
+    return oof.size();
+}
+
+
+
+
+
+
+
+//END LORENTZ FRAME AND UNDULATOR AND BUNCH
+
+
+
+
+
+//PREAMBLE
+
+template<typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    for(unsigned d = 0;d < dim;d++){
+        vec[d] = vec[d] * vec[d];
+    }
+    #ifndef __CUDA_ARCH__
+    using std::exp;
+    #endif
+    return exp(-(vec.sum()) / (stddev * stddev)); 
+}
+
+constexpr double sq(double x){
+    return x * x;
+}
+constexpr float sq(float x){
+    return x * x;
+}
+
+
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto first(){
+    return a;
+}
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto second(){
+    return b;
+}
+
+template <typename T>
+KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
+                                                     const ippl::Vector<T, 3>& b) {
+    ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
+    ret[0] = a[1] * b[2] - a[2] * b[1];
+    ret[1] = a[2] * b[0] - a[0] * b[2];
+    ret[2] = a[0] * b[1] - a[1] * b[0];
+    return ret;
+}
+template<typename T>
+KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
+    //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
+    //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+    //pos -= spacing * T(0.5);
+    ippl::Vector<T, 3> relpos = pos - origin;
+    ippl::Vector<T, 3> gridpos = relpos / hr;
+    ippl::Vector<int, 3> ipos;
+    ipos = gridpos.template cast<int>();
+    ippl::Vector<T, 3> fracpos;
+    for(unsigned k = 0;k < 3;k++){
+        fracpos[k] = gridpos[k] - (int)ipos[k];
+    }
+    //TODO: NGHOST!!!!!!!
+    ipos += ippl::Vector<int, 3>(1);
+    ret.first = ipos;
+    ret.second = fracpos;
+    return ret;
+}
+template<typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    if(
+        ipos[0] < 0
+        ||ipos[1] < 0
+        ||ipos[2] < 0
+        ||size_t(ipos[0]) >= view.extent(0) - 1
+        ||size_t(ipos[1]) >= view.extent(1) - 1
+        ||size_t(ipos[2]) >= view.extent(2) - 1
+        ||fracpos[0] < 0
+        ||fracpos[1] < 0
+        ||fracpos[2] < 0
+    ){
+        return;
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+    
+    for(unsigned i = 0;i < 8;i++){
+        scalar weight = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for(unsigned d = 0;d < 3;d++){
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(value);
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+template<typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
+    //std::cout << "Value: " << value << "\n";
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    if(
+        ipos[0] < 0
+        ||ipos[1] < 0
+        ||ipos[2] < 0
+        ||size_t(ipos[0]) >= view.extent(0) - 1
+        ||size_t(ipos[1]) >= view.extent(1) - 1
+        ||size_t(ipos[2]) >= view.extent(2) - 1
+        ||fracpos[0] < 0
+        ||fracpos[1] < 0
+        ||fracpos[2] < 0
+    ){
+        //std::cout << "out of bounds\n";
+        return;
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+    
+    for(unsigned i = 0;i < 8;i++){
+        scalar weight = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for(unsigned d = 0;d < 3;d++){
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+
+template<typename view_type, typename scalar>
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
+
+    
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
+    //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
+    for(int d = 0;d < 3;d++){
+        //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
+        //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
+        //}
+        assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+    }
+    //const uint32_t nghost = g.nghost();
+    //from_ipos += ippl::Vector<int, 3>(nghost);
+    //to_ipos += ippl::Vector<int, 3>(nghost);
+    
+    if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
+        scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
+        
+        return;
+    }
+    ippl::Vector<scalar, 3> relay;
+    const int nghost = 1;
+    const ippl::Vector<scalar, 3> orig = origin;
+    using Kokkos::max;
+    using Kokkos::min;
+    for (unsigned int i = 0; i < 3; i++) {
+        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
+                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
+                           scalar(0.5) * (to[i] + from[i])));
+    }
+    scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
+    scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+}
+
+// END PREAMBLE
+
+
+//BEGIN ABSORBING BOUNDARY CONDITION
+template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
+struct second_order_abc_face{
+    using scalar = _scalar;
+    scalar Cweights[5];
+    int sign;
+    constexpr static unsigned main_axis = _main_axis;
+    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
+        constexpr scalar c = 1;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        static_assert(
+            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
+            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
+            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
+        );
+        assert(_main_axis != side_axes[0]);
+        assert(_main_axis != side_axes[1]);
+        assert(side_axes[0] != side_axes[1]);
+        constexpr scalar truncation_order = 2.0;
+        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
+        scalar q      = - 1.0 / ( 1 + 1 );
+
+        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
+
+        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
+        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
+        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
+        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
+        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
+        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
+        return advanceBoundaryS(
+		    A_nm1(i,j,k), A_n(i,j,k),
+		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
+		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
+		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
+		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
+						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
+						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
+						 const value_type& v9 , const value_type& v10, const value_type& v11,
+						 const value_type& v12, const value_type& v13)const noexcept
+    {
+        
+      value_type v0 =
+    	 Cweights[0]  * (v1 + v5) +
+    	(Cweights[1]) * v3 +
+    	(Cweights[2]) * ( v2 + v4 ) +
+    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
+    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
+      return v0;
+    }
+};
+template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
+struct second_order_abc_edge{
+    using scalar = _scalar;
+    //
+    scalar Eweights[5];
+    
+    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
+        static_assert(normal_axis1 != normal_axis2);
+        static_assert(edge_axis != normal_axis2);
+        static_assert(edge_axis != normal_axis1);
+        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
+        constexpr scalar c0_ = scalar(1);
+        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
+        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else{
+            assert(false);
+        }
+        
+
+
+        
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
+        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
+        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
+        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
+        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
+        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
+        //return A_n(i, j, k);
+        return advanceEdgeS(
+                                        A_n(i, j, k),      A_nm1(i, j, k),
+            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
+            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
+            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
+            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
+            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
+            (              value_type v1 , value_type v2 ,
+                           value_type v3 , value_type v4 , value_type v5 ,
+                           value_type v6 , value_type v7 , value_type v8 ,
+                           value_type v9 , value_type v10, value_type v11,
+                           value_type v12, value_type v13, value_type v14,
+                           value_type v15, value_type v16, value_type v17,
+                           value_type v18, value_type v19)const noexcept{
+    value_type v0 =
+    Eweights[0] * (v3 + v8) +
+    Eweights[1] * (v5 + v6) +
+    Eweights[2] * (v2 + v9) +
+    Eweights[3] * (v1 + v4 + v7 + v10) +
+    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
+    return v0;
+  }
+};
+template<typename _scalar, bool x0, bool y0, bool z0>
+struct second_order_abc_corner{
+    using scalar = _scalar;
+    scalar Cweights[17];
+    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
+        constexpr scalar c0_ = scalar(1);
+        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        //First implementation: 0,0,0 corner
+        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
+        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
+        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
+        using ippl::apply;
+        constexpr ippl::Vector<uint32_t, 3> offsets[8] = {
+            ippl::Vector<uint32_t, 3>{0,0,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,0},
+            ippl::Vector<uint32_t, 3>{0,yoff,0},
+            ippl::Vector<uint32_t, 3>{0,0,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
+            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
+        };
+        return advanceCornerS(
+                                          apply(A_n, c), apply(A_nm1, c),
+            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
+            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
+            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
+            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
+            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
+            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
+            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
+                            (       value_type v1 , value_type v2 ,
+                             value_type v3 , value_type v4 , value_type v5 ,
+                             value_type v6 , value_type v7 , value_type v8 ,
+                             value_type v9 , value_type v10, value_type v11,
+                             value_type v12, value_type v13, value_type v14,
+                             value_type v15, value_type v16, value_type v17,
+                             value_type v18, value_type v19, value_type v20,
+                             value_type v21, value_type v22, value_type v23)const noexcept{
+    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
+    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
+    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
+    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
+    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
+    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
+    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
+    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
+  }
+};
+
+
+
+
+
+
+struct second_order_mur_boundary_conditions{
+    template<typename field_type, typename dt_type>
+    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr){
+        using scalar = decltype(dt);
+        //TODO: tbh don't know
+        //const unsigned nghost = 1;
+        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
+        //assert_isreal((betaMur[0]));
+        //assert_isreal((betaMur[1]));
+        //assert_isreal((betaMur[2]));
+        auto A_n   = FA_n.getView();
+        auto A_np1 = FA_np1.getView();
+        auto A_nm1 = FA_nm1.getView();
+        
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            if(Kokkos::popcount(val) == 1){
+                if(i == 0){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(j == 0){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(k == 0){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(i == true_nr[0] - 1){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(j == true_nr[1] - 1){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(k == true_nr[2] - 1){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+            }
+        });
+        Kokkos::fence();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            if(Kokkos::popcount(val) == 2){ //Edge
+                if(i == 0 && k == 0){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == 0 && j == 0){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(j == 0 && k == 0){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(i == 0 && k == true_nr[2] - 1){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == 0 && j == true_nr[1] - 1){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(j == 0 && k == true_nr[2] - 1){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(i == true_nr[0] - 1 && k == 0){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == 0){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(j == true_nr[1] - 1 && k == 0){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(i == true_nr[0] - 1 && k == true_nr[2] - 1){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            
+            if(Kokkos::popcount(val) == 3){
+                //printf("Corner: %d, %d, %d\n", i, j, k);
+                if(i == 0 && j == 0 && k == 0){
+                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == 0 && k == 0){
+                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == 0 && j == true_nr[1] - 1 && k == 0){
+                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == 0){
+                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == 0 && j == 0 && k == true_nr[2] - 1){
+                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == 0 && k == true_nr[2] - 1){
+                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == 0 && j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+    }
+};
+//END ABSORBING BOUNDARY CONDITION
+
+
+
+
+
+
+
+
+
+namespace ippl {
+    
+    template<typename scalar>
+    struct undulator_parameters{
+        scalar lambda; //MITHRA: lambda_u
+        scalar K; //Undulator parameter
+        scalar length;
+        scalar B_magnitude;
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
+            //std::cout << "Setting bmag: " << B_magnitude << "\n";
+        }
+        undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda);
+        }
+    };
+
+    template<typename scalar>
+    struct nondispersive{
+        scalar a1;
+        scalar a2;
+        scalar a4;
+        scalar a6;
+        scalar a8;
+    };
+    template <typename _scalar, class PLayout>
+    struct  Bunch : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);          // Charge attribute
+            this->addAttribute(mass);       // Mass attribute
+            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);      // Position attribute for the next time step
+            this->addAttribute(R_nm1);      // Position attribute for the next time step
+            this->addAttribute(EB_gather);   // Electric field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;          // Charge container
+        charge_container_type mass;       // Mass container
+        velocity_container_type gamma_beta; // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
+
+    };
+
+
+    template <typename scalar>
+    // clang-format off
+    struct FDTDFieldState{
+        
+        //Sorry, can't do more than 3d
+
+        constexpr static unsigned int dim = 3;
+        using Vector_t = ippl::Vector<scalar, 3>;
+        using value_type = ippl::Vector<scalar, 4>;
+        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>;
+        using Mesh_t               = ippl::UniformCartesian<scalar, dim>;
+
+        bool periodic_bc;
+        
+        //Fields
+        using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_np1;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_n;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_nm1;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> J;
+        ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> EB;
+        
+        //Discretization options
+        Vector_t hr_m;
+        ippl::Vector<uint32_t, 3> nr_global;
+        ippl::Vector<uint32_t, 3> nr_local;
+        scalar dt;
+        Mesh_t* mesh_mp;
+        ParticleSpatialLayout<scalar, 3> playout;
+        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+
+        /**
+         * @brief Construct a new FDTDFieldState object
+         * Mesh and resolution parameter are technically redundant
+         * 
+         * @param resolution 
+         * @param layout 
+         * @param mesch 
+         */
+        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), playout(layout, mesch), particles(playout){
+            FA_np1.initialize(mesch, layout, 1);
+            FA_n.initialize(mesch, layout, 1);
+            FA_nm1.initialize(mesch, layout, 1);
+            J.initialize(mesch, layout, 1);
+            EB.initialize(mesch, layout, 1);
+            FA_n   = value_type(0);
+            FA_np1 = value_type(0);
+            FA_nm1 = value_type(0);
+            hr_m = mesch.getMeshSpacing();
+            nr_global = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
+            };
+            nr_local = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
+            };
+            std::cout << "NR_M_g: " << nr_global << "\n";
+            std::cout << "NR_M_l: " << nr_local << "\n";
+            dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2]));
+            particles.create(nparticles);
+            setNoBoundaryConditions();
+            
+        }
+        void setNoBoundaryConditions() {
+            periodic_bc = false;
+            typename VField_t::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::NoBcFace<VField_t>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<dim * 2>{});
+            FA_n  .setFieldBC(vector_bcs);
+            FA_np1.setFieldBC(vector_bcs);
+            FA_nm1.setFieldBC(vector_bcs);
+        }
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename VField_t::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<VField_t>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<dim * 2>{});
+            FA_n  .setFieldBC(vector_bcs);
+            FA_np1.setFieldBC(vector_bcs);
+            FA_nm1.setFieldBC(vector_bcs);
+        }
+        
+        void scatterBunch(){
+            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
+            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
+            assert_isreal(volume);
+            assert_isreal((scalar(1) / volume));
+            auto Jview = J.getView();
+            auto qview = particles.Q.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto orig = mesh_mp->getOrigin();
+            auto hr = mesh_mp->getMeshSpacing();
+            auto dt = this->dt;
+            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                Vector_t pos = rview(i);
+                Vector_t to = rview(i);
+                Vector_t from = rm1view(i);
+                //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume);
+                scatterLineToGrid(Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
+            });
+            Kokkos::fence();
+        }
+
+        void fieldStep(){
+            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
+            nondispersive<scalar> ndisp{
+                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
+                .a2 = sq(dt / hr_m[0]),
+                .a4 = sq(dt / hr_m[1]),
+                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0])  - 2 * calA * sq(dt / hr_m[1]),
+                .a8 = sq(dt)
+            };
+            if(periodic_bc){
+                FA_n.getFieldBC().apply(FA_n);
+            }
+            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
+            auto source = this->J.getView();
+            Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                A_np1(i, j, k) =    -A_nm1(i,j,k)
+                        + ndisp.a1 * A_n  (i,j,k)
+                        + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
+                        + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
+                        + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
+                        + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
+                        + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
+            });
+            Kokkos::fence();
+            
+            if(!periodic_bc){
+                second_order_mur_boundary_conditions bc;
+                ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
+                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr);
+            }
+            Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
+            Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
+            //std::swap(this->A_n, this->A_nm1);
+            //std::swap(this->A_np1, this->A_n);
+            evaluate_EB();
+        }
+        void evaluate_EB(){
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt = scalar(1.0) / dt;
+            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
+            auto source = this->J.getView();
+            auto EBv = this->EB.getView();
+            Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                ippl::Vector<scalar, 3> grad_phi{
+                    dAdx[0], dAdy[0], dAdz[0]
+                };
+                ippl::Vector<scalar, 3> curlA{
+                    dAdy[3] - dAdz[2],
+                    dAdz[1] - dAdx[3],
+                    dAdx[2] - dAdy[1],
+                };
+                EBv(i,j,k)[0] = -dAdt - grad_phi;
+                EBv(i,j,k)[1] = curlA;
+            });
+            Kokkos::fence();
+        }
+        template<typename callable>
+        void updateBunch(scalar time, UniaxialLorentzframe<scalar> ulb, callable undulator_field){
+            
+            Kokkos::fence();
+            auto gbview = particles.gamma_beta.getView();
+            auto ebview = particles.EB_gather.getView();
+            auto qview = particles.Q.getView();
+            auto mview = particles.mass.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto rp1view = particles.R_np1.getView();
+            scalar bunch_dt = dt / 3;
+            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            Kokkos::fence();
+            for(int bts = 0;bts < 3;bts++){
+                gather(particles.EB_gather, EB, particles.R);
+                Kokkos::fence();
+                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB = ebview(i);
+                    ippl::Vector<scalar, 3> labpos = rview(i);
+
+                    ulb.primedToUnprimed(labpos, time);
+
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_frame = undulator_field(labpos);
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame);
+                    assert_isreal((EB_undulator_bunch.first[0]));
+                    assert_isreal((EB_undulator_bunch.first[1]));
+                    assert_isreal((EB_undulator_bunch.first[2]));
+                    assert_isreal((EB_undulator_bunch.second[0]));
+                    assert_isreal((EB_undulator_bunch.second[1]));
+                    assert_isreal((EB_undulator_bunch.second[2]));
+                    EB[0] += EB_undulator_bunch.first;
+                    EB[1] += EB_undulator_bunch.second;
+
+                    const scalar charge = qview(i);
+                    const scalar mass = mview(i);
+                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
+                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                    assert_isreal((ngammabeta[0]));
+                    assert_isreal((ngammabeta[1]));
+                    assert_isreal((ngammabeta[2]));
+                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                    gbview(i) = ngammabeta;
+                });
+                Kokkos::fence();
+            }
+            Kokkos::fence();
+            
+        }
+    };
+    // clang-format on
+}  // namespace ippl
+
+int main(int argc, char* argv[]) {
+    using scalar = float;
+    ippl::initialize(argc, argv);
+    {
+
+        config cfg = read_config("../config.json");
+        const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
+        cfg.extents[2] *= frame_gamma;
+        cfg.total_time /= frame_gamma;
+        
+        const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
+        const scalar frame_gammabeta = frame_gamma * frame_beta;
+        UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
+        ippl::undulator_parameters<scalar> uparams(cfg);
+        const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
+        const scalar distance_to_entry  = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma);
+        auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector<scalar, 3>& position_in_lab_frame){
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
+            ret.first.fill(0);
+            ret.second.fill(0);
+
+            if(position_in_lab_frame[2] < distance_to_entry){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
+            }
+            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
+            }
+            return ret;
+
+        };
+        BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
+        ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
+
+        std::array<bool, 3> isParallel;
+        isParallel.fill(true);
+
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+        ippl::Vector<scalar, 3> hx     = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])};
+        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        std::cout << "hx: " << hx << "\n";
+        std::cout << "origin: " << origin << "\n";
+        std::cout << "extents: " << cfg.extents << std::endl;
+        if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){
+            std::cerr << "Dispersion relation not satisfiable\n";
+            abort();
+        }
+
+        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/);
+        fdtd_state.particles.Q = scalar(cfg.charge / cfg.num_particles);
+        fdtd_state.particles.mass = scalar(cfg.mass / cfg.num_particles);
+        auto rv = fdtd_state.particles.R.getView();
+        auto rm1 = fdtd_state.particles.R_nm1.getView();
+        size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
+        fdtd_state.particles.Q = cfg.charge / actual_pc;
+        fdtd_state.particles.mass = cfg.mass / actual_pc;
+        //fdtd_state.scatterBunch();
+        //std::cout << cfg.charge << "\n";
+        
+        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
+        for(int i = 0;i < timesteps_required;i++){
+            fdtd_state.J = scalar(0.0);
+            fdtd_state.scatterBunch();
+            //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
+            fdtd_state.fieldStep();
+            fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
+            //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
+            //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
+
+            if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
+                int img_height = 500;
+                int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]);
+                float* imagedata = new float[img_width * img_height * 3];
+                uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
+                std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
+                auto phmirror = fdtd_state.particles.R.getHostMirror();
+                Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView());
+                for(size_t hi = 0;hi < phmirror.extent(0);hi++){
+                    ippl::Vector<scalar, 3> ppos = phmirror(hi);
+                    ppos -= mesh.getOrigin();
+                    ppos /= cfg.extents.cast<scalar>();
+                    int x_imgcoord = ppos[2] * img_width;
+                    int y_imgcoord = ppos[0] * img_height;
+                    //printf("%d, %d\n", x_imgcoord, y_imgcoord);
+                    if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){
+                        const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
+                        //std::cout << intensity << "\n";
+                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = 
+                        std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
+                    }
+                };
+                    
+                char output[1024] = {0};
+                snprintf(output, 1023, "../data/outimage%.05d.bmp", i);
+                std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;});
+                stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+                delete[] imagedata;
+                delete[] imagedata_final;
+            }
+        }
+    }
+    ippl::finalize();
+}
\ No newline at end of file

From c8cb043d3098daded79d2afee8aeb325b47bf1a3 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 13 Apr 2024 18:09:26 +0200
Subject: [PATCH 02/32] Working on MPI

---
 CMakeLists.txt                  |   2 +-
 config.json                     |  84 +++++++++++++
 src/Particle/ParticleAttrib.hpp |  10 ++
 test/solver/TestFDTDSolver.cpp  | 214 +++++++++++++++++++++-----------
 4 files changed, 239 insertions(+), 71 deletions(-)
 create mode 100644 config.json

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 46a2ac866..058bfab5f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,7 +107,7 @@ set (CMAKE_CXX_STANDARD 20)
 set (CMAKE_CUDA_EXTENSIONS OFF)
 set (CMAKE_CXX_EXTENSIONS OFF)
 set (CMAKE_CXX_STANDARD_REQUIRED ON)
-set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ")
+set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g")
 set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
 #set (CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g")
 
diff --git a/config.json b/config.json
new file mode 100644
index 000000000..5b5c053e4
--- /dev/null
+++ b/config.json
@@ -0,0 +1,84 @@
+{
+    "timestep-ratio" : 1.0,
+    "mesh": {
+      "length-scale" : "micrometers",
+      "time-scale"   : "picoseconds",
+      "extents": [3400.0, 3400.0, 280.0],
+      "resolution": [64, 64, 2800],
+      "mesh-center": [0.0, 0.0, 0.0],
+      "total-time": 30000.0,
+      "bunch-time-step": 1.6,
+      "mesh-truncation-order": 2,
+      "space-charge": false,
+      "solver": "NSFD"
+    },
+    "bunch": {
+      "type": "ellipsoid",
+      "distribution": "uniform",
+      "_charge": 1,
+      "_mass": 1,
+      "charge": 1.846e8,
+      "mass": 1.846e8,
+      "number-of-particles": 500000,
+      "gamma": 100.41,
+      "_gamma": 1.0,
+      "direction (ignored)": [0.0, 0.0, 1.0],
+      "position is 0 because that's the center": "",
+      "position": [0.0, 0.0, 0.0],
+      "_sigma-position (those are commented out)": [260.0, 260.0, 50.25],
+      "_sigma-momentum (those are commented out)": [1.0e-8, 1.0e-8, 100.41e-4],
+      "sigma-position": [260.0, 260.0, 50.25],
+      "sigma-momentum": [1.0e-8, 1.0e-8, 100.41e-4],
+      "particle-gammabetas": [0, 0, 0],
+      "transverse-truncation   (Deprecated! see distribution-truncations)": 1040.0,
+      "longitudinal-truncation (Deprecated! see distribution-truncations)": 90.0,
+      "distribution-truncations" : [1040.0, 1040.0, 90.0],
+      "bunching-factor": 0.01,
+      "update-rule" : {"type" : "lorentz"}
+    },
+    "field": {
+      "update-rule" : "do",
+      "strength" : 100,
+      "field-sampling": {
+        "sample": true,
+        "type": "at-point",
+        "field": ["Ex", "Ey", "Ez"],
+        "directory": "./",
+        "base-name": "field-sampling/field",
+        "rhythm": 3.2,
+        "position": [0.0, 0.0, 110.0]
+      }
+    },
+    "undulator": {
+      "static-undulator": {
+        "undulator-parameter": 1.417,
+        "_undulator-parameter": 0,
+        "period": 30000.0,
+        "length": 5e6,
+        "polarization-angle": 0.0
+      }
+    },
+    "output":{
+      "rhythm" : 30,
+      "count" : 20,
+      "type" : "E",
+      "track" : {
+        "radiation" : "boundary_radiation.txt",
+        "particle-position" : "p0pos.txt"
+      }
+    },
+    "fel-output": {
+      "radiation-power": {
+        "sample": true,
+        "type": "at-point",
+        "directory": "./",
+        "base-name": "power-sampling/power-NSFD",
+        "plane-position": 110.0,
+        "normalized-frequency": 1.0
+      }
+    },
+    "experimentation" : {
+      "stretch-factor" : 1.0,
+      "resort" : 1.0
+    }
+  }
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 9f22b72be..e53ae6402 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -194,6 +194,16 @@ namespace ippl {
                 Vector<size_t, Field::dim> args = (index - lDom.first() + nghost).template cast<size_t>();
 
                 // gather
+                for(unsigned d = 0;d < Field::dim;d++){
+                    if(args[d] >= view.extent(d)){
+                        dview_m(idx) = T(0);
+                        return;
+                    }
+                    else if(args[d] < 1){
+                        dview_m(idx) = T(0);
+                        return;
+                    }
+                }
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
                                                        view, wlo, whi, args);
             });
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 65446b03f..54f099246 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -707,7 +707,6 @@ size_t initialize_bunch_mithra(
             std::cout << "Pos before boost: " << c.rnp << "\n";
     }
     boost_bunch(oof, frame_gamma);
-    std::cout << "boost gamma" << frame_gamma << "\n";
     for(auto& c : oof){
          if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){
             std::cout << "Pos after boost: " << c.rnp << "\n";
@@ -748,12 +747,7 @@ size_t initialize_bunch_mithra(
         gbview(i) = dgammabetas(i);
     });
     Kokkos::fence();
-    ippl::Vector<scalar, 3> meanpos = bunch.R.sum() * (1.0 / oof.size());
     
-    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
-        rview(i) -= meanpos;
-        rm1view(i) -= meanpos;
-    });
     return oof.size();
 }
 
@@ -832,7 +826,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gr
     return ret;
 }
 template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
     if(
         ipos[0] < 0
@@ -861,7 +855,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr,
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    
+    ipos -= ldom.first();
     for(unsigned i = 0;i < 8;i++){
         scalar weight = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
@@ -877,7 +871,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr,
     assert(abs(accum - 1.0f) < 1e-6f);
 }
 template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
     //std::cout << "Value: " << value << "\n";
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
     if(
@@ -908,7 +902,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr,
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    
+    ipos -= ldom.first();
     for(unsigned i = 0;i < 8;i++){
         scalar weight = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
@@ -926,7 +920,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector<scalar, 3> hr,
 }
 
 template<typename view_type, typename scalar>
-KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
 
     
     Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
@@ -943,7 +937,7 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector<sca
     //to_ipos += ippl::Vector<int, 3>(nghost);
     
     if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
-        scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
         
         return;
     }
@@ -957,8 +951,8 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector<sca
                        max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
                            scalar(0.5) * (to[i] + from[i])));
     }
-    scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
-    scatterToGrid(Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
 }
 
 // END PREAMBLE
@@ -1195,7 +1189,7 @@ struct second_order_abc_corner{
 
 struct second_order_mur_boundary_conditions{
     template<typename field_type, typename dt_type>
-    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr){
+    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
         using scalar = decltype(dt);
         //TODO: tbh don't know
         //const unsigned nghost = 1;
@@ -1206,32 +1200,45 @@ struct second_order_mur_boundary_conditions{
         auto A_n   = FA_n.getView();
         auto A_np1 = FA_np1.getView();
         auto A_nm1 = FA_nm1.getView();
-        
+        ippl::Vector<uint32_t, 3> local_nr{
+            uint32_t(A_n.extent(0)),
+            uint32_t(A_n.extent(1)),
+            uint32_t(A_n.extent(2))
+        };
         Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+            
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
+                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+
             if(Kokkos::popcount(val) == 1){
-                if(i == 0){
+                if(ig == 0){
                     second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(j == 0){
+                if(jg == 0){
                     second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(k == 0){
+                if(kg == 0){
                     second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(i == true_nr[0] - 1){
+                if(ig == true_nr[0] - 1){
                     second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(j == true_nr[1] - 1){
+                if(jg == true_nr[1] - 1){
                     second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(k == true_nr[2] - 1){
+                if(kg == true_nr[2] - 1){
                     second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
@@ -1239,57 +1246,65 @@ struct second_order_mur_boundary_conditions{
         });
         Kokkos::fence();
         Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 2)return;
+            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
+                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
             if(Kokkos::popcount(val) == 2){ //Edge
-                if(i == 0 && k == 0){
+                if(ig == 0 && kg == 0){
                     second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == 0 && j == 0){
+                else if(ig == 0 && jg == 0){
                     second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(j == 0 && k == 0){
+                else if(jg == 0 && kg == 0){
                     second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(i == 0 && k == true_nr[2] - 1){
+                else if(ig == 0 && kg == true_nr[2] - 1){
                     second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == 0 && j == true_nr[1] - 1){
+                else if(ig == 0 && jg == true_nr[1] - 1){
                     second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(j == 0 && k == true_nr[2] - 1){
+                else if(jg == 0 && kg == true_nr[2] - 1){
                     second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(i == true_nr[0] - 1 && k == 0){
+                else if(ig == true_nr[0] - 1 && kg == 0){
                     second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == 0){
+                else if(ig == true_nr[0] - 1 && jg == 0){
                     second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(j == true_nr[1] - 1 && k == 0){
+                else if(jg == true_nr[1] - 1 && kg == 0){
                     second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(i == true_nr[0] - 1 && k == true_nr[2] - 1){
+                else if(ig == true_nr[0] - 1 && kg == true_nr[2] - 1){
                     second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1){
+                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1){
                     second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                else if(jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
                     second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
@@ -1300,40 +1315,48 @@ struct second_order_mur_boundary_conditions{
         });
         Kokkos::fence();
         Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                             + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5);
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            //if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
+                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
             
             if(Kokkos::popcount(val) == 3){
                 //printf("Corner: %d, %d, %d\n", i, j, k);
-                if(i == 0 && j == 0 && k == 0){
+                if(ig == 0 && jg == 0 && kg == 0){
                     second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == 0 && k == 0){
+                else if(ig == true_nr[0] - 1 && jg == 0 && kg == 0){
                     second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == 0 && j == true_nr[1] - 1 && k == 0){
+                else if(ig == 0 && jg == true_nr[1] - 1 && kg == 0){
                     second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == 0){
+                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == 0){
                     second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == 0 && j == 0 && k == true_nr[2] - 1){
+                else if(ig == 0 && jg == 0 && kg == true_nr[2] - 1){
                     second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == 0 && k == true_nr[2] - 1){
+                else if(ig == true_nr[0] - 1 && jg == 0 && kg == true_nr[2] - 1){
                     second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == 0 && j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                else if(ig == 0 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
                     second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == true_nr[2] - 1){
+                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
                     second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
@@ -1444,6 +1467,7 @@ namespace ippl {
         ippl::Vector<uint32_t, 3> nr_local;
         scalar dt;
         Mesh_t* mesh_mp;
+        FieldLayout<dim>* layout_mp;
         ParticleSpatialLayout<scalar, 3> playout;
         Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
         using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
@@ -1456,7 +1480,7 @@ namespace ippl {
          * @param layout 
          * @param mesch 
          */
-        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), playout(layout, mesch), particles(playout){
+        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout){
             FA_np1.initialize(mesch, layout, 1);
             FA_n.initialize(mesch, layout, 1);
             FA_nm1.initialize(mesch, layout, 1);
@@ -1528,14 +1552,16 @@ namespace ippl {
             auto orig = mesh_mp->getOrigin();
             auto hr = mesh_mp->getMeshSpacing();
             auto dt = this->dt;
+            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
             Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                 Vector_t pos = rview(i);
                 Vector_t to = rview(i);
                 Vector_t from = rm1view(i);
                 //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume);
-                scatterLineToGrid(Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
+                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
             });
             Kokkos::fence();
+            J.accumulateHalo();
         }
 
         void fieldStep(){
@@ -1547,11 +1573,13 @@ namespace ippl {
                 .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0])  - 2 * calA * sq(dt / hr_m[1]),
                 .a8 = sq(dt)
             };
-            if(periodic_bc){
-                FA_n.getFieldBC().apply(FA_n);
-            }
+            //if(periodic_bc){
+            //    FA_n.getFieldBC().apply(FA_n);
+            //}
             auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
             auto source = this->J.getView();
+            FA_nm1.fillHalo();
+            FA_n.fillHalo();
             Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
                 A_np1(i, j, k) =    -A_nm1(i,j,k)
                         + ndisp.a1 * A_n  (i,j,k)
@@ -1565,8 +1593,9 @@ namespace ippl {
             
             if(!periodic_bc){
                 second_order_mur_boundary_conditions bc;
+                const auto& ldom = layout_mp->getLocalNDIndex();
                 ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
-                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr);
+                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom);
             }
             Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
             Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
@@ -1700,7 +1729,8 @@ int main(int argc, char* argv[]) {
         ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
 
         std::array<bool, 3> isParallel;
-        isParallel.fill(true);
+        isParallel.fill(false);
+        isParallel[2] = true;
 
         // all parallel layout, standard domain, normal axis order
         ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
@@ -1718,35 +1748,57 @@ int main(int argc, char* argv[]) {
         }
 
         ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/);
-        fdtd_state.particles.Q = scalar(cfg.charge / cfg.num_particles);
-        fdtd_state.particles.mass = scalar(cfg.mass / cfg.num_particles);
-        auto rv = fdtd_state.particles.R.getView();
-        auto rm1 = fdtd_state.particles.R_nm1.getView();
-        size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
-        fdtd_state.particles.Q = cfg.charge / actual_pc;
-        fdtd_state.particles.mass = cfg.mass / actual_pc;
+        
+        if(ippl::Comm->rank() == 0){
+            std::cout << "Init particles: " << std::endl;
+            size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
+            fdtd_state.particles.Q = cfg.charge / actual_pc;
+            fdtd_state.particles.mass = cfg.mass / actual_pc;
+        }
+        else{
+            fdtd_state.particles.create(0);
+        }
+        {
+            auto rview = fdtd_state.particles.R.getView();
+            auto rm1view = fdtd_state.particles.R_nm1.getView();
+            ippl::Vector<scalar, 3> meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum());
+    
+            Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                rview(i) -= meanpos;
+                rm1view(i) -= meanpos;
+            });
+        }
         //fdtd_state.scatterBunch();
         //std::cout << cfg.charge << "\n";
         
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
-        for(int i = 0;i < timesteps_required;i++){
+        for(size_t i = 0;i < timesteps_required;i++){
+            if(ippl::Comm->rank() == 0){
+                std::cout << "Step: " << i << std::endl;
+            }
             fdtd_state.J = scalar(0.0);
+            fdtd_state.playout.update(fdtd_state.particles);
             fdtd_state.scatterBunch();
             //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
             fdtd_state.fieldStep();
             fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
             //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
             //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
-
+            int rank = ippl::Comm->rank();
+            int size = ippl::Comm->size();
             if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
+
+                auto ldom = layout.getLocalNDIndex();
                 int img_height = 500;
                 int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]);
                 float* imagedata = new float[img_width * img_height * 3];
+                float* idata_recvbuffer = new float[img_width * img_height * 3];
+                int floatcount = img_width * img_height * 3;
                 uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
                 std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
                 auto phmirror = fdtd_state.particles.R.getHostMirror();
                 Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView());
-                for(size_t hi = 0;hi < phmirror.extent(0);hi++){
+                for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){
                     ippl::Vector<scalar, 3> ppos = phmirror(hi);
                     ppos -= mesh.getOrigin();
                     ppos /= cfg.extents.cast<scalar>();
@@ -1760,12 +1812,34 @@ int main(int argc, char* argv[]) {
                         std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
                     }
                 };
-                    
-                char output[1024] = {0};
-                snprintf(output, 1023, "../data/outimage%.05d.bmp", i);
-                std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;});
-                stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+                int mask = 1;
+                while (mask < size) {
+                    int partner = rank ^ mask;
+                    //if((rank & (mask - 1)) == 0)
+                    {
+                        if ((rank & mask) == 0) {
+                            // Send data to partner
+                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
+                            // Apply image summation
+                            for(int f = 0;f < floatcount;f++){
+                                imagedata[f] += idata_recvbuffer[f];
+                            }
+                        } else {
+                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator());
+                            // Receive data from partner and apply reduction
+                            
+                        }
+                    }
+                    mask <<= 1;  // Move to next bit position for pairing
+                }
+                if(rank == 0){
+                    char output[1024] = {0};
+                    snprintf(output, 1023, "../data/outimage%.05d.bmp", i);
+                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;});
+                    stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+                }
                 delete[] imagedata;
+                delete[] idata_recvbuffer;
                 delete[] imagedata_final;
             }
         }

From 2b25e729b7334fc7deea7ec819b55d5a0235f9a4 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 00:44:31 +0200
Subject: [PATCH 03/32] ABC working with mpi

---
 src/Types/Vector.h             |   2 +-
 test/solver/TestFDTDSolver.cpp | 180 +++++++++++++++++++++++----------
 2 files changed, 129 insertions(+), 53 deletions(-)

diff --git a/src/Types/Vector.h b/src/Types/Vector.h
index 56963f864..0f1d00b96 100644
--- a/src/Types/Vector.h
+++ b/src/Types/Vector.h
@@ -66,7 +66,7 @@ namespace ippl {
 #ifndef __CUDA_ARCH__
             using std::sqrt;
 #endif
-            return sqrt(squaredNorm());
+            return Kokkos::sqrt(squaredNorm());
         }
         KOKKOS_INLINE_FUNCTION Vector<T, Dim> normalized() const noexcept { return *this / norm(); }
         KOKKOS_INLINE_FUNCTION value_type sum() const noexcept {
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 54f099246..6ef9c0210 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -5,6 +5,7 @@
 #include "Field/Field.h"
 
 #include "MaxwellSolvers/FDTD.h"
+#define JSON_HAS_RANGES 0 //Merlin compilation
 #include <json.hpp>
 #include <fstream>
 #include <list>
@@ -40,7 +41,17 @@ KOKKOS_INLINE_FUNCTION bool isINF(double x){
     #endif
 }
 #define assert_isreal(X) assert(!isNaN(X) && !isINF(X))
-
+template<unsigned int Dim, typename callable, typename... Ts>
+KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from, ippl::Vector<uint32_t, Dim> to, Ts... args){
+    if constexpr(sizeof...(Ts) == Dim){
+        c(args...);
+    }
+    else{
+        for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){
+            serial_for(c, from, to, args..., i);
+        }
+    }
+}
 
 
 
@@ -647,6 +658,7 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
      * macro-particles and perform the corresponding changes.                                         	*/
     bunchInit.numberOfParticles_ = chargeVector.size();
 }
+
 template<typename Double>
 void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma){
     Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
@@ -828,6 +840,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gr
 template<typename view_type, typename scalar>
 KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    ipos -= ldom.first();
     if(
         ipos[0] < 0
         ||ipos[1] < 0
@@ -855,7 +868,7 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    ipos -= ldom.first();
+    
     for(unsigned i = 0;i < 8;i++){
         scalar weight = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
@@ -874,6 +887,7 @@ template<typename view_type, typename scalar>
 KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
     //std::cout << "Value: " << value << "\n";
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    ipos -= ldom.first();
     if(
         ipos[0] < 0
         ||ipos[1] < 0
@@ -902,7 +916,7 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    ipos -= ldom.first();
+    
     for(unsigned i = 0;i < 8;i++){
         scalar weight = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
@@ -930,7 +944,7 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view
         //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
         //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
         //}
-        assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+        //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
     }
     //const uint32_t nghost = g.nghost();
     //from_ipos += ippl::Vector<int, 3>(nghost);
@@ -1205,7 +1219,9 @@ struct second_order_mur_boundary_conditions{
             uint32_t(A_n.extent(1)),
             uint32_t(A_n.extent(2))
         };
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+        constexpr uint32_t min_abc_boundary = 1;
+        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
             uint32_t ig = i + lDom.first()[0];
             uint32_t jg = j + lDom.first()[1];
             uint32_t kg = k + lDom.first()[2];
@@ -1214,38 +1230,39 @@ struct second_order_mur_boundary_conditions{
                          + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
 
             if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
-                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
 
             if(Kokkos::popcount(val) == 1){
-                if(ig == 0){
+                if(ig == min_abc_boundary){
                     second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(jg == 0){
+                if(jg == min_abc_boundary){
                     second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(kg == 0){
+                if(kg == min_abc_boundary){
                     second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(ig == true_nr[0] - 1){
+                if(ig == true_nr[0] - max_abc_boundary_sub){
                     second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(jg == true_nr[1] - 1){
+                if(jg == true_nr[1] - max_abc_boundary_sub){
                     second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                if(kg == true_nr[2] - 1){
+                if(kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
             }
         });
         Kokkos::fence();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+        FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
             uint32_t ig = i + lDom.first()[0];
             uint32_t jg = j + lDom.first()[1];
             uint32_t kg = k + lDom.first()[2];
@@ -1254,57 +1271,57 @@ struct second_order_mur_boundary_conditions{
                          + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
 
             if(Kokkos::popcount(lval) > 2)return;
-            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
-                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
             if(Kokkos::popcount(val) == 2){ //Edge
-                if(ig == 0 && kg == 0){
+                if(ig == min_abc_boundary && kg == min_abc_boundary){
                     second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == 0 && jg == 0){
+                else if(ig == min_abc_boundary && jg == min_abc_boundary){
                     second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(jg == 0 && kg == 0){
+                else if(jg == min_abc_boundary && kg == min_abc_boundary){
                     second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(ig == 0 && kg == true_nr[2] - 1){
+                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == 0 && jg == true_nr[1] - 1){
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(jg == 0 && kg == true_nr[2] - 1){
+                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(ig == true_nr[0] - 1 && kg == 0){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
                     second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == 0){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
                     second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(jg == true_nr[1] - 1 && kg == 0){
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
                     second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
 
-                else if(ig == true_nr[0] - 1 && kg == true_nr[2] - 1){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
                     A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
@@ -1314,7 +1331,8 @@ struct second_order_mur_boundary_conditions{
             }
         });
         Kokkos::fence();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+        FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
             uint32_t ig = i + lDom.first()[0];
             uint32_t jg = j + lDom.first()[1];
             uint32_t kg = k + lDom.first()[2];
@@ -1323,40 +1341,40 @@ struct second_order_mur_boundary_conditions{
             //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
 
             //if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
-                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
             
             if(Kokkos::popcount(val) == 3){
                 //printf("Corner: %d, %d, %d\n", i, j, k);
-                if(ig == 0 && jg == 0 && kg == 0){
+                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
                     second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == 0 && kg == 0){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
                     second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == 0 && jg == true_nr[1] - 1 && kg == 0){
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
                     second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == 0){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
                     second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == 0 && jg == 0 && kg == true_nr[2] - 1){
+                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == 0 && kg == true_nr[2] - 1){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == 0 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
-                else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
                     second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
                     A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
                 }
@@ -1468,7 +1486,8 @@ namespace ippl {
         scalar dt;
         Mesh_t* mesh_mp;
         FieldLayout<dim>* layout_mp;
-        ParticleSpatialLayout<scalar, 3> playout;
+        using playout_type = ParticleSpatialLayout<scalar, 3>;
+        playout_type playout;
         Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
         using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
 
@@ -1580,30 +1599,42 @@ namespace ippl {
             auto source = this->J.getView();
             FA_nm1.fillHalo();
             FA_n.fillHalo();
+            ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
+            const auto& ldom = layout_mp->getLocalNDIndex();
             Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                A_np1(i, j, k) =    -A_nm1(i,j,k)
-                        + ndisp.a1 * A_n  (i,j,k)
-                        + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
-                        + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
-                        + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
-                        + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
-                        + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
+                uint32_t ig = i + ldom.first()[0];
+                uint32_t jg = j + ldom.first()[1];
+                uint32_t kg = k + ldom.first()[2];
+                uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2)
+                             + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5);
+                if(!val){
+                    A_np1(i, j, k) =    -A_nm1(i,j,k)
+                            + ndisp.a1 * A_n  (i,j,k)
+                            + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
+                            + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
+                            + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
+                            + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
+                            + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
+                }
             });
             Kokkos::fence();
             
             if(!periodic_bc){
+                FA_np1.fillHalo();
                 second_order_mur_boundary_conditions bc;
-                const auto& ldom = layout_mp->getLocalNDIndex();
-                ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
+                
+                
                 bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom);
             }
             Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
             Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
             //std::swap(this->A_n, this->A_nm1);
             //std::swap(this->A_np1, this->A_n);
+            
             evaluate_EB();
         }
         void evaluate_EB(){
+            FA_n.fillHalo();FA_nm1.fillHalo();
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
             const scalar idt = scalar(1.0) / dt;
             auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
@@ -1679,6 +1710,27 @@ namespace ippl {
                 });
                 Kokkos::fence();
             }
+            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
+            size_t invalid_count = 0;
+            auto origo = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;//
+            extenz[0] = nr_global[0] * hr_m[0];
+            extenz[1] = nr_global[1] * hr_m[1];
+            extenz[2] = nr_global[2] * hr_m[2];
+            Kokkos::parallel_reduce(
+                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t& ref){
+                    bool out_of_bounds = false;
+                    ippl::Vector<scalar, dim> ppos = rview(i);
+                    for(size_t d = 0;d < dim;d++){
+                        out_of_bounds |= (ppos[d] <= origo[d]);
+                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
+                    }
+                    invalid(i) = out_of_bounds;
+                    ref += out_of_bounds;
+                }, 
+                invalid_count);
+            particles.destroy(invalid, invalid_count);
             Kokkos::fence();
             
         }
@@ -1772,6 +1824,7 @@ int main(int argc, char* argv[]) {
         //std::cout << cfg.charge << "\n";
         
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
+        
         for(size_t i = 0;i < timesteps_required;i++){
             if(ippl::Comm->rank() == 0){
                 std::cout << "Step: " << i << std::endl;
@@ -1789,9 +1842,10 @@ int main(int argc, char* argv[]) {
             if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
 
                 auto ldom = layout.getLocalNDIndex();
-                int img_height = 500;
-                int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]);
+                int img_height = 1000;
+                int img_width = int(1000.0 * cfg.extents[2] / cfg.extents[0]);
                 float* imagedata = new float[img_width * img_height * 3];
+                std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
                 float* idata_recvbuffer = new float[img_width * img_height * 3];
                 int floatcount = img_width * img_height * 3;
                 uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
@@ -1812,6 +1866,28 @@ int main(int argc, char* argv[]) {
                         std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
                     }
                 };
+                auto ebh = fdtd_state.EB.getHostMirror();
+                Kokkos::deep_copy(ebh, fdtd_state.EB.getView());
+
+                //double exp_avg = double(exp_sum) / double(acount);
+                {
+                    for(int i = 1;i < img_width;i++){
+                        for(int j = 1;j < img_height;j++){
+                            int i_remap = (double(i) / (img_width  - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
+                            int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
+                            if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
+                                if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
+                                    ippl::Vector<ippl::Vector<scalar, 3>, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
+                                    ippl::Vector<scalar, 3> poynting = acc[0].cross(acc[1]);
+                                    if(poynting.norm() > 0){
+                                        //std::cout << poynting.norm() << "\n";
+                                    }
+                                    imagedata[(j * img_width + i) * 3 + 0] += std::sqrt(poynting.norm()) * 0.5f;//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                }
+                            }
+                        }
+                    }
+                }
                 int mask = 1;
                 while (mask < size) {
                     int partner = rank ^ mask;
@@ -1835,7 +1911,7 @@ int main(int argc, char* argv[]) {
                 if(rank == 0){
                     char output[1024] = {0};
                     snprintf(output, 1023, "../data/outimage%.05d.bmp", i);
-                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;});
+                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
                     stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
                 }
                 delete[] imagedata;

From cfba362e3894944849263b1bbbff1f3aff6c46bb Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 02:30:00 +0200
Subject: [PATCH 04/32] timing

---
 test/solver/TestFDTDSolver.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 6ef9c0210..f803938a1 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -3,6 +3,7 @@
 #include "Types/Vector.h"
 
 #include "Field/Field.h"
+#include <chrono>
 
 #include "MaxwellSolvers/FDTD.h"
 #define JSON_HAS_RANGES 0 //Merlin compilation
@@ -11,6 +12,12 @@
 #include <list>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.hpp>
+
+uint64_t nanoTime(){
+    using namespace std;
+    using namespace chrono;
+    return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
+}
 //CONFIG
 KOKKOS_INLINE_FUNCTION bool isNaN(float x){
     #ifdef __CUDA_ARCH__
@@ -1261,7 +1268,7 @@ struct second_order_mur_boundary_conditions{
             }
         });
         Kokkos::fence();
-        FA_np1.fillHalo();
+        //FA_np1.fillHalo();
         Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
             uint32_t ig = i + lDom.first()[0];
             uint32_t jg = j + lDom.first()[1];
@@ -1331,7 +1338,7 @@ struct second_order_mur_boundary_conditions{
             }
         });
         Kokkos::fence();
-        FA_np1.fillHalo();
+        //FA_np1.fillHalo();
         Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
             uint32_t ig = i + lDom.first()[0];
             uint32_t jg = j + lDom.first()[1];
@@ -1824,11 +1831,8 @@ int main(int argc, char* argv[]) {
         //std::cout << cfg.charge << "\n";
         
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
-        
+        uint64_t starttime =  nanoTime();
         for(size_t i = 0;i < timesteps_required;i++){
-            if(ippl::Comm->rank() == 0){
-                std::cout << "Step: " << i << std::endl;
-            }
             fdtd_state.J = scalar(0.0);
             fdtd_state.playout.update(fdtd_state.particles);
             fdtd_state.scatterBunch();
@@ -1917,8 +1921,10 @@ int main(int argc, char* argv[]) {
                 delete[] imagedata;
                 delete[] idata_recvbuffer;
                 delete[] imagedata_final;
-            }
+            }    
         }
+        uint64_t endtime = nanoTime();
+        std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
     }
     ippl::finalize();
 }
\ No newline at end of file

From 9da50c92f3be2e4be138dd8711f6d3a972df9585 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 10:01:14 +0200
Subject: [PATCH 05/32] Remove gathers

---
 src/Particle/ParticleAttrib.hpp |  9 +++++----
 test/solver/TestFDTDSolver.cpp  | 18 +++++++++++-------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index e53ae6402..9ee77e3e3 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -159,10 +159,11 @@ namespace ippl {
         constexpr unsigned Dim = Field::dim;
         using PositionType     = typename Field::Mesh_t::value_type;
 
-        static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo");
-        IpplTimings::startTimer(fillHaloTimer);
-        f.fillHalo();
-        IpplTimings::stopTimer(fillHaloTimer);
+        //This completely klönks performance for subdivided particle timesteps
+        //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo");
+        //IpplTimings::startTimer(fillHaloTimer);
+        //f.fillHalo();
+        //IpplTimings::stopTimer(fillHaloTimer);
 
         static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather");
         IpplTimings::startTimer(gatherTimer);
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index f803938a1..c59c77875 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1497,6 +1497,7 @@ namespace ippl {
         playout_type playout;
         Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
         using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        config m_config;
 
         /**
          * @brief Construct a new FDTDFieldState object
@@ -1506,7 +1507,7 @@ namespace ippl {
          * @param layout 
          * @param mesch 
          */
-        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout){
+        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){
             FA_np1.initialize(mesch, layout, 1);
             FA_n.initialize(mesch, layout, 1);
             FA_nm1.initialize(mesch, layout, 1);
@@ -1604,7 +1605,7 @@ namespace ippl {
             //}
             auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
             auto source = this->J.getView();
-            FA_nm1.fillHalo();
+            //FA_nm1.fillHalo();
             FA_n.fillHalo();
             ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
             const auto& ldom = layout_mp->getLocalNDIndex();
@@ -1641,7 +1642,7 @@ namespace ippl {
             evaluate_EB();
         }
         void evaluate_EB(){
-            FA_n.fillHalo();FA_nm1.fillHalo();
+            FA_n.fillHalo();//FA_nm1.fillHalo();
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
             const scalar idt = scalar(1.0) / dt;
             auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
@@ -1679,9 +1680,11 @@ namespace ippl {
             auto rp1view = particles.R_np1.getView();
             scalar bunch_dt = dt / 3;
             Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            EB.fillHalo();
             Kokkos::fence();
             for(int bts = 0;bts < 3;bts++){
-                gather(particles.EB_gather, EB, particles.R);
+                
+                particles.EB_gather.gather(EB, particles.R);
                 Kokkos::fence();
                 Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                     const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
@@ -1806,7 +1809,7 @@ int main(int argc, char* argv[]) {
             abort();
         }
 
-        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/);
+        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
         
         if(ippl::Comm->rank() == 0){
             std::cout << "Init particles: " << std::endl;
@@ -1827,6 +1830,7 @@ int main(int argc, char* argv[]) {
                 rm1view(i) -= meanpos;
             });
         }
+        fdtd_state.particles.setParticleBC(ippl::NO);
         //fdtd_state.scatterBunch();
         //std::cout << cfg.charge << "\n";
         
@@ -1846,8 +1850,8 @@ int main(int argc, char* argv[]) {
             if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
 
                 auto ldom = layout.getLocalNDIndex();
-                int img_height = 1000;
-                int img_width = int(1000.0 * cfg.extents[2] / cfg.extents[0]);
+                int img_height = 400;
+                int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]);
                 float* imagedata = new float[img_width * img_height * 3];
                 std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
                 float* idata_recvbuffer = new float[img_width * img_height * 3];

From 24ff0a6186905f1d30082446dac4debd65893c17 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 10:42:09 +0200
Subject: [PATCH 06/32] Custom output path

---
 test/solver/TestFDTDSolver.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index c59c77875..9ea06d754 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -96,6 +96,7 @@ struct config {
     scalar undulator_length;
 
     uint32_t output_rhythm;
+    std::string output_path;
     std::unordered_map<std::string, double> experiment_options;
 };
 template<typename scalar, unsigned Dim>
@@ -292,6 +293,13 @@ config read_config(const char *filepath){
     ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters;
     ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
     ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
+    ret.output_path = "../data/";
+    if(j["output"].contains("path")){
+        ret.output_path = j["output"]["path"];
+        if(!ret.output_path.ends_with('/')){
+            ret.output_path.push_back('/');
+        }
+    }
     if(j.contains("experimentation")){
         nlohmann::json je = j["experimentation"];
         for(auto it = je.begin(); it!= je.end();it++){
@@ -1579,12 +1587,15 @@ namespace ippl {
             auto orig = mesh_mp->getOrigin();
             auto hr = mesh_mp->getMeshSpacing();
             auto dt = this->dt;
+            bool space_charge = m_config.space_charge;
             ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
             Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                 Vector_t pos = rview(i);
                 Vector_t to = rview(i);
                 Vector_t from = rm1view(i);
-                //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume);
+                if(space_charge){
+                    scatterToGrid(lDom, Jview,hr, orig, pos, qview(i) / volume);
+                }
                 scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
             });
             Kokkos::fence();
@@ -1918,7 +1929,8 @@ int main(int argc, char* argv[]) {
                 }
                 if(rank == 0){
                     char output[1024] = {0};
-                    snprintf(output, 1023, "../data/outimage%.05d.bmp", i);
+                    
+                    snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
                     std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
                     stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
                 }

From 35f2d01b46e6a74a4e5cde9a5ea34b7a664980bc Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 19:05:11 +0200
Subject: [PATCH 07/32] Add useful radiation output

---
 test/solver/TestFDTDSolver.cpp | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 9ea06d754..f932c7da3 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1847,6 +1847,10 @@ int main(int argc, char* argv[]) {
         
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
         uint64_t starttime =  nanoTime();
+        std::ofstream rad;
+        if(ippl::Comm->rank() == 0){
+            rad = std::ofstream("radiation.txt");
+        }
         for(size_t i = 0;i < timesteps_required;i++){
             fdtd_state.J = scalar(0.0);
             fdtd_state.playout.update(fdtd_state.particles);
@@ -1854,13 +1858,36 @@ int main(int argc, char* argv[]) {
             //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
             fdtd_state.fieldStep();
             fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
+            auto ldom = layout.getLocalNDIndex();
+            auto nrg = fdtd_state.nr_global;
+            auto ebv = fdtd_state.EB.getView();
+            double radiation = 0.0;
+            Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
+                uint32_t ig = i + ldom.first()[0];
+                uint32_t jg = j + ldom.first()[1];
+                uint32_t kg = k + ldom.first()[2];
+                if(kg == nrg[2] - 3){
+                    ref += ebv(i,j,k)[0].cross(ebv(i,j,k)[1])[2];
+                }
+
+            }, radiation);
+            double radiation_in_watt_on_this_rank = radiation *
+            unit_powerdensity_in_watt_per_square_meter *
+            fdtd_state.hr_m[0] * fdtd_state.hr_m[1] * unit_length_in_meters * unit_length_in_meters;
+            double radiation_in_watt_global = 0.0;
+            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
+            if(ippl::Comm->rank() == 0){
+                ippl::Vector<scalar, 3> pos{0,0,0};
+                frame_boost.primedToUnprimed(pos, fdtd_state.dt * i);
+                rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
+            }
             //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
             //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
             int rank = ippl::Comm->rank();
             int size = ippl::Comm->size();
             if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
 
-                auto ldom = layout.getLocalNDIndex();
+                
                 int img_height = 400;
                 int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]);
                 float* imagedata = new float[img_width * img_height * 3];

From 0fbb69a42a9719ba7c0483e62b7998bf2a38f9a1 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 21:25:36 +0200
Subject: [PATCH 08/32] Correct radiation eval

---
 test/solver/TestFDTDSolver.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index f932c7da3..e6a4d7d5a 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -416,6 +416,9 @@ struct UniaxialLorentzframe{
         ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
         return ret;
     }
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> inverse_transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB)const noexcept{
+        return UniaxialLorentzframe<T, 2>(-gammaBeta_m).transform_EB(primedEB);
+    }
 };
 template<typename scalar>
 BunchInitialize<scalar> generate_mithra_config(const config& cfg, const UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
@@ -1863,17 +1866,20 @@ int main(int argc, char* argv[]) {
             auto ebv = fdtd_state.EB.getView();
             double radiation = 0.0;
             Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
-                uint32_t ig = i + ldom.first()[0];
-                uint32_t jg = j + ldom.first()[1];
+                //uint32_t ig = i + ldom.first()[0];
+                //uint32_t jg = j + ldom.first()[1];
+                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]};
+                ippl::Vector<scalar, 3> Elab = frame_boost.transform_EB(buncheb).first;
+                ippl::Vector<scalar, 3> Blab = frame_boost.transform_EB(buncheb).second;
                 uint32_t kg = k + ldom.first()[2];
                 if(kg == nrg[2] - 3){
-                    ref += ebv(i,j,k)[0].cross(ebv(i,j,k)[1])[2];
+                    ref += Elab.cross(Blab)[2];
                 }
 
             }, radiation);
             double radiation_in_watt_on_this_rank = radiation *
-            unit_powerdensity_in_watt_per_square_meter *
-            fdtd_state.hr_m[0] * fdtd_state.hr_m[1] * unit_length_in_meters * unit_length_in_meters;
+            double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) *
+            fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
             double radiation_in_watt_global = 0.0;
             MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
             if(ippl::Comm->rank() == 0){

From eb553ceb48c765f819ed58e3f7c21d39a2dbc280 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 14 Apr 2024 21:37:07 +0200
Subject: [PATCH 09/32] Correct now

---
 test/solver/TestFDTDSolver.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index e6a4d7d5a..978d1e9eb 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1869,8 +1869,8 @@ int main(int argc, char* argv[]) {
                 //uint32_t ig = i + ldom.first()[0];
                 //uint32_t jg = j + ldom.first()[1];
                 Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]};
-                ippl::Vector<scalar, 3> Elab = frame_boost.transform_EB(buncheb).first;
-                ippl::Vector<scalar, 3> Blab = frame_boost.transform_EB(buncheb).second;
+                ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
+                ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
                 uint32_t kg = k + ldom.first()[2];
                 if(kg == nrg[2] - 3){
                     ref += Elab.cross(Blab)[2];

From 120726d8b21f7869202dcd09a36e320141d8f2f7 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Tue, 16 Apr 2024 12:36:04 +0200
Subject: [PATCH 10/32] Change colorscheme and labframe rad

---
 test/solver/TestFDTDSolver.cpp | 273 ++++++++++++++++++++++++++++++++-
 1 file changed, 272 insertions(+), 1 deletion(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 978d1e9eb..fe90bcf24 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -12,6 +12,267 @@
 #include <list>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.hpp>
+constexpr float turbo_cm[256][3] = {
+  {0.18995,0.07176,0.23217},
+  {0.19483,0.08339,0.26149},
+  {0.19956,0.09498,0.29024},
+  {0.20415,0.10652,0.31844},
+  {0.20860,0.11802,0.34607},
+  {0.21291,0.12947,0.37314},
+  {0.21708,0.14087,0.39964},
+  {0.22111,0.15223,0.42558},
+  {0.22500,0.16354,0.45096},
+  {0.22875,0.17481,0.47578},
+  {0.23236,0.18603,0.50004},
+  {0.23582,0.19720,0.52373},
+  {0.23915,0.20833,0.54686},
+  {0.24234,0.21941,0.56942},
+  {0.24539,0.23044,0.59142},
+  {0.24830,0.24143,0.61286},
+  {0.25107,0.25237,0.63374},
+  {0.25369,0.26327,0.65406},
+  {0.25618,0.27412,0.67381},
+  {0.25853,0.28492,0.69300},
+  {0.26074,0.29568,0.71162},
+  {0.26280,0.30639,0.72968},
+  {0.26473,0.31706,0.74718},
+  {0.26652,0.32768,0.76412},
+  {0.26816,0.33825,0.78050},
+  {0.26967,0.34878,0.79631},
+  {0.27103,0.35926,0.81156},
+  {0.27226,0.36970,0.82624},
+  {0.27334,0.38008,0.84037},
+  {0.27429,0.39043,0.85393},
+  {0.27509,0.40072,0.86692},
+  {0.27576,0.41097,0.87936},
+  {0.27628,0.42118,0.89123},
+  {0.27667,0.43134,0.90254},
+  {0.27691,0.44145,0.91328},
+  {0.27701,0.45152,0.92347},
+  {0.27698,0.46153,0.93309},
+  {0.27680,0.47151,0.94214},
+  {0.27648,0.48144,0.95064},
+  {0.27603,0.49132,0.95857},
+  {0.27543,0.50115,0.96594},
+  {0.27469,0.51094,0.97275},
+  {0.27381,0.52069,0.97899},
+  {0.27273,0.53040,0.98461},
+  {0.27106,0.54015,0.98930},
+  {0.26878,0.54995,0.99303},
+  {0.26592,0.55979,0.99583},
+  {0.26252,0.56967,0.99773},
+  {0.25862,0.57958,0.99876},
+  {0.25425,0.58950,0.99896},
+  {0.24946,0.59943,0.99835},
+  {0.24427,0.60937,0.99697},
+  {0.23874,0.61931,0.99485},
+  {0.23288,0.62923,0.99202},
+  {0.22676,0.63913,0.98851},
+  {0.22039,0.64901,0.98436},
+  {0.21382,0.65886,0.97959},
+  {0.20708,0.66866,0.97423},
+  {0.20021,0.67842,0.96833},
+  {0.19326,0.68812,0.96190},
+  {0.18625,0.69775,0.95498},
+  {0.17923,0.70732,0.94761},
+  {0.17223,0.71680,0.93981},
+  {0.16529,0.72620,0.93161},
+  {0.15844,0.73551,0.92305},
+  {0.15173,0.74472,0.91416},
+  {0.14519,0.75381,0.90496},
+  {0.13886,0.76279,0.89550},
+  {0.13278,0.77165,0.88580},
+  {0.12698,0.78037,0.87590},
+  {0.12151,0.78896,0.86581},
+  {0.11639,0.79740,0.85559},
+  {0.11167,0.80569,0.84525},
+  {0.10738,0.81381,0.83484},
+  {0.10357,0.82177,0.82437},
+  {0.10026,0.82955,0.81389},
+  {0.09750,0.83714,0.80342},
+  {0.09532,0.84455,0.79299},
+  {0.09377,0.85175,0.78264},
+  {0.09287,0.85875,0.77240},
+  {0.09267,0.86554,0.76230},
+  {0.09320,0.87211,0.75237},
+  {0.09451,0.87844,0.74265},
+  {0.09662,0.88454,0.73316},
+  {0.09958,0.89040,0.72393},
+  {0.10342,0.89600,0.71500},
+  {0.10815,0.90142,0.70599},
+  {0.11374,0.90673,0.69651},
+  {0.12014,0.91193,0.68660},
+  {0.12733,0.91701,0.67627},
+  {0.13526,0.92197,0.66556},
+  {0.14391,0.92680,0.65448},
+  {0.15323,0.93151,0.64308},
+  {0.16319,0.93609,0.63137},
+  {0.17377,0.94053,0.61938},
+  {0.18491,0.94484,0.60713},
+  {0.19659,0.94901,0.59466},
+  {0.20877,0.95304,0.58199},
+  {0.22142,0.95692,0.56914},
+  {0.23449,0.96065,0.55614},
+  {0.24797,0.96423,0.54303},
+  {0.26180,0.96765,0.52981},
+  {0.27597,0.97092,0.51653},
+  {0.29042,0.97403,0.50321},
+  {0.30513,0.97697,0.48987},
+  {0.32006,0.97974,0.47654},
+  {0.33517,0.98234,0.46325},
+  {0.35043,0.98477,0.45002},
+  {0.36581,0.98702,0.43688},
+  {0.38127,0.98909,0.42386},
+  {0.39678,0.99098,0.41098},
+  {0.41229,0.99268,0.39826},
+  {0.42778,0.99419,0.38575},
+  {0.44321,0.99551,0.37345},
+  {0.45854,0.99663,0.36140},
+  {0.47375,0.99755,0.34963},
+  {0.48879,0.99828,0.33816},
+  {0.50362,0.99879,0.32701},
+  {0.51822,0.99910,0.31622},
+  {0.53255,0.99919,0.30581},
+  {0.54658,0.99907,0.29581},
+  {0.56026,0.99873,0.28623},
+  {0.57357,0.99817,0.27712},
+  {0.58646,0.99739,0.26849},
+  {0.59891,0.99638,0.26038},
+  {0.61088,0.99514,0.25280},
+  {0.62233,0.99366,0.24579},
+  {0.63323,0.99195,0.23937},
+  {0.64362,0.98999,0.23356},
+  {0.65394,0.98775,0.22835},
+  {0.66428,0.98524,0.22370},
+  {0.67462,0.98246,0.21960},
+  {0.68494,0.97941,0.21602},
+  {0.69525,0.97610,0.21294},
+  {0.70553,0.97255,0.21032},
+  {0.71577,0.96875,0.20815},
+  {0.72596,0.96470,0.20640},
+  {0.73610,0.96043,0.20504},
+  {0.74617,0.95593,0.20406},
+  {0.75617,0.95121,0.20343},
+  {0.76608,0.94627,0.20311},
+  {0.77591,0.94113,0.20310},
+  {0.78563,0.93579,0.20336},
+  {0.79524,0.93025,0.20386},
+  {0.80473,0.92452,0.20459},
+  {0.81410,0.91861,0.20552},
+  {0.82333,0.91253,0.20663},
+  {0.83241,0.90627,0.20788},
+  {0.84133,0.89986,0.20926},
+  {0.85010,0.89328,0.21074},
+  {0.85868,0.88655,0.21230},
+  {0.86709,0.87968,0.21391},
+  {0.87530,0.87267,0.21555},
+  {0.88331,0.86553,0.21719},
+  {0.89112,0.85826,0.21880},
+  {0.89870,0.85087,0.22038},
+  {0.90605,0.84337,0.22188},
+  {0.91317,0.83576,0.22328},
+  {0.92004,0.82806,0.22456},
+  {0.92666,0.82025,0.22570},
+  {0.93301,0.81236,0.22667},
+  {0.93909,0.80439,0.22744},
+  {0.94489,0.79634,0.22800},
+  {0.95039,0.78823,0.22831},
+  {0.95560,0.78005,0.22836},
+  {0.96049,0.77181,0.22811},
+  {0.96507,0.76352,0.22754},
+  {0.96931,0.75519,0.22663},
+  {0.97323,0.74682,0.22536},
+  {0.97679,0.73842,0.22369},
+  {0.98000,0.73000,0.22161},
+  {0.98289,0.72140,0.21918},
+  {0.98549,0.71250,0.21650},
+  {0.98781,0.70330,0.21358},
+  {0.98986,0.69382,0.21043},
+  {0.99163,0.68408,0.20706},
+  {0.99314,0.67408,0.20348},
+  {0.99438,0.66386,0.19971},
+  {0.99535,0.65341,0.19577},
+  {0.99607,0.64277,0.19165},
+  {0.99654,0.63193,0.18738},
+  {0.99675,0.62093,0.18297},
+  {0.99672,0.60977,0.17842},
+  {0.99644,0.59846,0.17376},
+  {0.99593,0.58703,0.16899},
+  {0.99517,0.57549,0.16412},
+  {0.99419,0.56386,0.15918},
+  {0.99297,0.55214,0.15417},
+  {0.99153,0.54036,0.14910},
+  {0.98987,0.52854,0.14398},
+  {0.98799,0.51667,0.13883},
+  {0.98590,0.50479,0.13367},
+  {0.98360,0.49291,0.12849},
+  {0.98108,0.48104,0.12332},
+  {0.97837,0.46920,0.11817},
+  {0.97545,0.45740,0.11305},
+  {0.97234,0.44565,0.10797},
+  {0.96904,0.43399,0.10294},
+  {0.96555,0.42241,0.09798},
+  {0.96187,0.41093,0.09310},
+  {0.95801,0.39958,0.08831},
+  {0.95398,0.38836,0.08362},
+  {0.94977,0.37729,0.07905},
+  {0.94538,0.36638,0.07461},
+  {0.94084,0.35566,0.07031},
+  {0.93612,0.34513,0.06616},
+  {0.93125,0.33482,0.06218},
+  {0.92623,0.32473,0.05837},
+  {0.92105,0.31489,0.05475},
+  {0.91572,0.30530,0.05134},
+  {0.91024,0.29599,0.04814},
+  {0.90463,0.28696,0.04516},
+  {0.89888,0.27824,0.04243},
+  {0.89298,0.26981,0.03993},
+  {0.88691,0.26152,0.03753},
+  {0.88066,0.25334,0.03521},
+  {0.87422,0.24526,0.03297},
+  {0.86760,0.23730,0.03082},
+  {0.86079,0.22945,0.02875},
+  {0.85380,0.22170,0.02677},
+  {0.84662,0.21407,0.02487},
+  {0.83926,0.20654,0.02305},
+  {0.83172,0.19912,0.02131},
+  {0.82399,0.19182,0.01966},
+  {0.81608,0.18462,0.01809},
+  {0.80799,0.17753,0.01660},
+  {0.79971,0.17055,0.01520},
+  {0.79125,0.16368,0.01387},
+  {0.78260,0.15693,0.01264},
+  {0.77377,0.15028,0.01148},
+  {0.76476,0.14374,0.01041},
+  {0.75556,0.13731,0.00942},
+  {0.74617,0.13098,0.00851},
+  {0.73661,0.12477,0.00769},
+  {0.72686,0.11867,0.00695},
+  {0.71692,0.11268,0.00629},
+  {0.70680,0.10680,0.00571},
+  {0.69650,0.10102,0.00522},
+  {0.68602,0.09536,0.00481},
+  {0.67535,0.08980,0.00449},
+  {0.66449,0.08436,0.00424},
+  {0.65345,0.07902,0.00408},
+  {0.64223,0.07380,0.00401},
+  {0.63082,0.06868,0.00401},
+  {0.61923,0.06367,0.00410},
+  {0.60746,0.05878,0.00427},
+  {0.59550,0.05399,0.00453},
+  {0.58336,0.04931,0.00486},
+  {0.57103,0.04474,0.00529},
+  {0.55852,0.04028,0.00579},
+  {0.54583,0.03593,0.00638},
+  {0.53295,0.03169,0.00705},
+  {0.51989,0.02756,0.00780},
+  {0.50664,0.02354,0.00863},
+  {0.49321,0.01963,0.00955},
+  {0.47960,0.01583,0.01055}
+};
+
+
+
 
 uint64_t nanoTime(){
     using namespace std;
@@ -1930,11 +2191,21 @@ int main(int argc, char* argv[]) {
                             if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
                                 if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
                                     ippl::Vector<ippl::Vector<scalar, 3>, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
+                                    
                                     ippl::Vector<scalar, 3> poynting = acc[0].cross(acc[1]);
+                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> eblab =  frame_boost.inverse_transform_EB(
+                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(acc[0], acc[1])
+                                    );
+                                    ippl::Vector<scalar, 3> poyntinglab = eblab.first.cross(eblab.second);
+                                    poynting = poyntinglab;
                                     if(poynting.norm() > 0){
                                         //std::cout << poynting.norm() << "\n";
                                     }
-                                    imagedata[(j * img_width + i) * 3 + 0] += std::sqrt(poynting.norm()) * 0.5f;//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f;
+                                    int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                                    imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
                                 }
                             }
                         }

From ba073e60189b4f3ef830b2a0f2de930e76a60bbb Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Wed, 17 Apr 2024 11:22:38 +0200
Subject: [PATCH 11/32] FFMPEG output

---
 test/solver/TestFDTDSolver.cpp | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index fe90bcf24..f6cdcdf5e 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -4,7 +4,7 @@
 
 #include "Field/Field.h"
 #include <chrono>
-
+#include <cstdio> // For popen
 #include "MaxwellSolvers/FDTD.h"
 #define JSON_HAS_RANGES 0 //Merlin compilation
 #include <json.hpp>
@@ -2022,7 +2022,23 @@ namespace ippl {
     };
     // clang-format on
 }  // namespace ippl
+bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
+    const int channels = 3; // RGB
+    const int stride = width * channels;
+    std::vector<unsigned char> flippedData(data, data + stride * height);
+
+    // Use stb_image_write to write the BMP image to the file descriptor
+    if (!stbi_write_bmp_to_func(
+            [](void* context, void* data, int size) {
+                FILE* f = reinterpret_cast<FILE*>(context);
+                fwrite(data, 1, size, f);
+            },
+            fd, width, height, channels, flippedData.data())) {
+        return false;
+    }
 
+    return true;
+}
 int main(int argc, char* argv[]) {
     using scalar = float;
     ippl::initialize(argc, argv);
@@ -2068,7 +2084,7 @@ int main(int argc, char* argv[]) {
         std::array<bool, 3> isParallel;
         isParallel.fill(false);
         isParallel[2] = true;
-
+        
         // all parallel layout, standard domain, normal axis order
         ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
@@ -2112,9 +2128,16 @@ int main(int argc, char* argv[]) {
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
         uint64_t starttime =  nanoTime();
         std::ofstream rad;
+        FILE* ffmpeg_file = nullptr;
         if(ippl::Comm->rank() == 0){
             rad = std::ofstream("radiation.txt");
+            const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
+            if(cfg.output_rhythm != 0){
+                ffmpeg_file = popen(ffmpegCmd, "w");
+            }
         }
+
+
         for(size_t i = 0;i < timesteps_required;i++){
             fdtd_state.J = scalar(0.0);
             fdtd_state.playout.update(fdtd_state.particles);
@@ -2236,7 +2259,7 @@ int main(int argc, char* argv[]) {
                     
                     snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
                     std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
-                    stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+                    writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
                 }
                 delete[] imagedata;
                 delete[] idata_recvbuffer;

From 200ad6d030831283bca7cb464ce36b8db868b3ea Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Fri, 19 Apr 2024 11:21:55 +0200
Subject: [PATCH 12/32] Gauss test

---
 config.json                    |  4 ++--
 test/solver/TestFDTDSolver.cpp | 43 ++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/config.json b/config.json
index 5b5c053e4..0948650ee 100644
--- a/config.json
+++ b/config.json
@@ -4,7 +4,7 @@
       "length-scale" : "micrometers",
       "time-scale"   : "picoseconds",
       "extents": [3400.0, 3400.0, 280.0],
-      "resolution": [64, 64, 2800],
+      "resolution": [32, 32, 800],
       "mesh-center": [0.0, 0.0, 0.0],
       "total-time": 30000.0,
       "bunch-time-step": 1.6,
@@ -19,7 +19,7 @@
       "_mass": 1,
       "charge": 1.846e8,
       "mass": 1.846e8,
-      "number-of-particles": 500000,
+      "number-of-particles": 50000,
       "gamma": 100.41,
       "_gamma": 1.0,
       "direction (ignored)": [0.0, 0.0, 1.0],
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index f6cdcdf5e..223bf1bd2 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -10,6 +10,7 @@
 #include <json.hpp>
 #include <fstream>
 #include <list>
+#include <Kokkos_Random.hpp>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.hpp>
 constexpr float turbo_cm[256][3] = {
@@ -2039,6 +2040,48 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
 
     return true;
 }
+template<typename scalar>
+scalar test_gauss_law(uint32_t n){
+
+    ippl::NDIndex<3> owned(n / 2, n / 2, n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, n};
+    ippl::Vector<scalar, 3> extents{1,1,1};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+        
+        // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+    ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5), scalar(-0.5), scalar(-0.5)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+    ippl::FDTDFieldState<scalar> field_state(layout, mesh, (1 << 20), config{});
+    field_state.particles.Q = scalar(coulomb_in_unit_charges) / (1 << 20);
+    field_state.particles.mass = scalar(1.0) / (1 << 20); //Irrelefant
+    auto pview = field_state.particles.R.getView();
+    auto p1view = field_state.particles.R_nm1.getView();
+
+    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+    scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
+    
+    Kokkos::parallel_for((1 << 20), KOKKOS_LAMBDA(size_t i){
+        //bunch.gammaBeta[i].fill(scalar(0));
+        auto state = random_pool.get_state();
+        pview(i)[0] = state.normal(0.01 * meter_in_unit_lengths);
+        pview(i)[1] = state.normal(0.01 * meter_in_unit_lengths);
+        pview(i)[2] = state.normal(0.01 * meter_in_unit_lengths);
+        p1view(i) = pview(i);
+        random_pool.free_state(state);
+    });
+    Kokkos::fence();
+    field_state.scatterBunch();
+    for(size_t i = 0;i < 8*n;i++){
+        field_state.fieldStep();
+    }
+}
 int main(int argc, char* argv[]) {
     using scalar = float;
     ippl::initialize(argc, argv);

From 3958ed82e29c88426c8b72fb213e5a46acd97bd3 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 21 Apr 2024 13:12:06 +0200
Subject: [PATCH 13/32] Revert vector and particle attrib changes, make FDTD
 Solver compatible

---
 src/Particle/ParticleAttrib.hpp |  43 ++---
 src/Types/Vector.h              | 310 +++++++++++---------------------
 src/Types/Vector.hpp            |   9 +
 test/solver/TestFDTDSolver.cpp  |   2 +-
 4 files changed, 127 insertions(+), 237 deletions(-)

diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 9ee77e3e3..3b37f920a 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -121,7 +121,7 @@ namespace ippl {
 
         const vector_type& dx     = mesh.getMeshSpacing();
         const vector_type& origin = mesh.getOrigin();
-        const vector_type invdx   = vector_type(1.0) / dx;
+        const vector_type invdx   = 1.0 / dx;
 
         const FieldLayout<Dim>& layout = f.getLayout();
         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
@@ -132,12 +132,12 @@ namespace ippl {
             "ParticleAttrib::scatter", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + vector_type(1.0); // Terribile;
-                Vector<int, Field::dim> index        = l.template cast<int>();
-                Vector<PositionType, Field::dim> whi = l - index.template cast<PositionType>();
-                Vector<PositionType, Field::dim> wlo = vector_type(1.0) - whi;
+                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
+                Vector<int, Field::dim> index        = l;
+                Vector<PositionType, Field::dim> whi = l - index;
+                Vector<PositionType, Field::dim> wlo = 1.0 - whi;
 
-                Vector<size_t, Field::dim> args = (index - lDom.first() + nghost).template cast<size_t>();
+                Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
 
                 // scatter
                 const value_type& val = dview_m(idx);
@@ -159,11 +159,10 @@ namespace ippl {
         constexpr unsigned Dim = Field::dim;
         using PositionType     = typename Field::Mesh_t::value_type;
 
-        //This completely klönks performance for subdivided particle timesteps
-        //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo");
-        //IpplTimings::startTimer(fillHaloTimer);
-        //f.fillHalo();
-        //IpplTimings::stopTimer(fillHaloTimer);
+        static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo");
+        IpplTimings::startTimer(fillHaloTimer);
+        f.fillHalo();
+        IpplTimings::stopTimer(fillHaloTimer);
 
         static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather");
         IpplTimings::startTimer(gatherTimer);
@@ -176,7 +175,7 @@ namespace ippl {
 
         const vector_type& dx     = mesh.getMeshSpacing();
         const vector_type& origin = mesh.getOrigin();
-        const vector_type invdx   = vector_type(1.0) / dx;
+        const vector_type invdx   = 1.0 / dx;
 
         const FieldLayout<Dim>& layout = f.getLayout();
         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
@@ -187,24 +186,14 @@ namespace ippl {
             "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + vector_type(1.0); //terribile
-                Vector<int, Field::dim> index        = l.template cast<int>();
-                Vector<PositionType, Field::dim> whi = l - index.template cast<PositionType>();
-                Vector<PositionType, Field::dim> wlo = vector_type(1.0) - whi;
+                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
+                Vector<int, Field::dim> index        = l;
+                Vector<PositionType, Field::dim> whi = l - index;
+                Vector<PositionType, Field::dim> wlo = 1.0 - whi;
 
-                Vector<size_t, Field::dim> args = (index - lDom.first() + nghost).template cast<size_t>();
+                Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
 
                 // gather
-                for(unsigned d = 0;d < Field::dim;d++){
-                    if(args[d] >= view.extent(d)){
-                        dview_m(idx) = T(0);
-                        return;
-                    }
-                    else if(args[d] < 1){
-                        dview_m(idx) = T(0);
-                        return;
-                    }
-                }
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
                                                        view, wlo, whi, args);
             });
diff --git a/src/Types/Vector.h b/src/Types/Vector.h
index 0f1d00b96..216ddf5b7 100644
--- a/src/Types/Vector.h
+++ b/src/Types/Vector.h
@@ -20,239 +20,131 @@ namespace ippl {
      * @tparam Dim vector dimension
      */
     template <typename T, unsigned Dim>
-    struct Vector {
-        using value_type = T;
+    class Vector : public detail::Expression<Vector<T, Dim>, sizeof(T) * Dim> {
+    public:
+        typedef T value_type;
+        static constexpr unsigned dim = Dim;
 
-        constexpr static unsigned dim = Dim;
-        T data[Dim];
-        KOKKOS_INLINE_FUNCTION constexpr Vector(const std::initializer_list<T>& list) {
-            // PAssert(list.size() == Dim);
-            unsigned int i = 0;
-            for (auto& l : list) {
-                data[i] = l;
-                ++i;
-            }
-        }
-        Vector() = default;
-        constexpr KOKKOS_INLINE_FUNCTION Vector(T v) { fill(v); }
-        KOKKOS_INLINE_FUNCTION value_type dot(const Vector<T, Dim>& v) const noexcept {
-            value_type ret = 0;
-            for (unsigned i = 0; i < dim; i++) {
-                ret += data[i] * v[i];
-            }
-            return ret;
+        KOKKOS_FUNCTION
+        constexpr Vector()
+            : Vector(value_type(0)) {}
+
+        template <typename... Args,
+                  typename std::enable_if<sizeof...(Args) == Dim, bool>::type = true>
+        explicit KOKKOS_FUNCTION Vector(const Args&... args);
+
+        template <typename E, size_t N>
+        KOKKOS_FUNCTION Vector(const detail::Expression<E, N>& expr);
+
+        KOKKOS_DEFAULTED_FUNCTION
+        Vector(const Vector<T, Dim>& v) = default;
+
+        KOKKOS_FUNCTION
+        Vector(const T& val);
+        
+        
+        Vector(const std::array<T, Dim>& a);
+
+	    Vector(const std::array<std::vector<T>, Dim>& a);
+
+        /*!
+         * @param list of values
+         */
+        KOKKOS_FUNCTION
+        Vector(const std::initializer_list<T>& list);
+
+        KOKKOS_FUNCTION
+        ~Vector() {}
+
+        // Get and Set Operations
+        KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i);
+
+        KOKKOS_INLINE_FUNCTION value_type operator[](unsigned int i) const;
+
+        KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i);
+
+        KOKKOS_INLINE_FUNCTION value_type operator()(unsigned int i) const;
+
+        // Assignment Operators
+        template <typename E, size_t N>
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator=(const detail::Expression<E, N>& expr);
+
+        template <typename E, size_t N>
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator+=(const detail::Expression<E, N>& expr);
+
+        template <typename E, size_t N>
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator-=(const detail::Expression<E, N>& expr);
+
+        template <typename E, size_t N>
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator*=(const detail::Expression<E, N>& expr);
+
+        template <typename E, size_t N>
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator/=(const detail::Expression<E, N>& expr);
+
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator+=(const T& val);
+
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator-=(const T& val);
+
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator*=(const T& val);
+
+        KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator/=(const T& val);
+
+        using iterator       = T*;
+        using const_iterator = const T*;
+        KOKKOS_INLINE_FUNCTION constexpr iterator begin();
+        KOKKOS_INLINE_FUNCTION constexpr iterator end();
+        KOKKOS_INLINE_FUNCTION constexpr const_iterator begin() const;
+        KOKKOS_INLINE_FUNCTION constexpr const_iterator end() const;
+
+        KOKKOS_INLINE_FUNCTION T dot(const Vector<T, Dim>& rhs) const;
+        KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const Vector<T, 3>& rhs) const;
+        KOKKOS_INLINE_FUNCTION void fill(const T& v){
+            for(unsigned k = 0;k < Dim;k++)(*this)[k] = v;
         }
-        template <unsigned N>
-        KOKKOS_INLINE_FUNCTION Vector<T, N> tail() const noexcept {
+        template<unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> tail()const noexcept{
             Vector<T, N> ret;
             static_assert(N <= Dim, "N must be smaller than Dim");
             constexpr unsigned diff = Dim - N;
-            for (unsigned i = 0; i < N; i++) {
+            for(unsigned i = 0;i < N;i++){
                 ret[i] = (*this)[i + diff];
             }
             return ret;
         }
-        template <unsigned N>
-        KOKKOS_INLINE_FUNCTION Vector<T, N> head() const noexcept {
+        template<unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> head()const noexcept{
             Vector<T, N> ret;
             static_assert(N <= Dim, "N must be smaller than Dim");
-            for (unsigned i = 0; i < N; i++) {
+            for(unsigned i = 0;i < N;i++){
                 ret[i] = (*this)[i];
             }
             return ret;
         }
-        KOKKOS_INLINE_FUNCTION value_type squaredNorm() const noexcept { return dot(*this); }
-        KOKKOS_INLINE_FUNCTION value_type norm() {
-#ifndef __CUDA_ARCH__
-            using std::sqrt;
-#endif
-            return Kokkos::sqrt(squaredNorm());
-        }
-        KOKKOS_INLINE_FUNCTION Vector<T, Dim> normalized() const noexcept { return *this / norm(); }
-        KOKKOS_INLINE_FUNCTION value_type sum() const noexcept {
-            value_type ret = 0;
-            for (unsigned i = 0; i < dim; i++) {
-                ret += data[i];
-            }
+        template<typename OtherType>
+        KOKKOS_INLINE_FUNCTION Vector<OtherType, Dim> cast()const{
+            Vector<OtherType, Dim> ret;
+            for(unsigned k = 0;k < Dim;k++)ret[k] = OtherType((*this)[k]);
             return ret;
         }
-        KOKKOS_INLINE_FUNCTION value_type average() const noexcept {
-            value_type ret = 0;
-            for (unsigned i = 0; i < dim; i++) {
-                ret += data[i];
-            }
-            return ret / dim;
+        KOKKOS_INLINE_FUNCTION T squaredNorm()const{
+            return this->dot(*this);
         }
-        KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const Vector<T, Dim>& v) const noexcept
-            requires(Dim == 3)
-        {
-            Vector<T, 3> ret(0);
-            ret[0] = data[1] * v[2] - data[2] * v[1];
-            ret[1] = data[2] * v[0] - data[0] * v[2];
-            ret[2] = data[0] * v[1] - data[1] * v[0];
-            return ret;
+        KOKKOS_INLINE_FUNCTION T norm()const{
+            using Kokkos::sqrt;
+            return sqrt(squaredNorm());
         }
-        KOKKOS_INLINE_FUNCTION bool operator==(const Vector<T, dim>& o) const noexcept {
-            for (unsigned i = 0; i < dim; i++) {
-                if (data[i] != o[i])
-                    return false;
-            }
-            return true;
-        }
-        KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i) noexcept {
-            assert(i < dim);
-            return data[i];
-        }
-        KOKKOS_INLINE_FUNCTION T* begin() noexcept { return data; }
-        KOKKOS_INLINE_FUNCTION T* end() noexcept { return data + dim; }
-        KOKKOS_INLINE_FUNCTION const T* begin() const noexcept { return data; }
-        KOKKOS_INLINE_FUNCTION const T* end() const noexcept { return data + dim; }
-        KOKKOS_INLINE_FUNCTION constexpr void fill(value_type x) {
-            for (unsigned i = 0; i < dim; i++) {
-                data[i] = value_type(x);
-            }
-        }
-        KOKKOS_INLINE_FUNCTION const value_type& operator[](unsigned int i) const noexcept {
-            assert(i < dim);
-            return data[i];
-        }
-
-        KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i) noexcept {
-            assert(i < dim);
-            return data[i];
-        }
-
-        KOKKOS_INLINE_FUNCTION const value_type& operator()(unsigned int i) const noexcept {
-            assert(i < dim);
-            return data[i];
-        }
-        KOKKOS_INLINE_FUNCTION Vector operator-() const noexcept {
-            Vector ret;
-            for (unsigned i = 0; i < dim; i++) {
-                ret[i] = -(*this)[i];
-            }
-            return ret;
-        }
-        KOKKOS_INLINE_FUNCTION Vector<T, dim> decompose(Vector<int, dim>* integral) {
-#ifndef __CUDA_ARCH__
-            using std::modf;
-#endif
-            Vector<T, dim> ret;
-            for (unsigned i = 0; i < dim; i++) {
-                if constexpr (std::is_same_v<T, float>) {
-                    float tmp;
-                    ret[i]         = modff((*this)[i], &tmp);
-                    (*integral)[i] = (int)tmp;
-                } else if constexpr (std::is_same_v<T, double>) {
-                    double tmp;
-                    ret[i]         = modf((*this)[i], &tmp);
-                    (*integral)[i] = (int)tmp;
-                }
-            }
-            return ret;
-        }
-        template <typename O>
-        constexpr KOKKOS_INLINE_FUNCTION Vector<O, dim> cast() const noexcept {
-            Vector<O, dim> ret;
-            for (unsigned i = 0; i < dim; i++) {
-                ret.data[i] = (O)(data[i]);
-            }
-            return ret;
-        }
-        KOKKOS_INLINE_FUNCTION Vector operator*(const value_type& o) const noexcept {
-            Vector ret;
-            for (unsigned i = 0; i < dim; i++) {
-                ret[i] = (*this)[i] * o;
-            }
-            return ret;
-        }
-#define defop_kf(OP)                                                                  \
-    constexpr KOKKOS_INLINE_FUNCTION Vector<T, Dim> operator OP(const Vector<T, Dim>& o) const noexcept { \
-        Vector<T, Dim> ret;                                                           \
-        for (unsigned i = 0; i < dim; i++) {                                          \
-            ret.data[i] = data[i] OP o.data[i];                                       \
-        }                                                                             \
-        return ret;                                                                   \
-    }
-        defop_kf(+)
-        defop_kf(-)
-        defop_kf(*)
-        defop_kf(/)
 
-#define def_aop_kf(OP)                                                 \
-    KOKKOS_INLINE_FUNCTION Vector<T, Dim>& operator OP(const Vector<T, Dim>& o) noexcept { \
-        for (unsigned i = 0; i < dim; i++) {                           \
-            (*this)[i] OP o[i];                                        \
-        }                                                              \
-        return *this;                                                  \
-    }
-            def_aop_kf(+=)
-            def_aop_kf(-=)
-            def_aop_kf(*=)
-            def_aop_kf(/=)
-            template <typename stream_t>
-            friend stream_t& operator<<(stream_t& str, const Vector<T, Dim>& v) {
-            // tr << "{";
-            for (unsigned i = 0; i < Dim; i++) {
-                str << v[i];
-                if (i < Dim - 1)
-                    str << ", ";
-            }
-            // str << "}";
-            return str;
-        }
-        // defop_kf(*)
-        // defop_kf(/)
+    //Needs to be public to be a standard-layout type
+    //private:
+        T data_m[Dim];
     };
-
-    //template <typename T, unsigned Dim>
-    //KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
-    //template <typename T, unsigned Dim>
-    //KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
+    
     template<typename T, unsigned Dim>
-    KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b){
-        using Kokkos::min;
-        Vector<T, Dim> ret;
-        for(unsigned d = 0; d < Dim;d++){
-            ret[d] = min(a[d], b[d]);
-        }
-        return ret;
-    }
+    KOKKOS_INLINE_FUNCTION Vector<T, Dim> min(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
     template<typename T, unsigned Dim>
-    KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b){
-        using Kokkos::max;
-        Vector<T, Dim> ret;
-        for(unsigned d = 0; d < Dim;d++){
-            ret[d] = max(a[d], b[d]);
-        }
-        return ret;
-    }
-    template<typename O, typename T, unsigned N>
-    KOKKOS_INLINE_FUNCTION ippl::Vector<T, N> operator *(const O &o, const ippl::Vector<T, N>& v) noexcept {
-        ippl::Vector<T, N> ret;
-        for (unsigned i = 0; i < N; i++) {
-            ret[i] = v[i] * o;
-        }
-        return ret;
-    }
-    template <typename T>
-    KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const ippl::Vector<T, 3>& a, const ippl::Vector<T, 3>& b) {
-        ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
-        ret[0] = a[1] * b[2] - a[2] * b[1];
-        ret[1] = a[2] * b[0] - a[0] * b[2];
-        ret[2] = a[0] * b[1] - a[1] * b[0];
-        return ret;
-    }
-    template <typename T, unsigned dim>
-    KOKKOS_INLINE_FUNCTION T dot(const ippl::Vector<T, dim>& a, const ippl::Vector<T, dim>& b) {
-        T ret = 0;
-        for(unsigned k = 0;k < dim;k++){
-            ret += a[k] * b[k];
-        }
-        return ret;
-    }
+    KOKKOS_INLINE_FUNCTION Vector<T, Dim> max(const Vector<T, Dim>& a, const Vector<T, Dim>& b);
 }  // namespace ippl
 
-//#include "Vector.hpp"
+#include "Vector.hpp"
 
 #endif
diff --git a/src/Types/Vector.hpp b/src/Types/Vector.hpp
index aa9a6168b..8723ed431 100644
--- a/src/Types/Vector.hpp
+++ b/src/Types/Vector.hpp
@@ -187,6 +187,15 @@ namespace ippl {
         }
         return res;
     }
+    template <typename T, unsigned Dim>
+    KOKKOS_INLINE_FUNCTION Vector<T, 3> Vector<T, Dim>::cross(const Vector<T, 3>& rhs) const {
+        
+        Vector<T, 3> ret;
+        ret[0] = data_m[1] * rhs[2] - data_m[2] * rhs[1];
+        ret[1] = data_m[2] * rhs[0] - data_m[0] * rhs[2];
+        ret[2] = data_m[0] * rhs[1] - data_m[1] * rhs[0];
+        return ret;
+    }
 
     template <typename T, unsigned Dim>
     inline std::ostream& operator<<(std::ostream& out, const Vector<T, Dim>& v) {
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 223bf1bd2..58faf4632 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1434,7 +1434,7 @@ struct second_order_abc_corner{
         constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
         constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
         using ippl::apply;
-        constexpr ippl::Vector<uint32_t, 3> offsets[8] = {
+        const ippl::Vector<uint32_t, 3> offsets[8] = {
             ippl::Vector<uint32_t, 3>{0,0,0},
             ippl::Vector<uint32_t, 3>{xoff,0,0},
             ippl::Vector<uint32_t, 3>{0,yoff,0},

From b45bd97ac2e4610a55a3bfd519ec5106222b531b Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Mon, 22 Apr 2024 12:35:32 +0200
Subject: [PATCH 14/32] Gauss and ampere

---
 CMakeLists.txt                 |   2 +-
 test/solver/TestFDTDSolver.cpp | 160 +++++++++++++++++++++++++++++----
 2 files changed, 142 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 058bfab5f..b2a799e6b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,7 +46,7 @@ file(
 )
 file(
     DOWNLOAD
-    https://raw.githubusercontent.com/nothings/stb/master/stb_image_write.h
+    https://raw.githubusercontent.com/manuel5975p/stb/master/stb_image_write.h
     "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp"
 )
 include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/")
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 58faf4632..433f16ec1 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1121,6 +1121,7 @@ template<typename view_type, typename scalar>
 KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
     ipos -= ldom.first();
+    //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
     if(
         ipos[0] < 0
         ||ipos[1] < 0
@@ -1751,8 +1752,11 @@ namespace ippl {
 
         bool periodic_bc;
         
-        //Fields
         using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using EBField_t = ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using view_type = VField_t::view_type;
+        using ev_view_type = EBField_t::view_type;
+        //Fields
         ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_np1;
         ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_n;
         ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_nm1;
@@ -1800,8 +1804,8 @@ namespace ippl {
                 uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
                 uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
             };
-            std::cout << "NR_M_g: " << nr_global << "\n";
-            std::cout << "NR_M_l: " << nr_local << "\n";
+            //std::cout << "NR_M_g: " << nr_global << "\n";
+            //std::cout << "NR_M_l: " << nr_local << "\n";
             dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2]));
             particles.create(nparticles);
             setNoBoundaryConditions();
@@ -1859,7 +1863,7 @@ namespace ippl {
                 Vector_t to = rview(i);
                 Vector_t from = rm1view(i);
                 if(space_charge){
-                    scatterToGrid(lDom, Jview,hr, orig, pos, qview(i) / volume);
+                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
                 }
                 scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
             });
@@ -2040,12 +2044,39 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
 
     return true;
 }
+template<typename View, typename T, unsigned Dim>
+KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector<T, Dim>& pos, const ippl::Vector<T, 3>& origin, const ippl::Vector<T, 3>& hr, const ippl::NDIndex<3>& lDom){
+    using vector_type = ippl::Vector<T, 3>;
+
+    vector_type l;
+    //vector_type origin = v.get_mesh().getOrigin();
+    //auto lDom = v.getLayout().getLocalNDIndex();
+    //vector_type hr = v.get_mesh().getMeshSpacing();
+    for(unsigned k = 0;k < 3;k++){
+        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong
+    }                     
+
+    ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
+    ippl::Vector<T, 3> whi = l - index;
+    ippl::Vector<T, 3> wlo(1.0);
+    wlo -= whi;
+    //TODO: nghost
+    ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
+    for(unsigned k = 0;k < 3;k++){
+        if(args[k] >= v.extent(k) || args[k] == 0){
+            return typename View::value_type(0);
+        }
+    }
+    //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n";
+    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/;
+
+}
 template<typename scalar>
 scalar test_gauss_law(uint32_t n){
 
-    ippl::NDIndex<3> owned(n / 2, n / 2, n);
-    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, n};
-    ippl::Vector<scalar, 3> extents{1,1,1};
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths};
     std::array<bool, 3> isParallel;
     isParallel.fill(false);
     isParallel[2] = true;
@@ -2055,38 +2086,127 @@ scalar test_gauss_law(uint32_t n){
 
         //[-1, 1] box
     ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
-    ippl::Vector<scalar, 3> origin = {scalar(-0.5), scalar(-0.5), scalar(-0.5)};
+    using vector_type = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
     ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
-    ippl::FDTDFieldState<scalar> field_state(layout, mesh, (1 << 20), config{});
-    field_state.particles.Q = scalar(coulomb_in_unit_charges) / (1 << 20);
-    field_state.particles.mass = scalar(1.0) / (1 << 20); //Irrelefant
+
+    uint32_t pcount = 1 << 20;
+    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, config{.space_charge = true});
+    field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
     auto pview = field_state.particles.R.getView();
     auto p1view = field_state.particles.R_nm1.getView();
 
     //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
+    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
+    
+    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
+        //bunch.gammaBeta[i].fill(scalar(0));
+        auto state = random_pool.get_state();
+        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        p1view(i) = pview(i);
+        random_pool.free_state(state);
+    });
+    Kokkos::fence();
+    field_state.J = scalar(0.0);
+    field_state.scatterBunch();
+    for(size_t i = 0;i < 8*n;i++){
+        field_state.fieldStep();
+    }
+    field_state.evaluate_EB();
+    Kokkos::fence();
+    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    
+    std::ofstream line("gauss_line.txt");
+    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
+    for(unsigned i = 1;i < nr[2];i++){
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
+    }
+    return 0.0f;
+}
+template<typename scalar>
+scalar test_amperes_law(uint32_t n){
+
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+        
+        // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+    ippl::Vector<scalar, 3> hx;
+    for(unsigned d = 0;d < 3;d++){
+        hx[d] = extents[d] / (scalar)nr[d];
+    }
+    using vector_type = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+
+    uint32_t pcount = 1 << 20;
+    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, config{.space_charge = true});
+    field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
+    auto pview = field_state.particles.R.getView();
+    auto p1view = field_state.particles.R_nm1.getView();
+    constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    scalar timestep = field_state.dt;
+    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
     
-    Kokkos::parallel_for((1 << 20), KOKKOS_LAMBDA(size_t i){
+    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
         //bunch.gammaBeta[i].fill(scalar(0));
         auto state = random_pool.get_state();
-        pview(i)[0] = state.normal(0.01 * meter_in_unit_lengths);
-        pview(i)[1] = state.normal(0.01 * meter_in_unit_lengths);
-        pview(i)[2] = state.normal(0.01 * meter_in_unit_lengths);
+        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
         p1view(i) = pview(i);
+        p1view(i)[1] -= vy * timestep;
         random_pool.free_state(state);
     });
     Kokkos::fence();
+    field_state.J = scalar(0.0);
     field_state.scatterBunch();
     for(size_t i = 0;i < 8*n;i++){
         field_state.fieldStep();
     }
+    field_state.evaluate_EB();
+    Kokkos::fence();
+    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    
+    std::ofstream line("ampere_line.txt");
+    
+    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
+    for(unsigned i = 1;i < nr[2];i++){
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n";
+    }
+    return 0.0f;
 }
 int main(int argc, char* argv[]) {
-    using scalar = float;
+    using scalar = double;
     ippl::initialize(argc, argv);
     {
-
+        
+        test_gauss_law<scalar>(64);
+        test_amperes_law<scalar>(64);
+        goto exit;
         config cfg = read_config("../config.json");
         const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
         cfg.extents[2] *= frame_gamma;
@@ -2302,7 +2422,8 @@ int main(int argc, char* argv[]) {
                     
                     snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
                     std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
-                    writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
+                    if(ffmpeg_file != nullptr)
+                        writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
                 }
                 delete[] imagedata;
                 delete[] idata_recvbuffer;
@@ -2312,5 +2433,6 @@ int main(int argc, char* argv[]) {
         uint64_t endtime = nanoTime();
         std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
     }
+    exit:
     ippl::finalize();
 }
\ No newline at end of file

From d8f7568a2266202d4a0e54d49c25a939505310d8 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Mon, 22 Apr 2024 12:51:55 +0200
Subject: [PATCH 15/32] Add plotscripts

---
 plotscripts/ampereplot.gp | 13 +++++++++++++
 plotscripts/gaussplot.gp  | 13 +++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100755 plotscripts/ampereplot.gp
 create mode 100755 plotscripts/gaussplot.gp

diff --git a/plotscripts/ampereplot.gp b/plotscripts/ampereplot.gp
new file mode 100755
index 000000000..495a05cbd
--- /dev/null
+++ b/plotscripts/ampereplot.gp
@@ -0,0 +1,13 @@
+#! /usr/bin/gnuplot
+set terminal svg size 800,600 font "CMU Serif,22"
+set logscale x 2
+set logscale y 2
+set key box
+set output "ampere.svg"
+set ylabel "Magnetic Flux density[T]"
+set xlabel "Distance to wire [m]"
+#set title "Ampere's Law"
+set grid lw 2
+set ytics format "2^{%L}"
+set xtics format "2^{%L}"
+plot 'ampere_line.txt' u (abs($1)):(abs($2)) w points title "Measurements", 1.256e-6/(2*3.14159)*(1/x) w lines lw 2 title "Radial falloff"
diff --git a/plotscripts/gaussplot.gp b/plotscripts/gaussplot.gp
new file mode 100755
index 000000000..879584114
--- /dev/null
+++ b/plotscripts/gaussplot.gp
@@ -0,0 +1,13 @@
+#! /usr/bin/gnuplot
+set terminal svg size 900,700 font "CMU Serif, 25"
+set logscale x 2
+set logscale y 2
+set key box
+set output "gauss.svg"
+set ylabel "Electric Field [V/m]"
+set xlabel "Distance to charge [m]"
+#set title "Gauss's Law"
+set grid lw 2
+set ytics format "2^{%L}"
+set xtics format "2^{%L}"
+plot 'gauss_line.txt' u (abs($1)):(abs($2)) w points title "Measurements" pt 2, (1 / (4 * 3.1416 * 8.854e-12 * x * x)) w lines lw 3 title "Quadratic falloff"

From 46ff0d9eaf1666917b6464a25e27509539907b7d Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Mon, 29 Apr 2024 21:37:51 +0200
Subject: [PATCH 16/32] Some things

---
 alpine/CMakeLists.txt          |    3 +
 alpine/FreeElectronLaser.cpp   | 2442 +++++++++++++++++++++++++++++++
 src/CMakeLists.txt             |    2 +-
 src/MaxwellSolvers/FDTD.h      |  340 ++++-
 src/MaxwellSolvers/Maxwell.h   |   21 +-
 test/solver/TestFDTDSolver.cpp | 2507 ++------------------------------
 6 files changed, 2894 insertions(+), 2421 deletions(-)
 create mode 100644 alpine/FreeElectronLaser.cpp

diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt
index c045305a8..e2711a472 100644
--- a/alpine/CMakeLists.txt
+++ b/alpine/CMakeLists.txt
@@ -21,6 +21,9 @@ target_link_libraries (PenningTrap ${IPPL_LIBS})
 add_executable (LandauDamping LandauDamping.cpp)
 target_link_libraries (LandauDamping ${IPPL_LIBS})
 
+add_executable (FreeElectronLaser FreeElectronLaser.cpp)
+target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
+
 add_executable (BumponTailInstability BumponTailInstability.cpp)
 target_link_libraries (BumponTailInstability ${IPPL_LIBS})
 
diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
new file mode 100644
index 000000000..d15dd6e76
--- /dev/null
+++ b/alpine/FreeElectronLaser.cpp
@@ -0,0 +1,2442 @@
+#include "Ippl.h"
+
+#include "Types/Vector.h"
+
+#include "Field/Field.h"
+#include "MaxwellSolvers/FDTD.h"
+#include <chrono>
+#include <cstdio> // For popen
+#define JSON_HAS_RANGES 0 //Merlin compilation
+#include <json.hpp>
+#include <fstream>
+#include <list>
+#include <Kokkos_Random.hpp>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.hpp>
+
+
+constexpr float turbo_cm[256][3] = {
+  {0.18995,0.07176,0.23217},
+  {0.19483,0.08339,0.26149},
+  {0.19956,0.09498,0.29024},
+  {0.20415,0.10652,0.31844},
+  {0.20860,0.11802,0.34607},
+  {0.21291,0.12947,0.37314},
+  {0.21708,0.14087,0.39964},
+  {0.22111,0.15223,0.42558},
+  {0.22500,0.16354,0.45096},
+  {0.22875,0.17481,0.47578},
+  {0.23236,0.18603,0.50004},
+  {0.23582,0.19720,0.52373},
+  {0.23915,0.20833,0.54686},
+  {0.24234,0.21941,0.56942},
+  {0.24539,0.23044,0.59142},
+  {0.24830,0.24143,0.61286},
+  {0.25107,0.25237,0.63374},
+  {0.25369,0.26327,0.65406},
+  {0.25618,0.27412,0.67381},
+  {0.25853,0.28492,0.69300},
+  {0.26074,0.29568,0.71162},
+  {0.26280,0.30639,0.72968},
+  {0.26473,0.31706,0.74718},
+  {0.26652,0.32768,0.76412},
+  {0.26816,0.33825,0.78050},
+  {0.26967,0.34878,0.79631},
+  {0.27103,0.35926,0.81156},
+  {0.27226,0.36970,0.82624},
+  {0.27334,0.38008,0.84037},
+  {0.27429,0.39043,0.85393},
+  {0.27509,0.40072,0.86692},
+  {0.27576,0.41097,0.87936},
+  {0.27628,0.42118,0.89123},
+  {0.27667,0.43134,0.90254},
+  {0.27691,0.44145,0.91328},
+  {0.27701,0.45152,0.92347},
+  {0.27698,0.46153,0.93309},
+  {0.27680,0.47151,0.94214},
+  {0.27648,0.48144,0.95064},
+  {0.27603,0.49132,0.95857},
+  {0.27543,0.50115,0.96594},
+  {0.27469,0.51094,0.97275},
+  {0.27381,0.52069,0.97899},
+  {0.27273,0.53040,0.98461},
+  {0.27106,0.54015,0.98930},
+  {0.26878,0.54995,0.99303},
+  {0.26592,0.55979,0.99583},
+  {0.26252,0.56967,0.99773},
+  {0.25862,0.57958,0.99876},
+  {0.25425,0.58950,0.99896},
+  {0.24946,0.59943,0.99835},
+  {0.24427,0.60937,0.99697},
+  {0.23874,0.61931,0.99485},
+  {0.23288,0.62923,0.99202},
+  {0.22676,0.63913,0.98851},
+  {0.22039,0.64901,0.98436},
+  {0.21382,0.65886,0.97959},
+  {0.20708,0.66866,0.97423},
+  {0.20021,0.67842,0.96833},
+  {0.19326,0.68812,0.96190},
+  {0.18625,0.69775,0.95498},
+  {0.17923,0.70732,0.94761},
+  {0.17223,0.71680,0.93981},
+  {0.16529,0.72620,0.93161},
+  {0.15844,0.73551,0.92305},
+  {0.15173,0.74472,0.91416},
+  {0.14519,0.75381,0.90496},
+  {0.13886,0.76279,0.89550},
+  {0.13278,0.77165,0.88580},
+  {0.12698,0.78037,0.87590},
+  {0.12151,0.78896,0.86581},
+  {0.11639,0.79740,0.85559},
+  {0.11167,0.80569,0.84525},
+  {0.10738,0.81381,0.83484},
+  {0.10357,0.82177,0.82437},
+  {0.10026,0.82955,0.81389},
+  {0.09750,0.83714,0.80342},
+  {0.09532,0.84455,0.79299},
+  {0.09377,0.85175,0.78264},
+  {0.09287,0.85875,0.77240},
+  {0.09267,0.86554,0.76230},
+  {0.09320,0.87211,0.75237},
+  {0.09451,0.87844,0.74265},
+  {0.09662,0.88454,0.73316},
+  {0.09958,0.89040,0.72393},
+  {0.10342,0.89600,0.71500},
+  {0.10815,0.90142,0.70599},
+  {0.11374,0.90673,0.69651},
+  {0.12014,0.91193,0.68660},
+  {0.12733,0.91701,0.67627},
+  {0.13526,0.92197,0.66556},
+  {0.14391,0.92680,0.65448},
+  {0.15323,0.93151,0.64308},
+  {0.16319,0.93609,0.63137},
+  {0.17377,0.94053,0.61938},
+  {0.18491,0.94484,0.60713},
+  {0.19659,0.94901,0.59466},
+  {0.20877,0.95304,0.58199},
+  {0.22142,0.95692,0.56914},
+  {0.23449,0.96065,0.55614},
+  {0.24797,0.96423,0.54303},
+  {0.26180,0.96765,0.52981},
+  {0.27597,0.97092,0.51653},
+  {0.29042,0.97403,0.50321},
+  {0.30513,0.97697,0.48987},
+  {0.32006,0.97974,0.47654},
+  {0.33517,0.98234,0.46325},
+  {0.35043,0.98477,0.45002},
+  {0.36581,0.98702,0.43688},
+  {0.38127,0.98909,0.42386},
+  {0.39678,0.99098,0.41098},
+  {0.41229,0.99268,0.39826},
+  {0.42778,0.99419,0.38575},
+  {0.44321,0.99551,0.37345},
+  {0.45854,0.99663,0.36140},
+  {0.47375,0.99755,0.34963},
+  {0.48879,0.99828,0.33816},
+  {0.50362,0.99879,0.32701},
+  {0.51822,0.99910,0.31622},
+  {0.53255,0.99919,0.30581},
+  {0.54658,0.99907,0.29581},
+  {0.56026,0.99873,0.28623},
+  {0.57357,0.99817,0.27712},
+  {0.58646,0.99739,0.26849},
+  {0.59891,0.99638,0.26038},
+  {0.61088,0.99514,0.25280},
+  {0.62233,0.99366,0.24579},
+  {0.63323,0.99195,0.23937},
+  {0.64362,0.98999,0.23356},
+  {0.65394,0.98775,0.22835},
+  {0.66428,0.98524,0.22370},
+  {0.67462,0.98246,0.21960},
+  {0.68494,0.97941,0.21602},
+  {0.69525,0.97610,0.21294},
+  {0.70553,0.97255,0.21032},
+  {0.71577,0.96875,0.20815},
+  {0.72596,0.96470,0.20640},
+  {0.73610,0.96043,0.20504},
+  {0.74617,0.95593,0.20406},
+  {0.75617,0.95121,0.20343},
+  {0.76608,0.94627,0.20311},
+  {0.77591,0.94113,0.20310},
+  {0.78563,0.93579,0.20336},
+  {0.79524,0.93025,0.20386},
+  {0.80473,0.92452,0.20459},
+  {0.81410,0.91861,0.20552},
+  {0.82333,0.91253,0.20663},
+  {0.83241,0.90627,0.20788},
+  {0.84133,0.89986,0.20926},
+  {0.85010,0.89328,0.21074},
+  {0.85868,0.88655,0.21230},
+  {0.86709,0.87968,0.21391},
+  {0.87530,0.87267,0.21555},
+  {0.88331,0.86553,0.21719},
+  {0.89112,0.85826,0.21880},
+  {0.89870,0.85087,0.22038},
+  {0.90605,0.84337,0.22188},
+  {0.91317,0.83576,0.22328},
+  {0.92004,0.82806,0.22456},
+  {0.92666,0.82025,0.22570},
+  {0.93301,0.81236,0.22667},
+  {0.93909,0.80439,0.22744},
+  {0.94489,0.79634,0.22800},
+  {0.95039,0.78823,0.22831},
+  {0.95560,0.78005,0.22836},
+  {0.96049,0.77181,0.22811},
+  {0.96507,0.76352,0.22754},
+  {0.96931,0.75519,0.22663},
+  {0.97323,0.74682,0.22536},
+  {0.97679,0.73842,0.22369},
+  {0.98000,0.73000,0.22161},
+  {0.98289,0.72140,0.21918},
+  {0.98549,0.71250,0.21650},
+  {0.98781,0.70330,0.21358},
+  {0.98986,0.69382,0.21043},
+  {0.99163,0.68408,0.20706},
+  {0.99314,0.67408,0.20348},
+  {0.99438,0.66386,0.19971},
+  {0.99535,0.65341,0.19577},
+  {0.99607,0.64277,0.19165},
+  {0.99654,0.63193,0.18738},
+  {0.99675,0.62093,0.18297},
+  {0.99672,0.60977,0.17842},
+  {0.99644,0.59846,0.17376},
+  {0.99593,0.58703,0.16899},
+  {0.99517,0.57549,0.16412},
+  {0.99419,0.56386,0.15918},
+  {0.99297,0.55214,0.15417},
+  {0.99153,0.54036,0.14910},
+  {0.98987,0.52854,0.14398},
+  {0.98799,0.51667,0.13883},
+  {0.98590,0.50479,0.13367},
+  {0.98360,0.49291,0.12849},
+  {0.98108,0.48104,0.12332},
+  {0.97837,0.46920,0.11817},
+  {0.97545,0.45740,0.11305},
+  {0.97234,0.44565,0.10797},
+  {0.96904,0.43399,0.10294},
+  {0.96555,0.42241,0.09798},
+  {0.96187,0.41093,0.09310},
+  {0.95801,0.39958,0.08831},
+  {0.95398,0.38836,0.08362},
+  {0.94977,0.37729,0.07905},
+  {0.94538,0.36638,0.07461},
+  {0.94084,0.35566,0.07031},
+  {0.93612,0.34513,0.06616},
+  {0.93125,0.33482,0.06218},
+  {0.92623,0.32473,0.05837},
+  {0.92105,0.31489,0.05475},
+  {0.91572,0.30530,0.05134},
+  {0.91024,0.29599,0.04814},
+  {0.90463,0.28696,0.04516},
+  {0.89888,0.27824,0.04243},
+  {0.89298,0.26981,0.03993},
+  {0.88691,0.26152,0.03753},
+  {0.88066,0.25334,0.03521},
+  {0.87422,0.24526,0.03297},
+  {0.86760,0.23730,0.03082},
+  {0.86079,0.22945,0.02875},
+  {0.85380,0.22170,0.02677},
+  {0.84662,0.21407,0.02487},
+  {0.83926,0.20654,0.02305},
+  {0.83172,0.19912,0.02131},
+  {0.82399,0.19182,0.01966},
+  {0.81608,0.18462,0.01809},
+  {0.80799,0.17753,0.01660},
+  {0.79971,0.17055,0.01520},
+  {0.79125,0.16368,0.01387},
+  {0.78260,0.15693,0.01264},
+  {0.77377,0.15028,0.01148},
+  {0.76476,0.14374,0.01041},
+  {0.75556,0.13731,0.00942},
+  {0.74617,0.13098,0.00851},
+  {0.73661,0.12477,0.00769},
+  {0.72686,0.11867,0.00695},
+  {0.71692,0.11268,0.00629},
+  {0.70680,0.10680,0.00571},
+  {0.69650,0.10102,0.00522},
+  {0.68602,0.09536,0.00481},
+  {0.67535,0.08980,0.00449},
+  {0.66449,0.08436,0.00424},
+  {0.65345,0.07902,0.00408},
+  {0.64223,0.07380,0.00401},
+  {0.63082,0.06868,0.00401},
+  {0.61923,0.06367,0.00410},
+  {0.60746,0.05878,0.00427},
+  {0.59550,0.05399,0.00453},
+  {0.58336,0.04931,0.00486},
+  {0.57103,0.04474,0.00529},
+  {0.55852,0.04028,0.00579},
+  {0.54583,0.03593,0.00638},
+  {0.53295,0.03169,0.00705},
+  {0.51989,0.02756,0.00780},
+  {0.50664,0.02354,0.00863},
+  {0.49321,0.01963,0.00955},
+  {0.47960,0.01583,0.01055}
+};
+
+
+
+
+uint64_t nanoTime(){
+    using namespace std;
+    using namespace chrono;
+    return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
+}
+//CONFIG
+KOKKOS_INLINE_FUNCTION bool isNaN(float x){
+    #ifdef __CUDA_ARCH__
+    return isnan(x);
+    #else
+    return std::isnan(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(float x){
+    #ifdef __CUDA_ARCH__
+    return isinf(x);
+    #else
+    return std::isinf(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isNaN(double x){
+    #ifdef __CUDA_ARCH__
+    return isnan(x);
+    #else
+    return std::isnan(x);
+    #endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(double x){
+    #ifdef __CUDA_ARCH__
+    return isinf(x);
+    #else
+    return std::isinf(x);
+    #endif
+}
+#define assert_isreal(X) assert(!isNaN(X) && !isINF(X))
+template<unsigned int Dim, typename callable, typename... Ts>
+KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from, ippl::Vector<uint32_t, Dim> to, Ts... args){
+    if constexpr(sizeof...(Ts) == Dim){
+        c(args...);
+    }
+    else{
+        for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){
+            serial_for(c, from, to, args..., i);
+        }
+    }
+}
+
+
+
+
+
+struct config {
+    using scalar = double;
+
+    //using length_unit = funits::length<scalar, funits::planck_base>;
+    //using duration_unit = funits::time<scalar, funits::planck_base>;
+    // GRID PARAMETERS
+    ippl::Vector<uint32_t, 3> resolution;
+
+    ippl::Vector<scalar, 3> extents;
+    scalar total_time;
+    scalar timestep_ratio;
+
+    scalar length_scale_in_jobfile, temporal_scale_in_jobfile;
+
+        // All in unit_charge, or unit_mass
+    scalar charge, mass;
+
+    uint64_t num_particles;
+    bool space_charge;
+
+        // BUNCH PARAMETERS
+    ippl::Vector<scalar, 3> mean_position;
+    ippl::Vector<scalar, 3> sigma_position;
+    ippl::Vector<scalar, 3> position_truncations;
+    ippl::Vector<scalar, 3> sigma_momentum;
+    scalar bunch_gamma;
+
+    scalar undulator_K;
+    scalar undulator_period;
+    scalar undulator_length;
+
+    uint32_t output_rhythm;
+    std::string output_path;
+    std::unordered_map<std::string, double> experiment_options;
+};
+template<typename scalar, unsigned Dim>
+ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j){
+    if(j.is_array()){
+        assert(j.size() == Dim);
+        ippl::Vector<scalar, Dim> ret;
+        for(unsigned i = 0;i < Dim;i++)
+            ret[i] = (scalar)j[i];
+        return ret;
+    }
+    else{
+        std::cerr << "Warning: Obtaining Vector from scalar json\n";
+        ippl::Vector<scalar, Dim> ret;
+        ret.fill((scalar)j);
+        return ret;
+    }
+}
+template<size_t N, typename T>
+struct DefaultedStringLiteral {
+    constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) {
+        std::copy_n(str, N, key);
+    }
+    
+    T value;
+    char key[N];
+};
+template<size_t N>
+struct StringLiteral {
+    constexpr StringLiteral(const char (&str)[N]) {
+        std::copy_n(str, N, value);
+    }
+    
+    char value[N];
+    constexpr DefaultedStringLiteral<N, int> operator>>(int t)const noexcept{
+        return DefaultedStringLiteral<N, int>(value, t);
+    }
+    constexpr size_t size()const noexcept{return N - 1;}
+};
+template<StringLiteral lit>
+constexpr size_t chash(){
+    size_t hash = 5381;
+    int c;
+
+    for(size_t i = 0;i < lit.size();i++){
+        c = lit.value[i];
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const char* val) {
+    size_t hash = 5381;
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const std::string& _val) {
+    size_t hash = 5381;
+    const char* val = _val.c_str();
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    }
+
+    return hash;
+}
+std::string lowercase_singular(std::string str) {
+    // Convert string to lowercase
+    std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+
+    // Check if the string ends with "s" and remove it if it does
+    if (!str.empty() && str.back() == 's') {
+        str.pop_back();
+    }
+
+    return str;
+}
+double get_time_multiplier(const nlohmann::json& j){
+    std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]);
+    double time_factor = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-time">():
+        case chash<"plancktime">():
+        case chash<"pt">():
+        case chash<"natural">():
+            time_factor = unit_time_in_seconds;
+        break;
+        case chash<"picosecond">():
+            time_factor = 1e-12;
+        break;
+        case chash<"nanosecond">():
+            time_factor = 1e-9;
+        break;
+        case chash<"microsecond">():
+            time_factor = 1e-6;
+        break;
+        case chash<"millisecond">():
+            time_factor = 1e-3;
+        break;
+        case chash<"second">():
+            time_factor = 1.0;
+        break;
+        default:
+            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n";
+        break;
+    }
+    return time_factor;
+}
+double get_length_multiplier(const nlohmann::json& options){
+    std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]);
+    double length_factor = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-length">():
+        case chash<"plancklength">():
+        case chash<"pl">():
+        case chash<"natural">():
+            length_factor = unit_length_in_meters;
+        break;
+        case chash<"picometer">():
+            length_factor = 1e-12;
+        break;
+        case chash<"nanometer">():
+            length_factor = 1e-9;
+        break;
+        case chash<"micrometer">():
+            length_factor = 1e-6;
+        break;
+        case chash<"millimeter">():
+            length_factor = 1e-3;
+        break;
+        case chash<"meter">():
+            length_factor = 1.0;
+        break;
+        default:
+            std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n";
+        break;
+    }
+    return length_factor;
+}
+config read_config(const char *filepath){
+    std::ifstream cfile(filepath);
+    nlohmann::json j;
+    cfile >> j;
+    config::scalar lmult = get_length_multiplier(j);
+    config::scalar tmult = get_time_multiplier(j);
+    config ret;
+
+    ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters;
+    ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters;
+    ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters;
+    ret.resolution = getVector<uint32_t, 3>(j["mesh"]["resolution"]);
+
+    //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
+    //std::cerr << "Tmult: " << tmult << "\n";
+    if(j.contains("timestep-ratio")){
+        ret.timestep_ratio = (config::scalar)j["timestep-ratio"];
+    }
+
+    else{
+        ret.timestep_ratio = 1;
+    }
+    ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
+    ret.space_charge = (bool)(j["mesh"]["space-charge"]);
+    ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]);
+    if(ret.bunch_gamma < config::scalar(1)){
+        std::cerr << "Gamma must be >= 1\n";
+        exit(1);
+    }
+    assert(j.contains("undulator"));
+    assert(j["undulator"].contains("static-undulator"));
+
+    ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"];
+    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters;
+    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters;
+    assert(!std::isnan(ret.undulator_length));
+    assert(!std::isnan(ret.undulator_period));
+    assert(!std::isnan(ret.extents[0]));
+    assert(!std::isnan(ret.extents[1]));
+    assert(!std::isnan(ret.extents[2]));
+    assert(!std::isnan(ret.total_time));
+    ret.length_scale_in_jobfile = get_length_multiplier(j);
+    ret.temporal_scale_in_jobfile = get_time_multiplier(j);
+    ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
+    ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
+    ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"];
+    ret.mean_position  = getVector<config::scalar, 3>(j["bunch"]["position"])                       * lmult / unit_length_in_meters;
+    ret.sigma_position = getVector<config::scalar, 3>(j["bunch"]["sigma-position"])                 * lmult / unit_length_in_meters;
+    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters;
+    ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
+    ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
+    ret.output_path = "../data/";
+    if(j["output"].contains("path")){
+        ret.output_path = j["output"]["path"];
+        if(!ret.output_path.ends_with('/')){
+            ret.output_path.push_back('/');
+        }
+    }
+    if(j.contains("experimentation")){
+        nlohmann::json je = j["experimentation"];
+        for(auto it = je.begin(); it!= je.end();it++){
+            ret.experiment_options[it.key()] = double(it.value());
+        }
+    }
+    return ret;
+}
+template<typename scalar>
+using FieldVector = ippl::Vector<scalar, 3>;
+template<typename scalar>
+struct BunchInitialize {
+
+    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is
+     * manual the charge at points of the position vector will be produced.    				*/
+    // std::string     			bunchType_;
+
+    /* Type of the distributions (transverse or longitudinal) in the bunch.				*/
+    std::string distribution_;
+
+    /* Type of the generator for creating the bunch distribution.					*/
+    std::string generator_;
+
+    /* Total number of macroparticles in the bunch.                                                     */
+    unsigned int numberOfParticles_;
+
+    /* Total charge of the bunch in pC.                                                                 */
+    scalar cloudCharge_;
+
+    /* Initial energy of the bunch in MeV.                                                              */
+    scalar initialGamma_;
+
+    /* Initial normalized speed of the bunch.                                                           */
+    scalar initialBeta_;
+
+    /* Initial movement direction of the bunch, which is a unit vector.                                 */
+    FieldVector<scalar> initialDirection_;
+
+    /* Position of the center of the bunch in the unit of length scale.                           	*/
+    // std::vector<FieldVector<scalar> >	position_;
+    FieldVector<scalar> position_;
+
+    /* Number of macroparticles in each direction for 3Dcrystal type.                                   */
+    FieldVector<unsigned int> numbers_;
+
+    /* Lattice constant in x, y, and z directions for 3D crystal type.                                  */
+    FieldVector<scalar> latticeConstants_;
+
+    /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal
+     * type, it will be the spread in position for each micro-bunch of the crystal.			*/
+    FieldVector<scalar> sigmaPosition_;
+
+    /* Spread in energy in each direction.                                                              */
+    FieldVector<scalar> sigmaGammaBeta_;
+
+    /* Store the truncation transverse distance for the electron generation.				*/
+    scalar tranTrun_;
+
+    /* Store the truncation longitudinal distance for the electron generation.				*/
+    scalar longTrun_;
+
+    /* Name of the file for reading the electrons distribution from.					*/
+    std::string fileName_;
+
+    /* The radiation wavelength corresponding to the bunch length outside the undulator			*/
+    scalar lambda_;
+
+    /* Bunching factor for the initialization of the bunch.						*/
+    scalar bF_;
+
+    /* Phase of the bunching factor for the initialization of the bunch.				*/
+    scalar bFP_;
+
+    /* Boolean flag determining the activation of shot-noise.						*/
+    bool shotNoise_;
+
+    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.	*/
+    FieldVector<scalar> betaVector_;
+
+    /* Initialize the parameters for the bunch initialization to some first values.                     */
+    // BunchInitialize ();
+};
+
+
+
+
+
+//END CONFIG
+
+//LORENTZ FRAME AND UNDULATOR
+template<typename T, unsigned axis = 2>
+struct UniaxialLorentzframe{
+    constexpr static T c = 1.0;
+    using scalar = T;
+    using Vector3 = ippl::Vector<T, 3>;
+    scalar beta_m;
+    scalar gammaBeta_m;
+    scalar gamma_m;
+    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){
+        gammaBeta_m = gammaBeta;
+        beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta));
+        gamma_m = sqrt(1 + gammaBeta * (gammaBeta));
+    }
+    KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{
+        arg[axis] = gamma_m * (arg[axis] + beta_m * time); 
+    }
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB)const noexcept{
+        
+        Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+        ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
+        ret.first  = ippl::Vector<T, 3>(unprimedEB.first  + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+        ret.second = ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+        ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
+        ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
+        return ret;
+    }
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> inverse_transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB)const noexcept{
+        return UniaxialLorentzframe<T, 2>(-gammaBeta_m).transform_EB(primedEB);
+    }
+};
+template<typename scalar>
+BunchInitialize<scalar> generate_mithra_config(const config& cfg, const UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
+    using vec3 = ippl::Vector<scalar, 3>;
+    scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K);
+    BunchInitialize<scalar> init;
+    init.generator_ = "random";
+    init.distribution_ = "uniform";
+    init.initialDirection_ = vec3{0, 0, 1};
+    init.initialGamma_ = cfg.bunch_gamma;
+    init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
+    init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast<scalar>();
+    init.sigmaPosition_ = cfg.sigma_position.template cast<scalar>();
+
+    // TODO: Initial bunching factor huh
+    init.bF_ = 0.01;
+    init.bFP_ = 0;
+    init.shotNoise_ = false;
+    init.cloudCharge_ = cfg.charge;
+    init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
+    init.longTrun_ = cfg.position_truncations[2];
+    init.tranTrun_ = cfg.position_truncations[0];
+    init.position_ = cfg.mean_position.cast<scalar>();
+    init.betaVector_ = ippl::Vector<scalar, 3>{0,0,init.initialBeta_};
+    init.numberOfParticles_ = cfg.num_particles;
+
+    init.numbers_ = 0;                      // UNUSED
+    init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED
+
+    return init;
+}
+template <typename Double>
+struct Charge {
+
+    Double q;                     /* Charge of the point in the unit of electron charge.	*/
+    FieldVector<Double> rnp, rnm; /* Position vector of the charge.			*/
+    FieldVector<Double> gb;       /* Normalized velocity vector of the charge.		*/
+
+    /* Double flag determining if the particle is passing the entrance point of the undulator. This flag
+     * can be used for better boosting the bunch to the moving frame. We need to consider it to be double,
+     * because this flag needs to be communicated during bunch update.					*/
+    Double e;
+
+    // Charge();
+};
+template <typename scalar>
+using ChargeVector = std::list<Charge<scalar>>;
+template<typename Double>
+void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<Double> & chargeVector, int rank, int size, int ia){
+    /* Correct the number of particles if it is not a multiple of four.					*/
+    if ( bunchInit.numberOfParticles_ % 4 != 0 )
+      {
+        unsigned int n = bunchInit.numberOfParticles_ % 4;
+        bunchInit.numberOfParticles_ += 4 - n;
+        //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") +
+        //    std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
+    }
+
+    /* Save the initially given number of particles.							*/
+    unsigned int	Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
+
+    /* Declare the required parameters for the initialization of charge vectors.                      	*/
+    Charge<Double>         	charge; charge.q  = bunchInit.cloudCharge_ / Np;
+    FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
+    FieldVector<Double> r  (0.0);
+    FieldVector<Double> t  (0.0);
+    Double            	t0, g;
+    Double		zmin = 1e100;
+    Double		Ne, bF, bFi;
+    unsigned int	bmi;
+    std::vector<Double>	randomNumbers;
+
+    /* The initialization in group of four particles should only be done if there exists an undulator in
+     * the interaction.											*/
+    unsigned int	ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4;
+
+    /* Check the bunching factor.                                                                     	*/
+    if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 )
+      {
+        //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") );
+        //exit(1);
+      }
+
+    /* If the generator is random we should make sure that different processors do not produce the same
+     * random numbers.											*/
+    if 	( bunchInit.generator_ == "random" )
+      {
+        /* Initialize the random number generator.								*/
+        srand ( time(NULL) );
+        /* Np / ng * 20 is the maximum number of particles.							*/
+        randomNumbers.resize( Np / ng * 20, 0.0);
+        for ( unsigned int ri = 0; ri < Np / ng * 20; ri++)
+          randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX));
+      }
+
+    /* Declare the generator function depending on the input.						*/
+    auto generate = [&] (unsigned int n, unsigned int m) {
+      //if 	( bunchInit.generator_ == "random" )
+        return  ( randomNumbers.at( n * 2 * Np/ng + m ) );
+      //else
+      //  return  ( randomNumbers[ n * 2 * Np/ng + m ] );
+    //TODO: Return halton properly
+        //return 	( halton(n,m) );
+    };
+
+    /* Declare the function for injecting the shot noise.						*/
+    auto insertCharge = [&] (Charge<Double> q) {
+
+      for ( unsigned int ii = 0; ii < ng; ii++ )
+        {
+          /* The random modulation is introduced depending on the shot-noise being activated.		*/
+          if ( bunchInit.shotNoise_ )
+            {
+              /* Obtain the number of beamlet.								*/
+              bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ );
+
+              /* Obtain the phase and amplitude of the modulation.					*/
+              bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) );
+
+              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+              q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) );
+            }
+          else if ( bunchInit.lambda_ != 0.0)
+            {
+              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+              q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 );
+            }
+
+          /* Set this charge into the charge vector.							*/
+          chargeVector.push_back(q);
+        }
+    };
+
+    /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to
+     * calculate the FEL bucket number.									*/
+    if ( bunchInit.shotNoise_ )
+      {
+        for (i = 0; i < Np / ng; i++)
+          {
+            if ( bunchInit.distribution_ == "uniform" )
+              zmin = std::min(   Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin );
+            else if ( bunchInit.distribution_ == "gaussian" )
+              zmin = std::min(  (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin );
+            else
+              {
+                //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
+                exit(1);
+              }
+          }
+
+        if ( bunchInit.distribution_ == "uniform" )
+          for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++)
+            {
+              t0  = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) );
+              t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
+
+              zmin = std::min(   t0 , zmin );
+            }
+
+        //zmin = zmin + bunchInit.position_[ia][2];
+        zmin = zmin + bunchInit.position_[2];
+
+        /* Obtain the average number of electrons per FEL beamlet.					*/
+        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] );
+
+        /* Set the bunching factor level for the shot noise depending on the given values.		*/
+        bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
+
+        //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) );
+      }
+
+    /* Determine the properties of each charge point and add them to the charge vector.               	*/
+    for (i = rank; i < Np / ng; i += size)
+      {
+        /* Determine the transverse coordinate.								*/
+        r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
+        r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
+
+        /* Determine the longitudinal coordinate.							*/
+        if ( bunchInit.distribution_ == "uniform" )
+          r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2];
+        else if ( bunchInit.distribution_ == "gaussian" )
+          r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
+        else
+          {
+            //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
+            exit(1);
+          }
+
+        /* Determine the transverse momentum.								*/
+        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
+        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
+        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
+
+        if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
+          {
+            /* Shift the generated charge to the center position and momentum space.			*/
+            //charge.rnp    = bunchInit.position_[ia];
+            charge.rnp    = bunchInit.position_;
+            charge.rnp   += r;
+
+            charge.gb   = gb;
+            charge.gb  += t;
+            //std::cout << gb << "\n";
+            if(std::isinf(gb[2])){
+                std::cerr << "it klonked here\n";
+            }
+
+            /* Insert this charge and the mirrored ones into the charge vector.				*/
+            insertCharge(charge);
+          }
+      }
+
+    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the
+     * CSE from the tail of the bunch.									*/
+    if ( bunchInit.distribution_ == "uniform" ){
+      for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size)
+        {
+            
+          r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
+          r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
+
+          /* Determine the longitudinal coordinate.							*/
+          r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
+          r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
+
+          /* Determine the transverse momentum.								*/
+          t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
+          t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
+          t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
+          //std::cerr << "DOING UNIFORM tapering!!!\n";
+          if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
+            {
+                //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n";
+              /* Shift the generated charge to the center position and momentum space.			*/
+              charge.rnp   = bunchInit.position_[ia];
+              charge.rnp  += r;
+
+              charge.gb  = gb;
+              
+              charge.gb += t;
+              //std::cout << gb[0] << "\n";
+              //if(std::isinf(gb.squaredNorm())){
+              //    std::cerr << "it klonked here\n";
+              //}
+              /* Insert this charge and the mirrored ones into the charge vector.			*/
+              insertCharge(charge);
+            }
+        }
+    }
+
+    /* Reset the value for the number of particle variable according to the installed number of
+     * macro-particles and perform the corresponding changes.                                         	*/
+    bunchInit.numberOfParticles_ = chargeVector.size();
+}
+
+template<typename Double>
+void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma){
+    Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
+    Double zmaxL = -1.0e100, zmaxG;
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
+      {
+        Double g  	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        if(std::isinf(g)){
+            std::cerr << __FILE__  << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n";
+            abort();
+        }
+        Double bz 	= iterQ->gb[2] / g;
+        iterQ->rnp[2]  *= frame_gamma;
+        
+        iterQ->gb[2] 	= frame_gamma * g * ( bz - frame_beta );
+        
+        zmaxL 		= std::max( zmaxL , iterQ->rnp[2] );
+      }
+    zmaxG = zmaxL;
+    struct {
+       Double zu_;
+       Double beta_;
+    } bunch_;
+    bunch_.zu_ 		= zmaxG;
+    bunch_.beta_ 	= frame_beta;
+
+    /****************************************************************************************************/
+
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
+      {
+        Double g	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        iterQ->rnp[0]  += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        iterQ->rnp[1]  += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        iterQ->rnp[2]  += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
+        if(std::isnan(iterQ->rnp[2])){
+            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_  << ", " <<  frame_beta << "\n";
+            std::cerr << __FILE__  << ": " << __LINE__ << "   OOOOOF\n";
+            abort();
+        }
+      }
+}
+
+
+
+
+template<typename bunch_type, typename scalar>
+size_t initialize_bunch_mithra(
+    bunch_type& bunch,
+    const BunchInitialize<scalar>& bunchInit,
+    scalar frame_gamma){
+
+    ChargeVector<scalar> oof;
+    initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0);
+    for(auto& c : oof){
+        if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+        if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+    }
+    boost_bunch(oof, frame_gamma);
+    for(auto& c : oof){
+         if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){
+            std::cout << "Pos after boost: " << c.rnp << "\n";
+            break;
+        }
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", oof.size());
+    auto iterQ = oof.begin();
+    for (size_t i = 0; i < oof.size(); i++) {
+        assert_isreal(iterQ->gb[0]);
+        assert_isreal(iterQ->gb[1]);
+        assert_isreal(iterQ->gb[2]);
+        assert(iterQ->gb[2] != 0.0f);
+        scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        assert_isreal(g);
+        scalar bz = iterQ->gb[2] / g;
+        assert_isreal(bz);
+        (void)bz;
+        positions(i) = iterQ->rnp;
+        gammabetas(i) = iterQ->gb;
+        ++iterQ;
+    }
+    if(oof.size() > bunch.getLocalNum()){
+        bunch.create(oof.size() - bunch.getLocalNum());
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions ("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", oof.size());
+    
+    Kokkos::deep_copy(dpositions, positions);
+    Kokkos::deep_copy(dgammabetas, gammabetas);
+    Kokkos::deep_copy(bunch.R_nm1.getView(), positions);
+    Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas);
+    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();;
+    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
+        rview(i) = dpositions(i);
+        rm1view(i) = dpositions(i);
+        gbview(i) = dgammabetas(i);
+    });
+    Kokkos::fence();
+    
+    return oof.size();
+}
+
+
+
+
+
+
+
+//END LORENTZ FRAME AND UNDULATOR AND BUNCH
+
+
+
+
+
+//PREAMBLE
+
+template<typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    for(unsigned d = 0;d < dim;d++){
+        vec[d] = vec[d] * vec[d];
+    }
+    #ifndef __CUDA_ARCH__
+    using std::exp;
+    #endif
+    return exp(-(vec.sum()) / (stddev * stddev)); 
+}
+
+
+
+
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto first(){
+    return a;
+}
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto second(){
+    return b;
+}
+
+template <typename T>
+KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
+                                                     const ippl::Vector<T, 3>& b) {
+    ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
+    ret[0] = a[1] * b[2] - a[2] * b[1];
+    ret[1] = a[2] * b[0] - a[0] * b[2];
+    ret[2] = a[0] * b[1] - a[1] * b[0];
+    return ret;
+}
+template<typename T>
+KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
+    //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
+    //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+    //pos -= spacing * T(0.5);
+    ippl::Vector<T, 3> relpos = pos - origin;
+    ippl::Vector<T, 3> gridpos = relpos / hr;
+    ippl::Vector<int, 3> ipos;
+    ipos = gridpos.template cast<int>();
+    ippl::Vector<T, 3> fracpos;
+    for(unsigned k = 0;k < 3;k++){
+        fracpos[k] = gridpos[k] - (int)ipos[k];
+    }
+    //TODO: NGHOST!!!!!!!
+    ipos += ippl::Vector<int, 3>(1);
+    ret.first = ipos;
+    ret.second = fracpos;
+    return ret;
+}
+template<typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    ipos -= ldom.first();
+    //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
+    if(
+        ipos[0] < 0
+        ||ipos[1] < 0
+        ||ipos[2] < 0
+        ||size_t(ipos[0]) >= view.extent(0) - 1
+        ||size_t(ipos[1]) >= view.extent(1) - 1
+        ||size_t(ipos[2]) >= view.extent(2) - 1
+        ||fracpos[0] < 0
+        ||fracpos[1] < 0
+        ||fracpos[2] < 0
+    ){
+        return;
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+    
+    for(unsigned i = 0;i < 8;i++){
+        scalar weight = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for(unsigned d = 0;d < 3;d++){
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(value);
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+template<typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
+    //std::cout << "Value: " << value << "\n";
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    ipos -= ldom.first();
+    if(
+        ipos[0] < 0
+        ||ipos[1] < 0
+        ||ipos[2] < 0
+        ||size_t(ipos[0]) >= view.extent(0) - 1
+        ||size_t(ipos[1]) >= view.extent(1) - 1
+        ||size_t(ipos[2]) >= view.extent(2) - 1
+        ||fracpos[0] < 0
+        ||fracpos[1] < 0
+        ||fracpos[2] < 0
+    ){
+        //std::cout << "out of bounds\n";
+        return;
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+    
+    for(unsigned i = 0;i < 8;i++){
+        scalar weight = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for(unsigned d = 0;d < 3;d++){
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+
+template<typename view_type, typename scalar>
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
+
+    
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
+    //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
+    for(int d = 0;d < 3;d++){
+        //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
+        //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
+        //}
+        //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+    }
+    //const uint32_t nghost = g.nghost();
+    //from_ipos += ippl::Vector<int, 3>(nghost);
+    //to_ipos += ippl::Vector<int, 3>(nghost);
+    
+    if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
+        
+        return;
+    }
+    ippl::Vector<scalar, 3> relay;
+    const int nghost = 1;
+    const ippl::Vector<scalar, 3> orig = origin;
+    using Kokkos::max;
+    using Kokkos::min;
+    for (unsigned int i = 0; i < 3; i++) {
+        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
+                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
+                           scalar(0.5) * (to[i] + from[i])));
+    }
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+}
+
+// END PREAMBLE
+
+
+//BEGIN ABSORBING BOUNDARY CONDITION
+template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
+struct second_order_abc_face{
+    using scalar = _scalar;
+    scalar Cweights[5];
+    int sign;
+    constexpr static unsigned main_axis = _main_axis;
+    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
+        constexpr scalar c = 1;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        static_assert(
+            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
+            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
+            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
+        );
+        assert(_main_axis != side_axes[0]);
+        assert(_main_axis != side_axes[1]);
+        assert(side_axes[0] != side_axes[1]);
+        constexpr scalar truncation_order = 2.0;
+        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
+        scalar q      = - 1.0 / ( 1 + 1 );
+
+        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
+
+        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
+        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
+        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
+        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
+        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
+        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
+        return advanceBoundaryS(
+		    A_nm1(i,j,k), A_n(i,j,k),
+		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
+		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
+		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
+		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
+						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
+						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
+						 const value_type& v9 , const value_type& v10, const value_type& v11,
+						 const value_type& v12, const value_type& v13)const noexcept
+    {
+        
+      value_type v0 =
+    	 Cweights[0]  * (v1 + v5) +
+    	(Cweights[1]) * v3 +
+    	(Cweights[2]) * ( v2 + v4 ) +
+    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
+    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
+      return v0;
+    }
+};
+template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
+struct second_order_abc_edge{
+    using scalar = _scalar;
+    //
+    scalar Eweights[5];
+    
+    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
+        static_assert(normal_axis1 != normal_axis2);
+        static_assert(edge_axis != normal_axis2);
+        static_assert(edge_axis != normal_axis1);
+        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
+        constexpr scalar c0_ = scalar(1);
+        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
+        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else{
+            assert(false);
+        }
+        
+
+
+        
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
+        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
+        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
+        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
+        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
+        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
+        //return A_n(i, j, k);
+        return advanceEdgeS(
+                                        A_n(i, j, k),      A_nm1(i, j, k),
+            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
+            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
+            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
+            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
+            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
+            (              value_type v1 , value_type v2 ,
+                           value_type v3 , value_type v4 , value_type v5 ,
+                           value_type v6 , value_type v7 , value_type v8 ,
+                           value_type v9 , value_type v10, value_type v11,
+                           value_type v12, value_type v13, value_type v14,
+                           value_type v15, value_type v16, value_type v17,
+                           value_type v18, value_type v19)const noexcept{
+    value_type v0 =
+    Eweights[0] * (v3 + v8) +
+    Eweights[1] * (v5 + v6) +
+    Eweights[2] * (v2 + v9) +
+    Eweights[3] * (v1 + v4 + v7 + v10) +
+    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
+    return v0;
+  }
+};
+template<typename _scalar, bool x0, bool y0, bool z0>
+struct second_order_abc_corner{
+    using scalar = _scalar;
+    scalar Cweights[17];
+    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
+        constexpr scalar c0_ = scalar(1);
+        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        //First implementation: 0,0,0 corner
+        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
+        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
+        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
+        using ippl::apply;
+        const ippl::Vector<uint32_t, 3> offsets[8] = {
+            ippl::Vector<uint32_t, 3>{0,0,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,0},
+            ippl::Vector<uint32_t, 3>{0,yoff,0},
+            ippl::Vector<uint32_t, 3>{0,0,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
+            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
+        };
+        return advanceCornerS(
+                                          apply(A_n, c), apply(A_nm1, c),
+            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
+            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
+            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
+            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
+            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
+            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
+            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
+                            (       value_type v1 , value_type v2 ,
+                             value_type v3 , value_type v4 , value_type v5 ,
+                             value_type v6 , value_type v7 , value_type v8 ,
+                             value_type v9 , value_type v10, value_type v11,
+                             value_type v12, value_type v13, value_type v14,
+                             value_type v15, value_type v16, value_type v17,
+                             value_type v18, value_type v19, value_type v20,
+                             value_type v21, value_type v22, value_type v23)const noexcept{
+    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
+    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
+    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
+    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
+    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
+    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
+    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
+    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
+  }
+};
+
+
+
+
+
+
+struct second_order_mur_boundary_conditions{
+    template<typename field_type, typename dt_type>
+    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
+        using scalar = decltype(dt);
+        //TODO: tbh don't know
+        //const unsigned nghost = 1;
+        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
+        //assert_isreal((betaMur[0]));
+        //assert_isreal((betaMur[1]));
+        //assert_isreal((betaMur[2]));
+        auto A_n   = FA_n.getView();
+        auto A_np1 = FA_np1.getView();
+        auto A_nm1 = FA_nm1.getView();
+        ippl::Vector<uint32_t, 3> local_nr{
+            uint32_t(A_n.extent(0)),
+            uint32_t(A_n.extent(1)),
+            uint32_t(A_n.extent(2))
+        };
+        constexpr uint32_t min_abc_boundary = 1;
+        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+            
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+
+            if(Kokkos::popcount(val) == 1){
+                if(ig == min_abc_boundary){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == min_abc_boundary){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == min_abc_boundary){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(ig == true_nr[0] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 2)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            if(Kokkos::popcount(val) == 2){ //Edge
+                if(ig == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            //if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            
+            if(Kokkos::popcount(val) == 3){
+                //printf("Corner: %d, %d, %d\n", i, j, k);
+                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+    }
+};
+//END ABSORBING BOUNDARY CONDITION
+
+
+
+
+
+
+
+
+
+namespace ippl {
+    
+    template<typename scalar>
+    struct undulator_parameters{
+        scalar lambda; //MITHRA: lambda_u
+        scalar K; //Undulator parameter
+        scalar length;
+        scalar B_magnitude;
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
+            //std::cout << "Setting bmag: " << B_magnitude << "\n";
+        }
+        undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda);
+        }
+    };
+
+    template<typename scalar>
+    struct nondispersive{
+        scalar a1;
+        scalar a2;
+        scalar a4;
+        scalar a6;
+        scalar a8;
+    };
+    template <typename _scalar, class PLayout>
+    struct  Bunch : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);          // Charge attribute
+            this->addAttribute(mass);       // Mass attribute
+            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);      // Position attribute for the next time step
+            this->addAttribute(R_nm1);      // Position attribute for the next time step
+            this->addAttribute(EB_gather);   // Electric field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;          // Charge container
+        charge_container_type mass;       // Mass container
+        velocity_container_type gamma_beta; // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
+
+    };
+
+
+    template <typename scalar>
+    // clang-format off
+    struct FDTDFieldState{
+        
+        //Sorry, can't do more than 3d
+
+        constexpr static unsigned int dim = 3;
+        using Vector_t = ippl::Vector<scalar, 3>;
+        using value_type = ippl::Vector<scalar, 4>;
+        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>;
+        using Mesh_t               = ippl::UniformCartesian<scalar, dim>;
+
+        bool periodic_bc;
+        using FourField = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField = ippl::Field<Vector_t, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using EBField_t = ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using view_type = VField_t::view_type;
+        using ev_view_type = EBField_t::view_type;
+        //Fields
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_np1;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_n;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_nm1;
+        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> J;
+        ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> EB;
+        
+        //Discretization options
+        Vector_t hr_m;
+        ippl::Vector<uint32_t, 3> nr_global;
+        ippl::Vector<uint32_t, 3> nr_local;
+        scalar dt;
+        Mesh_t* mesh_mp;
+        FieldLayout<dim>* layout_mp;
+        using playout_type = ParticleSpatialLayout<scalar, 3>;
+        playout_type playout;
+        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        config m_config;
+
+        /**
+         * @brief Construct a new FDTDFieldState object
+         * Mesh and resolution parameter are technically redundant
+         * 
+         * @param resolution 
+         * @param layout 
+         * @param mesch 
+         */
+        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){
+            FA_np1.initialize(mesch, layout, 1);
+            FA_n.initialize(mesch, layout, 1);
+            FA_nm1.initialize(mesch, layout, 1);
+            J.initialize(mesch, layout, 1);
+            EB.initialize(mesch, layout, 1);
+            ThreeField Edummy;
+            Edummy.initialize(mesch, layout, 1);
+            FA_n   = value_type(0);
+            FA_np1 = value_type(0);
+            FA_nm1 = value_type(0);
+            ippl::StandardFDTDSolver<ThreeField, FourField> solver(J, Edummy, Edummy);
+            hr_m = mesch.getMeshSpacing();
+            nr_global = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
+            };
+            nr_local = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
+            };
+            //std::cout << "NR_M_g: " << nr_global << "\n";
+            //std::cout << "NR_M_l: " << nr_local << "\n";
+            dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2]));
+            particles.create(nparticles);
+            setNoBoundaryConditions();
+            
+        }
+        void setNoBoundaryConditions() {
+            periodic_bc = false;
+            typename VField_t::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::NoBcFace<VField_t>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<dim * 2>{});
+            FA_n  .setFieldBC(vector_bcs);
+            FA_np1.setFieldBC(vector_bcs);
+            FA_nm1.setFieldBC(vector_bcs);
+        }
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename VField_t::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<VField_t>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<dim * 2>{});
+            FA_n  .setFieldBC(vector_bcs);
+            FA_np1.setFieldBC(vector_bcs);
+            FA_nm1.setFieldBC(vector_bcs);
+        }
+        
+        void scatterBunch(){
+            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
+            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
+            assert_isreal(volume);
+            assert_isreal((scalar(1) / volume));
+            auto Jview = J.getView();
+            auto qview = particles.Q.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto orig = mesh_mp->getOrigin();
+            auto hr = mesh_mp->getMeshSpacing();
+            auto dt = this->dt;
+            bool space_charge = m_config.space_charge;
+            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
+            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                Vector_t pos = rview(i);
+                Vector_t to = rview(i);
+                Vector_t from = rm1view(i);
+                if(space_charge){
+                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
+                }
+                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
+            });
+            Kokkos::fence();
+            J.accumulateHalo();
+        }
+
+        void fieldStep(){
+            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
+            nondispersive<scalar> ndisp{
+                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
+                .a2 = sq(dt / hr_m[0]),
+                .a4 = sq(dt / hr_m[1]),
+                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0])  - 2 * calA * sq(dt / hr_m[1]),
+                .a8 = sq(dt)
+            };
+            //if(periodic_bc){
+            //    FA_n.getFieldBC().apply(FA_n);
+            //}
+            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
+            auto source = this->J.getView();
+            //FA_nm1.fillHalo();
+            FA_n.fillHalo();
+            ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
+            const auto& ldom = layout_mp->getLocalNDIndex();
+            Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                uint32_t ig = i + ldom.first()[0];
+                uint32_t jg = j + ldom.first()[1];
+                uint32_t kg = k + ldom.first()[2];
+                uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2)
+                             + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5);
+                if(!val){
+                    A_np1(i, j, k) =    -A_nm1(i,j,k)
+                            + ndisp.a1 * A_n  (i,j,k)
+                            + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
+                            + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
+                            + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
+                            + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
+                            + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
+                }
+            });
+            Kokkos::fence();
+            
+            if(!periodic_bc){
+                FA_np1.fillHalo();
+                second_order_mur_boundary_conditions bc;
+                
+                
+                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom);
+            }
+            Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
+            Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
+            //std::swap(this->A_n, this->A_nm1);
+            //std::swap(this->A_np1, this->A_n);
+            
+            evaluate_EB();
+        }
+        void evaluate_EB(){
+            FA_n.fillHalo();//FA_nm1.fillHalo();
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt = scalar(1.0) / dt;
+            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
+            auto source = this->J.getView();
+            auto EBv = this->EB.getView();
+            Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                ippl::Vector<scalar, 3> grad_phi{
+                    dAdx[0], dAdy[0], dAdz[0]
+                };
+                ippl::Vector<scalar, 3> curlA{
+                    dAdy[3] - dAdz[2],
+                    dAdz[1] - dAdx[3],
+                    dAdx[2] - dAdy[1],
+                };
+                EBv(i,j,k)[0] = -dAdt - grad_phi;
+                EBv(i,j,k)[1] = curlA;
+            });
+            Kokkos::fence();
+        }
+        template<typename callable>
+        void updateBunch(scalar time, UniaxialLorentzframe<scalar> ulb, callable undulator_field){
+            
+            Kokkos::fence();
+            auto gbview = particles.gamma_beta.getView();
+            auto ebview = particles.EB_gather.getView();
+            auto qview = particles.Q.getView();
+            auto mview = particles.mass.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto rp1view = particles.R_np1.getView();
+            scalar bunch_dt = dt / 3;
+            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            EB.fillHalo();
+            Kokkos::fence();
+            for(int bts = 0;bts < 3;bts++){
+                
+                particles.EB_gather.gather(EB, particles.R);
+                Kokkos::fence();
+                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB = ebview(i);
+                    ippl::Vector<scalar, 3> labpos = rview(i);
+
+                    ulb.primedToUnprimed(labpos, time);
+
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_frame = undulator_field(labpos);
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame);
+                    assert_isreal((EB_undulator_bunch.first[0]));
+                    assert_isreal((EB_undulator_bunch.first[1]));
+                    assert_isreal((EB_undulator_bunch.first[2]));
+                    assert_isreal((EB_undulator_bunch.second[0]));
+                    assert_isreal((EB_undulator_bunch.second[1]));
+                    assert_isreal((EB_undulator_bunch.second[2]));
+                    EB[0] += EB_undulator_bunch.first;
+                    EB[1] += EB_undulator_bunch.second;
+
+                    const scalar charge = qview(i);
+                    const scalar mass = mview(i);
+                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
+                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                    assert_isreal((ngammabeta[0]));
+                    assert_isreal((ngammabeta[1]));
+                    assert_isreal((ngammabeta[2]));
+                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                    gbview(i) = ngammabeta;
+                });
+                Kokkos::fence();
+            }
+            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
+            size_t invalid_count = 0;
+            auto origo = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;//
+            extenz[0] = nr_global[0] * hr_m[0];
+            extenz[1] = nr_global[1] * hr_m[1];
+            extenz[2] = nr_global[2] * hr_m[2];
+            Kokkos::parallel_reduce(
+                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t& ref){
+                    bool out_of_bounds = false;
+                    ippl::Vector<scalar, dim> ppos = rview(i);
+                    for(size_t d = 0;d < dim;d++){
+                        out_of_bounds |= (ppos[d] <= origo[d]);
+                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
+                    }
+                    invalid(i) = out_of_bounds;
+                    ref += out_of_bounds;
+                }, 
+                invalid_count);
+            particles.destroy(invalid, invalid_count);
+            Kokkos::fence();
+            
+        }
+    };
+    // clang-format on
+}  // namespace ippl
+bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
+    const int channels = 3; // RGB
+    const int stride = width * channels;
+    std::vector<unsigned char> flippedData(data, data + stride * height);
+
+    // Use stb_image_write to write the BMP image to the file descriptor
+    if (!stbi_write_bmp_to_func(
+            [](void* context, void* data, int size) {
+                FILE* f = reinterpret_cast<FILE*>(context);
+                fwrite(data, 1, size, f);
+            },
+            fd, width, height, channels, flippedData.data())) {
+        return false;
+    }
+
+    return true;
+}
+template<typename View, typename T, unsigned Dim>
+KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector<T, Dim>& pos, const ippl::Vector<T, 3>& origin, const ippl::Vector<T, 3>& hr, const ippl::NDIndex<3>& lDom){
+    using vector_type = ippl::Vector<T, 3>;
+
+    vector_type l;
+    //vector_type origin = v.get_mesh().getOrigin();
+    //auto lDom = v.getLayout().getLocalNDIndex();
+    //vector_type hr = v.get_mesh().getMeshSpacing();
+    for(unsigned k = 0;k < 3;k++){
+        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong
+    }                     
+
+    ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
+    ippl::Vector<T, 3> whi = l - index;
+    ippl::Vector<T, 3> wlo(1.0);
+    wlo -= whi;
+    //TODO: nghost
+    ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
+    for(unsigned k = 0;k < 3;k++){
+        if(args[k] >= v.extent(k) || args[k] == 0){
+            return typename View::value_type(0);
+        }
+    }
+    //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n";
+    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/;
+
+}
+template<typename scalar>
+scalar test_gauss_law(uint32_t n){
+
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+        
+        // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+    ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
+    using vector_type = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+
+    uint32_t pcount = 1 << 20;
+    config cfg{};
+    cfg.space_charge = true;
+    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, cfg);
+    field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
+    auto pview = field_state.particles.R.getView();
+    auto p1view = field_state.particles.R_nm1.getView();
+
+    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
+    
+    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
+        //bunch.gammaBeta[i].fill(scalar(0));
+        auto state = random_pool.get_state();
+        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        p1view(i) = pview(i);
+        random_pool.free_state(state);
+    });
+    Kokkos::fence();
+    field_state.J = scalar(0.0);
+    field_state.scatterBunch();
+    for(size_t i = 0;i < 8*n;i++){
+        field_state.fieldStep();
+    }
+    field_state.evaluate_EB();
+    Kokkos::fence();
+    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    
+    std::ofstream line("gauss_line.txt");
+    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
+    for(unsigned i = 1;i < nr[2];i++){
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
+    }
+    return 0.0f;
+}
+template<typename scalar>
+scalar test_amperes_law(uint32_t n){
+
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+        
+        // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+    ippl::Vector<scalar, 3> hx;
+    for(unsigned d = 0;d < 3;d++){
+        hx[d] = extents[d] / (scalar)nr[d];
+    }
+    using vector_type = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+
+    uint32_t pcount = 1 << 20;
+    config cfg{};cfg.space_charge = true;
+    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, cfg);
+    field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
+    auto pview = field_state.particles.R.getView();
+    auto p1view = field_state.particles.R_nm1.getView();
+    constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    scalar timestep = field_state.dt;
+    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
+    
+    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
+        //bunch.gammaBeta[i].fill(scalar(0));
+        auto state = random_pool.get_state();
+        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+        pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
+        p1view(i) = pview(i);
+        p1view(i)[1] -= vy * timestep;
+        random_pool.free_state(state);
+    });
+    Kokkos::fence();
+    field_state.J = scalar(0.0);
+    field_state.scatterBunch();
+    for(size_t i = 0;i < 8*n;i++){
+        field_state.fieldStep();
+    }
+    field_state.evaluate_EB();
+    Kokkos::fence();
+    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    
+    std::ofstream line("ampere_line.txt");
+    
+    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
+    for(unsigned i = 1;i < nr[2];i++){
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n";
+    }
+    return 0.0f;
+}
+int main(int argc, char* argv[]) {
+    using scalar = double;
+    ippl::initialize(argc, argv);
+    {
+        
+        test_gauss_law<scalar>(64);
+        test_amperes_law<scalar>(64);
+        goto exit;
+        config cfg = read_config("../config.json");
+        const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
+        cfg.extents[2] *= frame_gamma;
+        cfg.total_time /= frame_gamma;
+        
+        const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
+        const scalar frame_gammabeta = frame_gamma * frame_beta;
+        UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
+        ippl::undulator_parameters<scalar> uparams(cfg);
+        const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
+        const scalar distance_to_entry  = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma);
+        auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector<scalar, 3>& position_in_lab_frame){
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
+            ret.first.fill(0);
+            ret.second.fill(0);
+
+            if(position_in_lab_frame[2] < distance_to_entry){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
+            }
+            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
+            }
+            return ret;
+
+        };
+        BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
+        ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
+
+        std::array<bool, 3> isParallel;
+        isParallel.fill(false);
+        isParallel[2] = true;
+        
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+        ippl::Vector<scalar, 3> hx     = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])};
+        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        std::cout << "hx: " << hx << "\n";
+        std::cout << "origin: " << origin << "\n";
+        std::cout << "extents: " << cfg.extents << std::endl;
+        if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){
+            std::cerr << "Dispersion relation not satisfiable\n";
+            abort();
+        }
+
+        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
+        
+        if(ippl::Comm->rank() == 0){
+            std::cout << "Init particles: " << std::endl;
+            size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
+            fdtd_state.particles.Q = cfg.charge / actual_pc;
+            fdtd_state.particles.mass = cfg.mass / actual_pc;
+        }
+        else{
+            fdtd_state.particles.create(0);
+        }
+        {
+            auto rview = fdtd_state.particles.R.getView();
+            auto rm1view = fdtd_state.particles.R_nm1.getView();
+            ippl::Vector<scalar, 3> meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum());
+    
+            Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                rview(i) -= meanpos;
+                rm1view(i) -= meanpos;
+            });
+        }
+        fdtd_state.particles.setParticleBC(ippl::NO);
+        //fdtd_state.scatterBunch();
+        //std::cout << cfg.charge << "\n";
+        
+        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
+        uint64_t starttime =  nanoTime();
+        std::ofstream rad;
+        FILE* ffmpeg_file = nullptr;
+        if(ippl::Comm->rank() == 0){
+            rad = std::ofstream("radiation.txt");
+            const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
+            if(cfg.output_rhythm != 0){
+                ffmpeg_file = popen(ffmpegCmd, "w");
+            }
+        }
+
+
+        for(size_t i = 0;i < timesteps_required;i++){
+            fdtd_state.J = scalar(0.0);
+            fdtd_state.playout.update(fdtd_state.particles);
+            fdtd_state.scatterBunch();
+            //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
+            fdtd_state.fieldStep();
+            fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
+            auto ldom = layout.getLocalNDIndex();
+            auto nrg = fdtd_state.nr_global;
+            auto ebv = fdtd_state.EB.getView();
+            double radiation = 0.0;
+            Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
+                //uint32_t ig = i + ldom.first()[0];
+                //uint32_t jg = j + ldom.first()[1];
+                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]};
+                ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
+                ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
+                uint32_t kg = k + ldom.first()[2];
+                if(kg == nrg[2] - 3){
+                    ref += Elab.cross(Blab)[2];
+                }
+
+            }, radiation);
+            double radiation_in_watt_on_this_rank = radiation *
+            double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) *
+            fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
+            double radiation_in_watt_global = 0.0;
+            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
+            if(ippl::Comm->rank() == 0){
+                ippl::Vector<scalar, 3> pos{0,0,0};
+                frame_boost.primedToUnprimed(pos, fdtd_state.dt * i);
+                rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
+            }
+            //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
+            //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
+            int rank = ippl::Comm->rank();
+            int size = ippl::Comm->size();
+            if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
+
+                
+                int img_height = 400;
+                int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]);
+                float* imagedata = new float[img_width * img_height * 3];
+                std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
+                float* idata_recvbuffer = new float[img_width * img_height * 3];
+                int floatcount = img_width * img_height * 3;
+                uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
+                std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
+                auto phmirror = fdtd_state.particles.R.getHostMirror();
+                Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView());
+                for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){
+                    ippl::Vector<scalar, 3> ppos = phmirror(hi);
+                    ppos -= mesh.getOrigin();
+                    ppos /= cfg.extents.cast<scalar>();
+                    int x_imgcoord = ppos[2] * img_width;
+                    int y_imgcoord = ppos[0] * img_height;
+                    //printf("%d, %d\n", x_imgcoord, y_imgcoord);
+                    if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){
+                        const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
+                        //std::cout << intensity << "\n";
+                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = 
+                        std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
+                    }
+                };
+                auto ebh = fdtd_state.EB.getHostMirror();
+                Kokkos::deep_copy(ebh, fdtd_state.EB.getView());
+
+                //double exp_avg = double(exp_sum) / double(acount);
+                {
+                    for(int i = 1;i < img_width;i++){
+                        for(int j = 1;j < img_height;j++){
+                            int i_remap = (double(i) / (img_width  - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
+                            int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
+                            if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
+                                if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
+                                    ippl::Vector<ippl::Vector<scalar, 3>, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
+                                    
+                                    ippl::Vector<scalar, 3> poynting = acc[0].cross(acc[1]);
+                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> eblab =  frame_boost.inverse_transform_EB(
+                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(acc[0], acc[1])
+                                    );
+                                    ippl::Vector<scalar, 3> poyntinglab = eblab.first.cross(eblab.second);
+                                    poynting = poyntinglab;
+                                    if(poynting.norm() > 0){
+                                        //std::cout << poynting.norm() << "\n";
+                                    }
+                                    float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f;
+                                    int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                                    imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                }
+                            }
+                        }
+                    }
+                }
+                int mask = 1;
+                while (mask < size) {
+                    int partner = rank ^ mask;
+                    //if((rank & (mask - 1)) == 0)
+                    {
+                        if ((rank & mask) == 0) {
+                            // Send data to partner
+                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
+                            // Apply image summation
+                            for(int f = 0;f < floatcount;f++){
+                                imagedata[f] += idata_recvbuffer[f];
+                            }
+                        } else {
+                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator());
+                            // Receive data from partner and apply reduction
+                            
+                        }
+                    }
+                    mask <<= 1;  // Move to next bit position for pairing
+                }
+                if(rank == 0){
+                    char output[1024] = {0};
+                    
+                    snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
+                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
+                    if(ffmpeg_file != nullptr)
+                        writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
+                }
+                delete[] imagedata;
+                delete[] idata_recvbuffer;
+                delete[] imagedata_final;
+            }    
+        }
+        uint64_t endtime = nanoTime();
+        std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
+    }
+    exit:
+    ippl::finalize();
+}
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cd2dc1364..d130045c9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -64,7 +64,7 @@ add_ippl_sources (Ippl.cpp)
 
 add_subdirectory (Communicate)
 if (ENABLE_FFT)
-    set (HEFFTE_LIBRARY Heffte::Heffte)
+    set (HEFFTE_LIBRARY Heffte)
     add_subdirectory (FFT)
 else()
     set (HEFFTE_LIBRARY "")
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index f4a1d5b9a..827ed7fef 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -1,11 +1,14 @@
 #ifndef IPPL_FDTD_H
 #define IPPL_FDTD_H
+#include <cstddef>
+using std::size_t;
 #include "Types/Vector.h"
 
 #include "Field/Field.h"
 
 #include "FieldLayout/FieldLayout.h"
 #include "Meshes/UniformCartesian.h"
+#include "MaxwellSolvers/Maxwell.h"
 constexpr double sqrt_4pi = 3.54490770181103205459;
 constexpr double alpha_scaling_factor = 1e30;
 constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
@@ -41,11 +44,340 @@ constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length
 constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G;
 constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons;
 
+constexpr double sq(double x){
+    return x * x;
+}
+constexpr float sq(float x){
+    return x * x;
+}
+
 namespace ippl {
-    //template <typename scalar>
-    //struct FDTDFieldState{
-    //    
-    //};
+    enum fdtd_bc{
+        periodic, absorbing
+    };
+
+    template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
+    class StandardFDTDSolver : Maxwell<EMField, FourField> {
+        public:
+        constexpr static unsigned Dim = EMField::dim;
+        using scalar = typename EMField::value_type::value_type;
+        using Vector_t = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t = typename FourField::value_type;
+        StandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
+            Maxwell<EMField, FourField>::setSource(source);
+            Maxwell<EMField, FourField>::setEMFields(E, B);
+            initialize();
+        }
+        virtual void solve()override{
+            step();
+            timeShift();
+            evaluate_EB();
+        }
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename FourField::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<FourField>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<Dim * 2>{});
+            A_n  .setFieldBC(vector_bcs);
+            A_np1.setFieldBC(vector_bcs);
+            A_nm1.setFieldBC(vector_bcs);
+        }
+
+
+        FourField A_n;
+        FourField A_np1;
+        FourField A_nm1;
+        
+        private:
+        void timeShift(){
+
+            //Look into this, maybe cyclic swap is better
+            Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
+            Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
+        }
+
+        void step(){
+            const auto& ldom    = layout_mp->getLocalNDIndex();
+            const int nghost    = A_n.getNghost();
+            const auto aview    = A_n  .getView();
+            const auto anp1view = A_np1.getView();
+            const auto anm1view = A_nm1.getView();
+            const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
+
+            const scalar a1 = scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2]));
+            const scalar a2 = sq(dt / hr_m[0]);
+            const scalar a4 = sq(dt / hr_m[1]);
+            const scalar a6 = sq(dt / hr_m[2]);
+            const scalar a8 = sq(dt);
+            Vector<uint32_t, Dim> true_nr = nr_m;
+            true_nr += (nghost * 2);
+            A_n.getFieldBC().apply(A_n);
+            A_np1.getFieldBC().apply(A_np1);
+            A_nm1.getFieldBC().apply(A_nm1);
+            Kokkos::parallel_for(
+            "Scalar potential update", ippl::getRangePolicy(aview, nghost),
+            KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
+                // global indices
+                const uint32_t ig = i + ldom.first()[0];
+                const uint32_t jg = j + ldom.first()[1];
+                const uint32_t kg = k + ldom.first()[2];
+                uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
+                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+                
+                if(val == 0){
+                    FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k)
+                                      + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k))
+                                      + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k))
+                                      + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1))
+                                      + a8 * (-source_view(i, j, k));
+                    anp1view(i, j, k) = interior;
+                }
+                else{
+                    //std::cout << i << ", " << j << ", " << k << "\n";
+                }
+            });
+            Kokkos::fence();
+            A_np1.fillHalo();
+            A_np1.getFieldBC().apply(A_np1);
+        }
+        void evaluate_EB(){
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView();
+            auto source = Maxwell<EMField, FourField>::source_mp->getView();
+            auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
+            auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
+
+            Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                ippl::Vector<scalar, 3> grad_phi{
+                    dAdx[0], dAdy[0], dAdz[0]
+                };
+                ippl::Vector<scalar, 3> curlA{
+                    dAdy[3] - dAdz[2],
+                    dAdz[1] - dAdx[3],
+                    dAdx[2] - dAdy[1],
+                };
+                Eview(i,j,k) = -dAdt - grad_phi;
+                Bview(i,j,k) = curlA;
+            });
+            Kokkos::fence();
+        }
+
+
+        bool periodic_bc;
+        
+        typename FourField::Mesh_t* mesh_mp;
+        FieldLayout<Dim>* layout_mp;
+        NDIndex<Dim> domain_m;
+        Vector_t hr_m;
+        scalar dt;
+        Vector<int, Dim> nr_m;
+        
+        void initialize() {
+        // get layout and mesh
+            layout_mp = &(this->source_mp->getLayout());
+            mesh_mp   = &(this->source_mp->get_mesh());
+            
+            // get mesh spacing, domain, and mesh size
+            hr_m     = mesh_mp->getMeshSpacing();
+            dt = hr_m[0] / 2;
+            for (unsigned int i = 0; i < Dim; ++i){
+                dt = std::min(dt, hr_m[i] / 2);
+            }
+            domain_m = layout_mp->getDomain();
+            for (unsigned int i = 0; i < Dim; ++i)
+                nr_m[i] = domain_m[i].length();
+
+            // initialize fields
+            A_nm1.initialize(*mesh_mp, *layout_mp);
+            A_n.initialize(*mesh_mp, *layout_mp);
+            A_np1.initialize(*mesh_mp, *layout_mp);
+
+            A_nm1 = 0.0;
+            A_n   = 0.0;
+            A_np1 = 0.0;
+        };
+    };
+    /**
+     * @brief Nonstandard Finite-Difference Time-Domain
+     * 
+     * @tparam EMField 
+     * @tparam FourField 
+     */
+    template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
+    class NonStandardFDTDSolver : Maxwell<EMField, FourField> {
+        public:
+        constexpr static unsigned Dim = EMField::dim;
+        using scalar = typename EMField::value_type::value_type;
+        using Vector_t = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t = typename FourField::value_type;
+        NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
+            auto hx = source.get_mesh().getMeshSpacing();
+            if((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1){
+                std::cerr << "Dispersion-free CFL condition not satisfiable\n";
+                std::abort();
+            }
+            Maxwell<EMField, FourField>::setSource(source);
+            Maxwell<EMField, FourField>::setEMFields(E, B);
+            initialize();
+        }
+        virtual void solve()override{
+            step();
+            timeShift();
+            evaluate_EB();
+        }
+        FourField A_n;
+        FourField A_np1;
+        FourField A_nm1;
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename FourField::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<FourField>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<Dim * 2>{});
+            A_n  .setFieldBC(vector_bcs);
+            A_np1.setFieldBC(vector_bcs);
+            A_nm1.setFieldBC(vector_bcs);
+        }
+
+
+        private:
+        void timeShift(){
+
+            //Look into this, maybe cyclic swap is better
+            Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
+            Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
+        }
+        template<typename scalar>
+        struct nondispersive{
+            scalar a1;
+            scalar a2;
+            scalar a4;
+            scalar a6;
+            scalar a8;
+        };
+        void step(){
+            const auto& ldom    = layout_mp->getLocalNDIndex();
+            const int nghost    = A_n.getNghost();
+            const auto aview    = A_n  .getView();
+            const auto anp1view = A_np1.getView();
+            const auto anm1view = A_nm1.getView();
+            const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
+
+            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
+            nondispersive<scalar> ndisp{
+                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
+                .a2 = sq(dt / hr_m[0]),
+                .a4 = sq(dt / hr_m[1]),
+                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]),
+                .a8 = sq(dt)
+            };
+            Vector<uint32_t, Dim> true_nr = nr_m;
+            true_nr += (nghost * 2);
+            std::cout << hr_m << "\n";
+            std::cout << dt << "\n";
+            A_n.getFieldBC().apply(A_n);
+            A_np1.getFieldBC().apply(A_np1);
+            A_nm1.getFieldBC().apply(A_nm1);
+            Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                uint32_t ig = i + ldom.first()[0];
+                uint32_t jg = j + ldom.first()[1];
+                uint32_t kg = k + ldom.first()[2];
+                uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
+                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+                if(!val){
+                    anp1view(i, j, k) = -anm1view(i,j,k)
+                            + ndisp.a1 * aview(i,j,k)
+                            + ndisp.a2 * (calA * aview(i + 1, j, k - 1) + (1 - 2 * calA) * aview(i + 1, j, k) + calA * aview(i + 1, j, k + 1))
+                            + ndisp.a2 * (calA * aview(i - 1, j, k - 1) + (1 - 2 * calA) * aview(i - 1, j, k) + calA * aview(i - 1, j, k + 1))
+                            + ndisp.a4 * (calA * aview(i, j + 1, k - 1) + (1 - 2 * calA) * aview(i, j + 1, k) + calA * aview(i, j + 1, k + 1))
+                            + ndisp.a4 * (calA * aview(i, j - 1, k - 1) + (1 - 2 * calA) * aview(i, j - 1, k) + calA * aview(i, j - 1, k + 1))
+                            + ndisp.a6 * aview(i, j, k + 1) + ndisp.a6 * aview(i, j, k - 1) + ndisp.a8 * source_view(i, j, k);
+                }
+            });
+            Kokkos::fence();
+            A_np1.fillHalo();
+            A_np1.getFieldBC().apply(A_np1);
+        }
+        void evaluate_EB(){
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView();
+            auto source = Maxwell<EMField, FourField>::source_mp->getView();
+            auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
+            auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
+
+            Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                ippl::Vector<scalar, 3> grad_phi{
+                    dAdx[0], dAdy[0], dAdz[0]
+                };
+                ippl::Vector<scalar, 3> curlA{
+                    dAdy[3] - dAdz[2],
+                    dAdz[1] - dAdx[3],
+                    dAdx[2] - dAdy[1],
+                };
+                Eview(i,j,k) = -dAdt - grad_phi;
+                Bview(i,j,k) = curlA;
+            });
+            Kokkos::fence();
+        }
+
+
+
+        
+        typename FourField::Mesh_t* mesh_mp;
+        FieldLayout<Dim>* layout_mp;
+        NDIndex<Dim> domain_m;
+        Vector_t hr_m;
+        scalar dt;
+        Vector<int, Dim> nr_m;
+        bool periodic_bc;
+        
+        void initialize() {
+        // get layout and mesh
+            layout_mp = &(this->source_mp->getLayout());
+            mesh_mp   = &(this->source_mp->get_mesh());
+            
+            // get mesh spacing, domain, and mesh size
+            hr_m     = mesh_mp->getMeshSpacing();
+            dt = hr_m[2];
+            domain_m = layout_mp->getDomain();
+            for (unsigned int i = 0; i < Dim; ++i)
+                nr_m[i] = domain_m[i].length();
+
+            // initialize fields
+            A_nm1.initialize(*mesh_mp, *layout_mp);
+            A_n.initialize(*mesh_mp, *layout_mp);
+            A_np1.initialize(*mesh_mp, *layout_mp);
+
+            A_nm1 = 0.0;
+            A_n   = 0.0;
+            A_np1 = 0.0;
+        };
+    };
 }
 
 #endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/Maxwell.h b/src/MaxwellSolvers/Maxwell.h
index c493a1c8d..21e2a2c58 100644
--- a/src/MaxwellSolvers/Maxwell.h
+++ b/src/MaxwellSolvers/Maxwell.h
@@ -15,32 +15,31 @@
 
 namespace ippl {
 
-    template <typename EMField, typename SourceField>
+    template <typename EMField, typename FourField>
     class Maxwell {
     public:
         // typedefs for the different fields and vector fields
-        using typeR = typename SourceField::value_type;
+        using typeR = typename FourField::value_type;
 
         constexpr static unsigned Dim = EMField::dim;
         typedef typename EMField::Mesh_t Mesh_t;
         typedef typename EMField::Centering_t Centering_t;
         typedef Vector<typeR, Dim> Vector_t;
-        typedef Field<Vector_t, Dim, Mesh_t, Centering_t> VectorSourceField_t;
 
         // define type for field layout
         typedef FieldLayout<Dim> FieldLayout_t;
 
         // type for communication buffers
-        using memory_space = typename SourceField::memory_space;
-        using buffer_type  = Communicate::buffer_type<memory_space>;
+        using memory_space = typename FourField::memory_space;
+        //using buffer_type  = Communicate::buffer_type<memory_space>;
 
         /*!
          * Default constructor for Maxwell solvers;
          */
         Maxwell() {}
 
-        Maxwell(SourceField& charge, VectorSourceField_t& current, EMField& E, EMField& B) {
-            setSources(charge, current);
+        Maxwell(FourField& source, EMField& E, EMField& B) {
+            setSource(source);
             setEMFields(E, B);
         }
 
@@ -49,9 +48,8 @@ namespace ippl {
          * @param charge Reference to rho field
          * @param current Reference to J field
          */
-        virtual void setSources(SourceField& charge, VectorSourceField_t& current) {
-            rhoN_mp = &charge;
-            JN_mp   = &current;
+        virtual void setSource(FourField& charge) {
+            source_mp = &charge;
         }
 
         /*!
@@ -73,8 +71,7 @@ namespace ippl {
 
     protected:
         // fields containing reference to charge and current
-        SourceField* rhoN_mp       = nullptr;
-        VectorSourceField_t* JN_mp = nullptr;
+        FourField* source_mp = nullptr;
 
         // E and B fields
         EMField* En_mp = nullptr;
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 433f16ec1..6cf9d4d15 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -1,2437 +1,136 @@
+#include <cstddef>
+using std::size_t;
+#include <Kokkos_Core.hpp>
 #include "Ippl.h"
-
 #include "Types/Vector.h"
-
 #include "Field/Field.h"
-#include <chrono>
-#include <cstdio> // For popen
 #include "MaxwellSolvers/FDTD.h"
-#define JSON_HAS_RANGES 0 //Merlin compilation
-#include <json.hpp>
-#include <fstream>
-#include <list>
-#include <Kokkos_Random.hpp>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
-#include <stb_image_write.hpp>
-constexpr float turbo_cm[256][3] = {
-  {0.18995,0.07176,0.23217},
-  {0.19483,0.08339,0.26149},
-  {0.19956,0.09498,0.29024},
-  {0.20415,0.10652,0.31844},
-  {0.20860,0.11802,0.34607},
-  {0.21291,0.12947,0.37314},
-  {0.21708,0.14087,0.39964},
-  {0.22111,0.15223,0.42558},
-  {0.22500,0.16354,0.45096},
-  {0.22875,0.17481,0.47578},
-  {0.23236,0.18603,0.50004},
-  {0.23582,0.19720,0.52373},
-  {0.23915,0.20833,0.54686},
-  {0.24234,0.21941,0.56942},
-  {0.24539,0.23044,0.59142},
-  {0.24830,0.24143,0.61286},
-  {0.25107,0.25237,0.63374},
-  {0.25369,0.26327,0.65406},
-  {0.25618,0.27412,0.67381},
-  {0.25853,0.28492,0.69300},
-  {0.26074,0.29568,0.71162},
-  {0.26280,0.30639,0.72968},
-  {0.26473,0.31706,0.74718},
-  {0.26652,0.32768,0.76412},
-  {0.26816,0.33825,0.78050},
-  {0.26967,0.34878,0.79631},
-  {0.27103,0.35926,0.81156},
-  {0.27226,0.36970,0.82624},
-  {0.27334,0.38008,0.84037},
-  {0.27429,0.39043,0.85393},
-  {0.27509,0.40072,0.86692},
-  {0.27576,0.41097,0.87936},
-  {0.27628,0.42118,0.89123},
-  {0.27667,0.43134,0.90254},
-  {0.27691,0.44145,0.91328},
-  {0.27701,0.45152,0.92347},
-  {0.27698,0.46153,0.93309},
-  {0.27680,0.47151,0.94214},
-  {0.27648,0.48144,0.95064},
-  {0.27603,0.49132,0.95857},
-  {0.27543,0.50115,0.96594},
-  {0.27469,0.51094,0.97275},
-  {0.27381,0.52069,0.97899},
-  {0.27273,0.53040,0.98461},
-  {0.27106,0.54015,0.98930},
-  {0.26878,0.54995,0.99303},
-  {0.26592,0.55979,0.99583},
-  {0.26252,0.56967,0.99773},
-  {0.25862,0.57958,0.99876},
-  {0.25425,0.58950,0.99896},
-  {0.24946,0.59943,0.99835},
-  {0.24427,0.60937,0.99697},
-  {0.23874,0.61931,0.99485},
-  {0.23288,0.62923,0.99202},
-  {0.22676,0.63913,0.98851},
-  {0.22039,0.64901,0.98436},
-  {0.21382,0.65886,0.97959},
-  {0.20708,0.66866,0.97423},
-  {0.20021,0.67842,0.96833},
-  {0.19326,0.68812,0.96190},
-  {0.18625,0.69775,0.95498},
-  {0.17923,0.70732,0.94761},
-  {0.17223,0.71680,0.93981},
-  {0.16529,0.72620,0.93161},
-  {0.15844,0.73551,0.92305},
-  {0.15173,0.74472,0.91416},
-  {0.14519,0.75381,0.90496},
-  {0.13886,0.76279,0.89550},
-  {0.13278,0.77165,0.88580},
-  {0.12698,0.78037,0.87590},
-  {0.12151,0.78896,0.86581},
-  {0.11639,0.79740,0.85559},
-  {0.11167,0.80569,0.84525},
-  {0.10738,0.81381,0.83484},
-  {0.10357,0.82177,0.82437},
-  {0.10026,0.82955,0.81389},
-  {0.09750,0.83714,0.80342},
-  {0.09532,0.84455,0.79299},
-  {0.09377,0.85175,0.78264},
-  {0.09287,0.85875,0.77240},
-  {0.09267,0.86554,0.76230},
-  {0.09320,0.87211,0.75237},
-  {0.09451,0.87844,0.74265},
-  {0.09662,0.88454,0.73316},
-  {0.09958,0.89040,0.72393},
-  {0.10342,0.89600,0.71500},
-  {0.10815,0.90142,0.70599},
-  {0.11374,0.90673,0.69651},
-  {0.12014,0.91193,0.68660},
-  {0.12733,0.91701,0.67627},
-  {0.13526,0.92197,0.66556},
-  {0.14391,0.92680,0.65448},
-  {0.15323,0.93151,0.64308},
-  {0.16319,0.93609,0.63137},
-  {0.17377,0.94053,0.61938},
-  {0.18491,0.94484,0.60713},
-  {0.19659,0.94901,0.59466},
-  {0.20877,0.95304,0.58199},
-  {0.22142,0.95692,0.56914},
-  {0.23449,0.96065,0.55614},
-  {0.24797,0.96423,0.54303},
-  {0.26180,0.96765,0.52981},
-  {0.27597,0.97092,0.51653},
-  {0.29042,0.97403,0.50321},
-  {0.30513,0.97697,0.48987},
-  {0.32006,0.97974,0.47654},
-  {0.33517,0.98234,0.46325},
-  {0.35043,0.98477,0.45002},
-  {0.36581,0.98702,0.43688},
-  {0.38127,0.98909,0.42386},
-  {0.39678,0.99098,0.41098},
-  {0.41229,0.99268,0.39826},
-  {0.42778,0.99419,0.38575},
-  {0.44321,0.99551,0.37345},
-  {0.45854,0.99663,0.36140},
-  {0.47375,0.99755,0.34963},
-  {0.48879,0.99828,0.33816},
-  {0.50362,0.99879,0.32701},
-  {0.51822,0.99910,0.31622},
-  {0.53255,0.99919,0.30581},
-  {0.54658,0.99907,0.29581},
-  {0.56026,0.99873,0.28623},
-  {0.57357,0.99817,0.27712},
-  {0.58646,0.99739,0.26849},
-  {0.59891,0.99638,0.26038},
-  {0.61088,0.99514,0.25280},
-  {0.62233,0.99366,0.24579},
-  {0.63323,0.99195,0.23937},
-  {0.64362,0.98999,0.23356},
-  {0.65394,0.98775,0.22835},
-  {0.66428,0.98524,0.22370},
-  {0.67462,0.98246,0.21960},
-  {0.68494,0.97941,0.21602},
-  {0.69525,0.97610,0.21294},
-  {0.70553,0.97255,0.21032},
-  {0.71577,0.96875,0.20815},
-  {0.72596,0.96470,0.20640},
-  {0.73610,0.96043,0.20504},
-  {0.74617,0.95593,0.20406},
-  {0.75617,0.95121,0.20343},
-  {0.76608,0.94627,0.20311},
-  {0.77591,0.94113,0.20310},
-  {0.78563,0.93579,0.20336},
-  {0.79524,0.93025,0.20386},
-  {0.80473,0.92452,0.20459},
-  {0.81410,0.91861,0.20552},
-  {0.82333,0.91253,0.20663},
-  {0.83241,0.90627,0.20788},
-  {0.84133,0.89986,0.20926},
-  {0.85010,0.89328,0.21074},
-  {0.85868,0.88655,0.21230},
-  {0.86709,0.87968,0.21391},
-  {0.87530,0.87267,0.21555},
-  {0.88331,0.86553,0.21719},
-  {0.89112,0.85826,0.21880},
-  {0.89870,0.85087,0.22038},
-  {0.90605,0.84337,0.22188},
-  {0.91317,0.83576,0.22328},
-  {0.92004,0.82806,0.22456},
-  {0.92666,0.82025,0.22570},
-  {0.93301,0.81236,0.22667},
-  {0.93909,0.80439,0.22744},
-  {0.94489,0.79634,0.22800},
-  {0.95039,0.78823,0.22831},
-  {0.95560,0.78005,0.22836},
-  {0.96049,0.77181,0.22811},
-  {0.96507,0.76352,0.22754},
-  {0.96931,0.75519,0.22663},
-  {0.97323,0.74682,0.22536},
-  {0.97679,0.73842,0.22369},
-  {0.98000,0.73000,0.22161},
-  {0.98289,0.72140,0.21918},
-  {0.98549,0.71250,0.21650},
-  {0.98781,0.70330,0.21358},
-  {0.98986,0.69382,0.21043},
-  {0.99163,0.68408,0.20706},
-  {0.99314,0.67408,0.20348},
-  {0.99438,0.66386,0.19971},
-  {0.99535,0.65341,0.19577},
-  {0.99607,0.64277,0.19165},
-  {0.99654,0.63193,0.18738},
-  {0.99675,0.62093,0.18297},
-  {0.99672,0.60977,0.17842},
-  {0.99644,0.59846,0.17376},
-  {0.99593,0.58703,0.16899},
-  {0.99517,0.57549,0.16412},
-  {0.99419,0.56386,0.15918},
-  {0.99297,0.55214,0.15417},
-  {0.99153,0.54036,0.14910},
-  {0.98987,0.52854,0.14398},
-  {0.98799,0.51667,0.13883},
-  {0.98590,0.50479,0.13367},
-  {0.98360,0.49291,0.12849},
-  {0.98108,0.48104,0.12332},
-  {0.97837,0.46920,0.11817},
-  {0.97545,0.45740,0.11305},
-  {0.97234,0.44565,0.10797},
-  {0.96904,0.43399,0.10294},
-  {0.96555,0.42241,0.09798},
-  {0.96187,0.41093,0.09310},
-  {0.95801,0.39958,0.08831},
-  {0.95398,0.38836,0.08362},
-  {0.94977,0.37729,0.07905},
-  {0.94538,0.36638,0.07461},
-  {0.94084,0.35566,0.07031},
-  {0.93612,0.34513,0.06616},
-  {0.93125,0.33482,0.06218},
-  {0.92623,0.32473,0.05837},
-  {0.92105,0.31489,0.05475},
-  {0.91572,0.30530,0.05134},
-  {0.91024,0.29599,0.04814},
-  {0.90463,0.28696,0.04516},
-  {0.89888,0.27824,0.04243},
-  {0.89298,0.26981,0.03993},
-  {0.88691,0.26152,0.03753},
-  {0.88066,0.25334,0.03521},
-  {0.87422,0.24526,0.03297},
-  {0.86760,0.23730,0.03082},
-  {0.86079,0.22945,0.02875},
-  {0.85380,0.22170,0.02677},
-  {0.84662,0.21407,0.02487},
-  {0.83926,0.20654,0.02305},
-  {0.83172,0.19912,0.02131},
-  {0.82399,0.19182,0.01966},
-  {0.81608,0.18462,0.01809},
-  {0.80799,0.17753,0.01660},
-  {0.79971,0.17055,0.01520},
-  {0.79125,0.16368,0.01387},
-  {0.78260,0.15693,0.01264},
-  {0.77377,0.15028,0.01148},
-  {0.76476,0.14374,0.01041},
-  {0.75556,0.13731,0.00942},
-  {0.74617,0.13098,0.00851},
-  {0.73661,0.12477,0.00769},
-  {0.72686,0.11867,0.00695},
-  {0.71692,0.11268,0.00629},
-  {0.70680,0.10680,0.00571},
-  {0.69650,0.10102,0.00522},
-  {0.68602,0.09536,0.00481},
-  {0.67535,0.08980,0.00449},
-  {0.66449,0.08436,0.00424},
-  {0.65345,0.07902,0.00408},
-  {0.64223,0.07380,0.00401},
-  {0.63082,0.06868,0.00401},
-  {0.61923,0.06367,0.00410},
-  {0.60746,0.05878,0.00427},
-  {0.59550,0.05399,0.00453},
-  {0.58336,0.04931,0.00486},
-  {0.57103,0.04474,0.00529},
-  {0.55852,0.04028,0.00579},
-  {0.54583,0.03593,0.00638},
-  {0.53295,0.03169,0.00705},
-  {0.51989,0.02756,0.00780},
-  {0.50664,0.02354,0.00863},
-  {0.49321,0.01963,0.00955},
-  {0.47960,0.01583,0.01055}
-};
-
-
-
-
-uint64_t nanoTime(){
-    using namespace std;
-    using namespace chrono;
-    return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
-}
-//CONFIG
-KOKKOS_INLINE_FUNCTION bool isNaN(float x){
-    #ifdef __CUDA_ARCH__
-    return isnan(x);
-    #else
-    return std::isnan(x);
-    #endif
-}
-KOKKOS_INLINE_FUNCTION bool isINF(float x){
-    #ifdef __CUDA_ARCH__
-    return isinf(x);
-    #else
-    return std::isinf(x);
-    #endif
-}
-KOKKOS_INLINE_FUNCTION bool isNaN(double x){
-    #ifdef __CUDA_ARCH__
-    return isnan(x);
-    #else
-    return std::isnan(x);
-    #endif
-}
-KOKKOS_INLINE_FUNCTION bool isINF(double x){
-    #ifdef __CUDA_ARCH__
-    return isinf(x);
-    #else
-    return std::isinf(x);
-    #endif
-}
-#define assert_isreal(X) assert(!isNaN(X) && !isINF(X))
-template<unsigned int Dim, typename callable, typename... Ts>
-KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from, ippl::Vector<uint32_t, Dim> to, Ts... args){
-    if constexpr(sizeof...(Ts) == Dim){
-        c(args...);
-    }
-    else{
-        for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){
-            serial_for(c, from, to, args..., i);
-        }
-    }
-}
-
-
-
-
-
-struct config {
-    using scalar = double;
-
-    //using length_unit = funits::length<scalar, funits::planck_base>;
-    //using duration_unit = funits::time<scalar, funits::planck_base>;
-    // GRID PARAMETERS
-    ippl::Vector<uint32_t, 3> resolution;
-
-    ippl::Vector<scalar, 3> extents;
-    scalar total_time;
-    scalar timestep_ratio;
-
-    scalar length_scale_in_jobfile, temporal_scale_in_jobfile;
-
-        // All in unit_charge, or unit_mass
-    scalar charge, mass;
-
-    uint64_t num_particles;
-    bool space_charge;
-
-        // BUNCH PARAMETERS
-    ippl::Vector<scalar, 3> mean_position;
-    ippl::Vector<scalar, 3> sigma_position;
-    ippl::Vector<scalar, 3> position_truncations;
-    ippl::Vector<scalar, 3> sigma_momentum;
-    scalar bunch_gamma;
-
-    scalar undulator_K;
-    scalar undulator_period;
-    scalar undulator_length;
-
-    uint32_t output_rhythm;
-    std::string output_path;
-    std::unordered_map<std::string, double> experiment_options;
-};
-template<typename scalar, unsigned Dim>
-ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j){
-    if(j.is_array()){
-        assert(j.size() == Dim);
-        ippl::Vector<scalar, Dim> ret;
-        for(unsigned i = 0;i < Dim;i++)
-            ret[i] = (scalar)j[i];
-        return ret;
-    }
-    else{
-        std::cerr << "Warning: Obtaining Vector from scalar json\n";
-        ippl::Vector<scalar, Dim> ret;
-        ret.fill((scalar)j);
-        return ret;
-    }
-}
-template<size_t N, typename T>
-struct DefaultedStringLiteral {
-    constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) {
-        std::copy_n(str, N, key);
-    }
-    
-    T value;
-    char key[N];
-};
-template<size_t N>
-struct StringLiteral {
-    constexpr StringLiteral(const char (&str)[N]) {
-        std::copy_n(str, N, value);
-    }
-    
-    char value[N];
-    constexpr DefaultedStringLiteral<N, int> operator>>(int t)const noexcept{
-        return DefaultedStringLiteral<N, int>(value, t);
-    }
-    constexpr size_t size()const noexcept{return N - 1;}
-};
-template<StringLiteral lit>
-constexpr size_t chash(){
-    size_t hash = 5381;
-    int c;
-
-    for(size_t i = 0;i < lit.size();i++){
-        c = lit.value[i];
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
-    }
-
-    return hash;
-}
-size_t chash(const char* val) {
-    size_t hash = 5381;
-    int c;
-
-    while ((c = *val++)) {
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
-    }
-
-    return hash;
-}
-size_t chash(const std::string& _val) {
-    size_t hash = 5381;
-    const char* val = _val.c_str();
-    int c;
-
-    while ((c = *val++)) {
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
-    }
-
-    return hash;
-}
-std::string lowercase_singular(std::string str) {
-    // Convert string to lowercase
-    std::transform(str.begin(), str.end(), str.begin(), ::tolower);
-
-    // Check if the string ends with "s" and remove it if it does
-    if (!str.empty() && str.back() == 's') {
-        str.pop_back();
-    }
-
-    return str;
-}
-double get_time_multiplier(const nlohmann::json& j){
-    std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]);
-    double time_factor = 1.0;
-    switch (chash(length_scale_string)) {
-        case chash<"planck-time">():
-        case chash<"plancktime">():
-        case chash<"pt">():
-        case chash<"natural">():
-            time_factor = unit_time_in_seconds;
-        break;
-        case chash<"picosecond">():
-            time_factor = 1e-12;
-        break;
-        case chash<"nanosecond">():
-            time_factor = 1e-9;
-        break;
-        case chash<"microsecond">():
-            time_factor = 1e-6;
-        break;
-        case chash<"millisecond">():
-            time_factor = 1e-3;
-        break;
-        case chash<"second">():
-            time_factor = 1.0;
-        break;
-        default:
-            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n";
-        break;
-    }
-    return time_factor;
-}
-double get_length_multiplier(const nlohmann::json& options){
-    std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]);
-    double length_factor = 1.0;
-    switch (chash(length_scale_string)) {
-        case chash<"planck-length">():
-        case chash<"plancklength">():
-        case chash<"pl">():
-        case chash<"natural">():
-            length_factor = unit_length_in_meters;
-        break;
-        case chash<"picometer">():
-            length_factor = 1e-12;
-        break;
-        case chash<"nanometer">():
-            length_factor = 1e-9;
-        break;
-        case chash<"micrometer">():
-            length_factor = 1e-6;
-        break;
-        case chash<"millimeter">():
-            length_factor = 1e-3;
-        break;
-        case chash<"meter">():
-            length_factor = 1.0;
-        break;
-        default:
-            std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n";
-        break;
-    }
-    return length_factor;
-}
-config read_config(const char *filepath){
-    std::ifstream cfile(filepath);
-    nlohmann::json j;
-    cfile >> j;
-    config::scalar lmult = get_length_multiplier(j);
-    config::scalar tmult = get_time_multiplier(j);
-    config ret;
-
-    ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters;
-    ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters;
-    ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters;
-    ret.resolution = getVector<uint32_t, 3>(j["mesh"]["resolution"]);
-
-    //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
-    //std::cerr << "Tmult: " << tmult << "\n";
-    if(j.contains("timestep-ratio")){
-        ret.timestep_ratio = (config::scalar)j["timestep-ratio"];
-    }
-
-    else{
-        ret.timestep_ratio = 1;
-    }
-    ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
-    ret.space_charge = (bool)(j["mesh"]["space-charge"]);
-    ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]);
-    if(ret.bunch_gamma < config::scalar(1)){
-        std::cerr << "Gamma must be >= 1\n";
-        exit(1);
-    }
-    assert(j.contains("undulator"));
-    assert(j["undulator"].contains("static-undulator"));
-
-    ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"];
-    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters;
-    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters;
-    assert(!std::isnan(ret.undulator_length));
-    assert(!std::isnan(ret.undulator_period));
-    assert(!std::isnan(ret.extents[0]));
-    assert(!std::isnan(ret.extents[1]));
-    assert(!std::isnan(ret.extents[2]));
-    assert(!std::isnan(ret.total_time));
-    ret.length_scale_in_jobfile = get_length_multiplier(j);
-    ret.temporal_scale_in_jobfile = get_time_multiplier(j);
-    ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
-    ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
-    ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"];
-    ret.mean_position  = getVector<config::scalar, 3>(j["bunch"]["position"])                       * lmult / unit_length_in_meters;
-    ret.sigma_position = getVector<config::scalar, 3>(j["bunch"]["sigma-position"])                 * lmult / unit_length_in_meters;
-    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters;
-    ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
-    ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
-    ret.output_path = "../data/";
-    if(j["output"].contains("path")){
-        ret.output_path = j["output"]["path"];
-        if(!ret.output_path.ends_with('/')){
-            ret.output_path.push_back('/');
-        }
-    }
-    if(j.contains("experimentation")){
-        nlohmann::json je = j["experimentation"];
-        for(auto it = je.begin(); it!= je.end();it++){
-            ret.experiment_options[it.key()] = double(it.value());
-        }
-    }
-    return ret;
-}
-template<typename scalar>
-using FieldVector = ippl::Vector<scalar, 3>;
-template<typename scalar>
-struct BunchInitialize {
-
-    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is
-     * manual the charge at points of the position vector will be produced.    				*/
-    // std::string     			bunchType_;
-
-    /* Type of the distributions (transverse or longitudinal) in the bunch.				*/
-    std::string distribution_;
-
-    /* Type of the generator for creating the bunch distribution.					*/
-    std::string generator_;
-
-    /* Total number of macroparticles in the bunch.                                                     */
-    unsigned int numberOfParticles_;
-
-    /* Total charge of the bunch in pC.                                                                 */
-    scalar cloudCharge_;
-
-    /* Initial energy of the bunch in MeV.                                                              */
-    scalar initialGamma_;
-
-    /* Initial normalized speed of the bunch.                                                           */
-    scalar initialBeta_;
-
-    /* Initial movement direction of the bunch, which is a unit vector.                                 */
-    FieldVector<scalar> initialDirection_;
-
-    /* Position of the center of the bunch in the unit of length scale.                           	*/
-    // std::vector<FieldVector<scalar> >	position_;
-    FieldVector<scalar> position_;
-
-    /* Number of macroparticles in each direction for 3Dcrystal type.                                   */
-    FieldVector<unsigned int> numbers_;
-
-    /* Lattice constant in x, y, and z directions for 3D crystal type.                                  */
-    FieldVector<scalar> latticeConstants_;
-
-    /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal
-     * type, it will be the spread in position for each micro-bunch of the crystal.			*/
-    FieldVector<scalar> sigmaPosition_;
-
-    /* Spread in energy in each direction.                                                              */
-    FieldVector<scalar> sigmaGammaBeta_;
-
-    /* Store the truncation transverse distance for the electron generation.				*/
-    scalar tranTrun_;
-
-    /* Store the truncation longitudinal distance for the electron generation.				*/
-    scalar longTrun_;
-
-    /* Name of the file for reading the electrons distribution from.					*/
-    std::string fileName_;
-
-    /* The radiation wavelength corresponding to the bunch length outside the undulator			*/
-    scalar lambda_;
-
-    /* Bunching factor for the initialization of the bunch.						*/
-    scalar bF_;
-
-    /* Phase of the bunching factor for the initialization of the bunch.				*/
-    scalar bFP_;
-
-    /* Boolean flag determining the activation of shot-noise.						*/
-    bool shotNoise_;
-
-    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.	*/
-    FieldVector<scalar> betaVector_;
-
-    /* Initialize the parameters for the bunch initialization to some first values.                     */
-    // BunchInitialize ();
-};
-
-
-
-
-
-//END CONFIG
-
-//LORENTZ FRAME AND UNDULATOR
-template<typename T, unsigned axis = 2>
-struct UniaxialLorentzframe{
-    constexpr static T c = 1.0;
-    using scalar = T;
-    using Vector3 = ippl::Vector<T, 3>;
-    scalar beta_m;
-    scalar gammaBeta_m;
-    scalar gamma_m;
-    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){
-        gammaBeta_m = gammaBeta;
-        beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta));
-        gamma_m = sqrt(1 + gammaBeta * (gammaBeta));
-    }
-    KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{
-        arg[axis] = gamma_m * (arg[axis] + beta_m * time); 
-    }
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB)const noexcept{
-        
-        Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
-        ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
-        ret.first  = ippl::Vector<T, 3>(unprimedEB.first  + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
-        ret.second = ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
-        ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
-        ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
-        return ret;
-    }
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> inverse_transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB)const noexcept{
-        return UniaxialLorentzframe<T, 2>(-gammaBeta_m).transform_EB(primedEB);
-    }
-};
-template<typename scalar>
-BunchInitialize<scalar> generate_mithra_config(const config& cfg, const UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
-    using vec3 = ippl::Vector<scalar, 3>;
-    scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K);
-    BunchInitialize<scalar> init;
-    init.generator_ = "random";
-    init.distribution_ = "uniform";
-    init.initialDirection_ = vec3{0, 0, 1};
-    init.initialGamma_ = cfg.bunch_gamma;
-    init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
-    init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast<scalar>();
-    init.sigmaPosition_ = cfg.sigma_position.template cast<scalar>();
-
-    // TODO: Initial bunching factor huh
-    init.bF_ = 0.01;
-    init.bFP_ = 0;
-    init.shotNoise_ = false;
-    init.cloudCharge_ = cfg.charge;
-    init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
-    init.longTrun_ = cfg.position_truncations[2];
-    init.tranTrun_ = cfg.position_truncations[0];
-    init.position_ = cfg.mean_position.cast<scalar>();
-    init.betaVector_ = ippl::Vector<scalar, 3>{0,0,init.initialBeta_};
-    init.numberOfParticles_ = cfg.num_particles;
-
-    init.numbers_ = 0;                      // UNUSED
-    init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED
-
-    return init;
-}
-template <typename Double>
-struct Charge {
-
-    Double q;                     /* Charge of the point in the unit of electron charge.	*/
-    FieldVector<Double> rnp, rnm; /* Position vector of the charge.			*/
-    FieldVector<Double> gb;       /* Normalized velocity vector of the charge.		*/
-
-    /* Double flag determining if the particle is passing the entrance point of the undulator. This flag
-     * can be used for better boosting the bunch to the moving frame. We need to consider it to be double,
-     * because this flag needs to be communicated during bunch update.					*/
-    Double e;
-
-    // Charge();
-};
-template <typename scalar>
-using ChargeVector = std::list<Charge<scalar>>;
-template<typename Double>
-void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<Double> & chargeVector, int rank, int size, int ia){
-    /* Correct the number of particles if it is not a multiple of four.					*/
-    if ( bunchInit.numberOfParticles_ % 4 != 0 )
-      {
-        unsigned int n = bunchInit.numberOfParticles_ % 4;
-        bunchInit.numberOfParticles_ += 4 - n;
-        //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") +
-        //    std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
-    }
-
-    /* Save the initially given number of particles.							*/
-    unsigned int	Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
-
-    /* Declare the required parameters for the initialization of charge vectors.                      	*/
-    Charge<Double>         	charge; charge.q  = bunchInit.cloudCharge_ / Np;
-    FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
-    FieldVector<Double> r  (0.0);
-    FieldVector<Double> t  (0.0);
-    Double            	t0, g;
-    Double		zmin = 1e100;
-    Double		Ne, bF, bFi;
-    unsigned int	bmi;
-    std::vector<Double>	randomNumbers;
-
-    /* The initialization in group of four particles should only be done if there exists an undulator in
-     * the interaction.											*/
-    unsigned int	ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4;
-
-    /* Check the bunching factor.                                                                     	*/
-    if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 )
-      {
-        //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") );
-        //exit(1);
-      }
-
-    /* If the generator is random we should make sure that different processors do not produce the same
-     * random numbers.											*/
-    if 	( bunchInit.generator_ == "random" )
-      {
-        /* Initialize the random number generator.								*/
-        srand ( time(NULL) );
-        /* Np / ng * 20 is the maximum number of particles.							*/
-        randomNumbers.resize( Np / ng * 20, 0.0);
-        for ( unsigned int ri = 0; ri < Np / ng * 20; ri++)
-          randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX));
-      }
-
-    /* Declare the generator function depending on the input.						*/
-    auto generate = [&] (unsigned int n, unsigned int m) {
-      //if 	( bunchInit.generator_ == "random" )
-        return  ( randomNumbers.at( n * 2 * Np/ng + m ) );
-      //else
-      //  return  ( randomNumbers[ n * 2 * Np/ng + m ] );
-    //TODO: Return halton properly
-        //return 	( halton(n,m) );
-    };
-
-    /* Declare the function for injecting the shot noise.						*/
-    auto insertCharge = [&] (Charge<Double> q) {
-
-      for ( unsigned int ii = 0; ii < ng; ii++ )
-        {
-          /* The random modulation is introduced depending on the shot-noise being activated.		*/
-          if ( bunchInit.shotNoise_ )
-            {
-              /* Obtain the number of beamlet.								*/
-              bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ );
-
-              /* Obtain the phase and amplitude of the modulation.					*/
-              bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) );
-
-              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
-
-              q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) );
-            }
-          else if ( bunchInit.lambda_ != 0.0)
-            {
-              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
-
-              q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 );
-            }
-
-          /* Set this charge into the charge vector.							*/
-          chargeVector.push_back(q);
-        }
-    };
-
-    /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to
-     * calculate the FEL bucket number.									*/
-    if ( bunchInit.shotNoise_ )
-      {
-        for (i = 0; i < Np / ng; i++)
-          {
-            if ( bunchInit.distribution_ == "uniform" )
-              zmin = std::min(   Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin );
-            else if ( bunchInit.distribution_ == "gaussian" )
-              zmin = std::min(  (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin );
-            else
-              {
-                //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
-                exit(1);
-              }
-          }
-
-        if ( bunchInit.distribution_ == "uniform" )
-          for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++)
-            {
-              t0  = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) );
-              t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
-
-              zmin = std::min(   t0 , zmin );
-            }
-
-        //zmin = zmin + bunchInit.position_[ia][2];
-        zmin = zmin + bunchInit.position_[2];
-
-        /* Obtain the average number of electrons per FEL beamlet.					*/
-        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] );
-
-        /* Set the bunching factor level for the shot noise depending on the given values.		*/
-        bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
-
-        //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) );
-      }
-
-    /* Determine the properties of each charge point and add them to the charge vector.               	*/
-    for (i = rank; i < Np / ng; i += size)
-      {
-        /* Determine the transverse coordinate.								*/
-        r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
-        r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
-
-        /* Determine the longitudinal coordinate.							*/
-        if ( bunchInit.distribution_ == "uniform" )
-          r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2];
-        else if ( bunchInit.distribution_ == "gaussian" )
-          r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
-        else
-          {
-            //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
-            exit(1);
-          }
-
-        /* Determine the transverse momentum.								*/
-        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
-        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
-        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
-
-        if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
-          {
-            /* Shift the generated charge to the center position and momentum space.			*/
-            //charge.rnp    = bunchInit.position_[ia];
-            charge.rnp    = bunchInit.position_;
-            charge.rnp   += r;
-
-            charge.gb   = gb;
-            charge.gb  += t;
-            //std::cout << gb << "\n";
-            if(std::isinf(gb[2])){
-                std::cerr << "it klonked here\n";
-            }
-
-            /* Insert this charge and the mirrored ones into the charge vector.				*/
-            insertCharge(charge);
-          }
-      }
-
-    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the
-     * CSE from the tail of the bunch.									*/
-    if ( bunchInit.distribution_ == "uniform" ){
-      for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size)
-        {
-            
-          r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
-          r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
-
-          /* Determine the longitudinal coordinate.							*/
-          r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
-          r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
-
-          /* Determine the transverse momentum.								*/
-          t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
-          t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
-          t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
-          //std::cerr << "DOING UNIFORM tapering!!!\n";
-          if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
-            {
-                //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n";
-              /* Shift the generated charge to the center position and momentum space.			*/
-              charge.rnp   = bunchInit.position_[ia];
-              charge.rnp  += r;
-
-              charge.gb  = gb;
-              
-              charge.gb += t;
-              //std::cout << gb[0] << "\n";
-              //if(std::isinf(gb.squaredNorm())){
-              //    std::cerr << "it klonked here\n";
-              //}
-              /* Insert this charge and the mirrored ones into the charge vector.			*/
-              insertCharge(charge);
-            }
-        }
-    }
-
-    /* Reset the value for the number of particle variable according to the installed number of
-     * macro-particles and perform the corresponding changes.                                         	*/
-    bunchInit.numberOfParticles_ = chargeVector.size();
-}
-
-template<typename Double>
-void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma){
-    Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
-    Double zmaxL = -1.0e100, zmaxG;
-    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
-      {
-        Double g  	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
-        if(std::isinf(g)){
-            std::cerr << __FILE__  << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n";
-            abort();
-        }
-        Double bz 	= iterQ->gb[2] / g;
-        iterQ->rnp[2]  *= frame_gamma;
-        
-        iterQ->gb[2] 	= frame_gamma * g * ( bz - frame_beta );
-        
-        zmaxL 		= std::max( zmaxL , iterQ->rnp[2] );
-      }
-    zmaxG = zmaxL;
-    struct {
-       Double zu_;
-       Double beta_;
-    } bunch_;
-    bunch_.zu_ 		= zmaxG;
-    bunch_.beta_ 	= frame_beta;
-
-    /****************************************************************************************************/
-
-    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
-      {
-        Double g	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
-        iterQ->rnp[0]  += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        iterQ->rnp[1]  += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        iterQ->rnp[2]  += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        if(std::isnan(iterQ->rnp[2])){
-            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_  << ", " <<  frame_beta << "\n";
-            std::cerr << __FILE__  << ": " << __LINE__ << "   OOOOOF\n";
-            abort();
-        }
-      }
-}
-
-
-
-
-template<typename bunch_type, typename scalar>
-size_t initialize_bunch_mithra(
-    bunch_type& bunch,
-    const BunchInitialize<scalar>& bunchInit,
-    scalar frame_gamma){
-
-    ChargeVector<scalar> oof;
-    initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0);
-    for(auto& c : oof){
-        if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
-            std::cout << "Pos before boost: " << c.rnp << "\n";
-        if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
-            std::cout << "Pos before boost: " << c.rnp << "\n";
-    }
-    boost_bunch(oof, frame_gamma);
-    for(auto& c : oof){
-         if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){
-            std::cout << "Pos after boost: " << c.rnp << "\n";
-            break;
-        }
-    }
-    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", oof.size());
-    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", oof.size());
-    auto iterQ = oof.begin();
-    for (size_t i = 0; i < oof.size(); i++) {
-        assert_isreal(iterQ->gb[0]);
-        assert_isreal(iterQ->gb[1]);
-        assert_isreal(iterQ->gb[2]);
-        assert(iterQ->gb[2] != 0.0f);
-        scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
-        assert_isreal(g);
-        scalar bz = iterQ->gb[2] / g;
-        assert_isreal(bz);
-        (void)bz;
-        positions(i) = iterQ->rnp;
-        gammabetas(i) = iterQ->gb;
-        ++iterQ;
-    }
-    if(oof.size() > bunch.getLocalNum()){
-        bunch.create(oof.size() - bunch.getLocalNum());
-    }
-    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions ("", oof.size());
-    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", oof.size());
-    
-    Kokkos::deep_copy(dpositions, positions);
-    Kokkos::deep_copy(dgammabetas, gammabetas);
-    Kokkos::deep_copy(bunch.R_nm1.getView(), positions);
-    Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas);
-    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();;
-    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
-        rview(i) = dpositions(i);
-        rm1view(i) = dpositions(i);
-        gbview(i) = dgammabetas(i);
-    });
-    Kokkos::fence();
-    
-    return oof.size();
-}
-
-
-
-
-
-
-
-//END LORENTZ FRAME AND UNDULATOR AND BUNCH
-
-
-
-
-
-//PREAMBLE
-
+#include <stb_image_write.h>
 template<typename scalar1, typename... scalar>
     requires((std::is_floating_point_v<scalar1>))
 KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
     uint32_t dim = sizeof...(scalar);
     ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    scalar1 vecsum(0);
     for(unsigned d = 0;d < dim;d++){
-        vec[d] = vec[d] * vec[d];
+        vecsum += vec[d] * vec[d];
+        
     }
     #ifndef __CUDA_ARCH__
     using std::exp;
     #endif
-    return exp(-(vec.sum()) / (stddev * stddev)); 
-}
-
-constexpr double sq(double x){
-    return x * x;
-}
-constexpr float sq(float x){
-    return x * x;
+    return exp(-(vecsum) / (stddev * stddev)); 
 }
 
-
-template<unsigned a, unsigned b>
-constexpr KOKKOS_INLINE_FUNCTION auto first(){
-    return a;
-}
-template<unsigned a, unsigned b>
-constexpr KOKKOS_INLINE_FUNCTION auto second(){
-    return b;
-}
-
-template <typename T>
-KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
-                                                     const ippl::Vector<T, 3>& b) {
-    ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
-    ret[0] = a[1] * b[2] - a[2] * b[1];
-    ret[1] = a[2] * b[0] - a[0] * b[2];
-    ret[2] = a[0] * b[1] - a[1] * b[0];
-    return ret;
-}
-template<typename T>
-KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
-    //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
-    //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
-    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-    //pos -= spacing * T(0.5);
-    ippl::Vector<T, 3> relpos = pos - origin;
-    ippl::Vector<T, 3> gridpos = relpos / hr;
-    ippl::Vector<int, 3> ipos;
-    ipos = gridpos.template cast<int>();
-    ippl::Vector<T, 3> fracpos;
-    for(unsigned k = 0;k < 3;k++){
-        fracpos[k] = gridpos[k] - (int)ipos[k];
-    }
-    //TODO: NGHOST!!!!!!!
-    ipos += ippl::Vector<int, 3>(1);
-    ret.first = ipos;
-    ret.second = fracpos;
-    return ret;
-}
-template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
-    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
-    ipos -= ldom.first();
-    //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
-    if(
-        ipos[0] < 0
-        ||ipos[1] < 0
-        ||ipos[2] < 0
-        ||size_t(ipos[0]) >= view.extent(0) - 1
-        ||size_t(ipos[1]) >= view.extent(1) - 1
-        ||size_t(ipos[2]) >= view.extent(2) - 1
-        ||fracpos[0] < 0
-        ||fracpos[1] < 0
-        ||fracpos[2] < 0
-    ){
-        return;
-    }
-    assert(fracpos[0] >= 0.0f);
-    assert(fracpos[0] <= 1.0f);
-    assert(fracpos[1] >= 0.0f);
-    assert(fracpos[1] <= 1.0f);
-    assert(fracpos[2] >= 0.0f);
-    assert(fracpos[2] <= 1.0f);
-    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
-    assert(one_minus_fracpos[0] >= 0.0f);
-    assert(one_minus_fracpos[0] <= 1.0f);
-    assert(one_minus_fracpos[1] >= 0.0f);
-    assert(one_minus_fracpos[1] <= 1.0f);
-    assert(one_minus_fracpos[2] >= 0.0f);
-    assert(one_minus_fracpos[2] <= 1.0f);
-    scalar accum = 0;
-    
-    for(unsigned i = 0;i < 8;i++){
-        scalar weight = 1;
-        ippl::Vector<int, 3> ipos_l = ipos;
-        for(unsigned d = 0;d < 3;d++){
-            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
-            ipos_l[d] += !!(i & (1 << d));
-        }
-        assert_isreal(value);
-        assert_isreal(weight);
-        accum += weight;
-        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
-    }
-    assert(abs(accum - 1.0f) < 1e-6f);
-}
-template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
-    //std::cout << "Value: " << value << "\n";
-    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
-    ipos -= ldom.first();
-    if(
-        ipos[0] < 0
-        ||ipos[1] < 0
-        ||ipos[2] < 0
-        ||size_t(ipos[0]) >= view.extent(0) - 1
-        ||size_t(ipos[1]) >= view.extent(1) - 1
-        ||size_t(ipos[2]) >= view.extent(2) - 1
-        ||fracpos[0] < 0
-        ||fracpos[1] < 0
-        ||fracpos[2] < 0
-    ){
-        //std::cout << "out of bounds\n";
-        return;
-    }
-    assert(fracpos[0] >= 0.0f);
-    assert(fracpos[0] <= 1.0f);
-    assert(fracpos[1] >= 0.0f);
-    assert(fracpos[1] <= 1.0f);
-    assert(fracpos[2] >= 0.0f);
-    assert(fracpos[2] <= 1.0f);
-    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
-    assert(one_minus_fracpos[0] >= 0.0f);
-    assert(one_minus_fracpos[0] <= 1.0f);
-    assert(one_minus_fracpos[1] >= 0.0f);
-    assert(one_minus_fracpos[1] <= 1.0f);
-    assert(one_minus_fracpos[2] >= 0.0f);
-    assert(one_minus_fracpos[2] <= 1.0f);
-    scalar accum = 0;
-    
-    for(unsigned i = 0;i < 8;i++){
-        scalar weight = 1;
-        ippl::Vector<int, 3> ipos_l = ipos;
-        for(unsigned d = 0;d < 3;d++){
-            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
-            ipos_l[d] += !!(i & (1 << d));
-        }
-        assert_isreal(weight);
-        accum += weight;
-        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
-        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
-        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
-    }
-    assert(abs(accum - 1.0f) < 1e-6f);
-}
-
-template<typename view_type, typename scalar>
-KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
-
-    
-    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
-    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
-    //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
-    for(int d = 0;d < 3;d++){
-        //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
-        //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
-        //}
-        //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
-    }
-    //const uint32_t nghost = g.nghost();
-    //from_ipos += ippl::Vector<int, 3>(nghost);
-    //to_ipos += ippl::Vector<int, 3>(nghost);
-    
-    if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
-        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
-        
-        return;
-    }
-    ippl::Vector<scalar, 3> relay;
-    const int nghost = 1;
-    const ippl::Vector<scalar, 3> orig = origin;
-    using Kokkos::max;
-    using Kokkos::min;
-    for (unsigned int i = 0; i < 3; i++) {
-        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
-                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
-                           scalar(0.5) * (to[i] + from[i])));
-    }
-    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
-    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
-}
-
-// END PREAMBLE
-
-
-//BEGIN ABSORBING BOUNDARY CONDITION
-template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
-struct second_order_abc_face{
-    using scalar = _scalar;
-    scalar Cweights[5];
-    int sign;
-    constexpr static unsigned main_axis = _main_axis;
-    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
-        constexpr scalar c = 1;
-        constexpr unsigned side_axes[2] = {_side_axes...};
-        static_assert(
-            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
-            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
-            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
-        );
-        assert(_main_axis != side_axes[0]);
-        assert(_main_axis != side_axes[1]);
-        assert(side_axes[0] != side_axes[1]);
-        constexpr scalar truncation_order = 2.0;
-        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
-        scalar q      = - 1.0 / ( 1 + 1 );
-
-        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
-
-        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
-        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
-        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
-        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
-        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
-        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        uint32_t i = c[0];
-        uint32_t j = c[1];
-        uint32_t k = c[2];
-        using ippl::apply;
-        constexpr unsigned side_axes[2] = {_side_axes...};
-        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
-        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
-        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
-        return advanceBoundaryS(
-		    A_nm1(i,j,k), A_n(i,j,k),
-		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
-		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
-		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
-		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
-        );
-    }
-    template<typename value_type>
-    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
-						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
-						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
-						 const value_type& v9 , const value_type& v10, const value_type& v11,
-						 const value_type& v12, const value_type& v13)const noexcept
-    {
-        
-      value_type v0 =
-    	 Cweights[0]  * (v1 + v5) +
-    	(Cweights[1]) * v3 +
-    	(Cweights[2]) * ( v2 + v4 ) +
-    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
-    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
-      return v0;
-    }
-};
-template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
-struct second_order_abc_edge{
-    using scalar = _scalar;
-    //
-    scalar Eweights[5];
-    
-    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
-        static_assert(normal_axis1 != normal_axis2);
-        static_assert(edge_axis != normal_axis2);
-        static_assert(edge_axis != normal_axis1);
-        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
-        constexpr scalar c0_ = scalar(1);
-        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
-        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else{
-            assert(false);
-        }
-        
-
-
-        
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        uint32_t i = c[0];
-        uint32_t j = c[1];
-        uint32_t k = c[2];
-        using ippl::apply;
-        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
-        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
-        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
-        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
-        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
-        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
-        //return A_n(i, j, k);
-        return advanceEdgeS(
-                                        A_n(i, j, k),      A_nm1(i, j, k),
-            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
-            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
-            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
-            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
-            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
-        );
-    }
-    template<typename value_type>
-    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
-            (              value_type v1 , value_type v2 ,
-                           value_type v3 , value_type v4 , value_type v5 ,
-                           value_type v6 , value_type v7 , value_type v8 ,
-                           value_type v9 , value_type v10, value_type v11,
-                           value_type v12, value_type v13, value_type v14,
-                           value_type v15, value_type v16, value_type v17,
-                           value_type v18, value_type v19)const noexcept{
-    value_type v0 =
-    Eweights[0] * (v3 + v8) +
-    Eweights[1] * (v5 + v6) +
-    Eweights[2] * (v2 + v9) +
-    Eweights[3] * (v1 + v4 + v7 + v10) +
-    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
-    return v0;
-  }
-};
-template<typename _scalar, bool x0, bool y0, bool z0>
-struct second_order_abc_corner{
-    using scalar = _scalar;
-    scalar Cweights[17];
-    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
-        constexpr scalar c0_ = scalar(1);
-        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        //First implementation: 0,0,0 corner
-        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
-        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
-        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
-        using ippl::apply;
-        const ippl::Vector<uint32_t, 3> offsets[8] = {
-            ippl::Vector<uint32_t, 3>{0,0,0},
-            ippl::Vector<uint32_t, 3>{xoff,0,0},
-            ippl::Vector<uint32_t, 3>{0,yoff,0},
-            ippl::Vector<uint32_t, 3>{0,0,zoff},
-            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
-            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
-            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
-            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
-        };
-        return advanceCornerS(
-                                          apply(A_n, c), apply(A_nm1, c),
-            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
-            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
-            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
-            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
-            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
-            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
-            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
-        );
-    }
-    template<typename value_type>
-    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
-                            (       value_type v1 , value_type v2 ,
-                             value_type v3 , value_type v4 , value_type v5 ,
-                             value_type v6 , value_type v7 , value_type v8 ,
-                             value_type v9 , value_type v10, value_type v11,
-                             value_type v12, value_type v13, value_type v14,
-                             value_type v15, value_type v16, value_type v17,
-                             value_type v18, value_type v19, value_type v20,
-                             value_type v21, value_type v22, value_type v23)const noexcept{
-    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
-    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
-    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
-    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
-    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
-    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
-    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
-    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
-  }
-};
-
-
-
-
-
-
-struct second_order_mur_boundary_conditions{
-    template<typename field_type, typename dt_type>
-    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
-        using scalar = decltype(dt);
-        //TODO: tbh don't know
-        //const unsigned nghost = 1;
-        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
-        //assert_isreal((betaMur[0]));
-        //assert_isreal((betaMur[1]));
-        //assert_isreal((betaMur[2]));
-        auto A_n   = FA_n.getView();
-        auto A_np1 = FA_np1.getView();
-        auto A_nm1 = FA_nm1.getView();
-        ippl::Vector<uint32_t, 3> local_nr{
-            uint32_t(A_n.extent(0)),
-            uint32_t(A_n.extent(1)),
-            uint32_t(A_n.extent(2))
-        };
-        constexpr uint32_t min_abc_boundary = 1;
-        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-            
-            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-
-            if(Kokkos::popcount(val) == 1){
-                if(ig == min_abc_boundary){
-                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(jg == min_abc_boundary){
-                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(kg == min_abc_boundary){
-                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(ig == true_nr[0] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-            }
-        });
-        Kokkos::fence();
-        //FA_np1.fillHalo();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-
-            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            if(Kokkos::popcount(lval) > 2)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-            if(Kokkos::popcount(val) == 2){ //Edge
-                if(ig == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else{
-                    assert(false);
-                }
-            }
-        });
-        Kokkos::fence();
-        //FA_np1.fillHalo();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-
-            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            //if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-            
-            if(Kokkos::popcount(val) == 3){
-                //printf("Corner: %d, %d, %d\n", i, j, k);
-                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else{
-                    assert(false);
-                }
-            }
-        });
-        Kokkos::fence();
-    }
-};
-//END ABSORBING BOUNDARY CONDITION
-
-
-
-
-
-
-
-
-
-namespace ippl {
-    
-    template<typename scalar>
-    struct undulator_parameters{
-        scalar lambda; //MITHRA: lambda_u
-        scalar K; //Undulator parameter
-        scalar length;
-        scalar B_magnitude;
-        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
-            //std::cout << "Setting bmag: " << B_magnitude << "\n";
-        }
-        undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda);
-        }
-    };
-
-    template<typename scalar>
-    struct nondispersive{
-        scalar a1;
-        scalar a2;
-        scalar a4;
-        scalar a6;
-        scalar a8;
-    };
-    template <typename _scalar, class PLayout>
-    struct  Bunch : public ippl::ParticleBase<PLayout> {
-        using scalar = _scalar;
-
-        // Constructor for the Bunch class, taking a PLayout reference
-        Bunch(PLayout& playout)
-            : ippl::ParticleBase<PLayout>(playout) {
-            // Add attributes to the particle bunch
-            this->addAttribute(Q);          // Charge attribute
-            this->addAttribute(mass);       // Mass attribute
-            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
-            this->addAttribute(R_np1);      // Position attribute for the next time step
-            this->addAttribute(R_nm1);      // Position attribute for the next time step
-            this->addAttribute(EB_gather);   // Electric field attribute for particle gathering
-        }
-
-        // Destructor for the Bunch class
-        ~Bunch() {}
-
-        // Define container types for various attributes
-        using charge_container_type   = ippl::ParticleAttrib<scalar>;
-        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
-
-        // Declare instances of the attribute containers
-        charge_container_type Q;          // Charge container
-        charge_container_type mass;       // Mass container
-        velocity_container_type gamma_beta; // Gamma-beta container
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
-        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
-
-    };
-
-
-    template <typename scalar>
-    // clang-format off
-    struct FDTDFieldState{
-        
-        //Sorry, can't do more than 3d
-
-        constexpr static unsigned int dim = 3;
-        using Vector_t = ippl::Vector<scalar, 3>;
-        using value_type = ippl::Vector<scalar, 4>;
-        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>;
-        using Mesh_t               = ippl::UniformCartesian<scalar, dim>;
-
-        bool periodic_bc;
-        
-        using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using EBField_t = ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using view_type = VField_t::view_type;
-        using ev_view_type = EBField_t::view_type;
-        //Fields
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_np1;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_n;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_nm1;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> J;
-        ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> EB;
-        
-        //Discretization options
-        Vector_t hr_m;
-        ippl::Vector<uint32_t, 3> nr_global;
-        ippl::Vector<uint32_t, 3> nr_local;
-        scalar dt;
-        Mesh_t* mesh_mp;
-        FieldLayout<dim>* layout_mp;
-        using playout_type = ParticleSpatialLayout<scalar, 3>;
-        playout_type playout;
-        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
-        using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
-        config m_config;
-
-        /**
-         * @brief Construct a new FDTDFieldState object
-         * Mesh and resolution parameter are technically redundant
-         * 
-         * @param resolution 
-         * @param layout 
-         * @param mesch 
-         */
-        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){
-            FA_np1.initialize(mesch, layout, 1);
-            FA_n.initialize(mesch, layout, 1);
-            FA_nm1.initialize(mesch, layout, 1);
-            J.initialize(mesch, layout, 1);
-            EB.initialize(mesch, layout, 1);
-            FA_n   = value_type(0);
-            FA_np1 = value_type(0);
-            FA_nm1 = value_type(0);
-            hr_m = mesch.getMeshSpacing();
-            nr_global = ippl::Vector<uint32_t, 3>{
-                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
-                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
-                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
-            };
-            nr_local = ippl::Vector<uint32_t, 3>{
-                uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1),
-                uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
-                uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
-            };
-            //std::cout << "NR_M_g: " << nr_global << "\n";
-            //std::cout << "NR_M_l: " << nr_local << "\n";
-            dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2]));
-            particles.create(nparticles);
-            setNoBoundaryConditions();
-            
-        }
-        void setNoBoundaryConditions() {
-            periodic_bc = false;
-            typename VField_t::BConds_t vector_bcs;
-            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
-                vector_bcs[Idx] = std::make_shared<ippl::NoBcFace<VField_t>>(Idx);
-                return 0;
-            };
-            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
-                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
-                (void)x;
-            };
-            bcsetter(std::make_index_sequence<dim * 2>{});
-            FA_n  .setFieldBC(vector_bcs);
-            FA_np1.setFieldBC(vector_bcs);
-            FA_nm1.setFieldBC(vector_bcs);
-        }
-        void setPeriodicBoundaryConditions() {
-            periodic_bc = true;
-            typename VField_t::BConds_t vector_bcs;
-            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
-                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<VField_t>>(Idx);
-                return 0;
-            };
-            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
-                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
-                (void)x;
-            };
-            bcsetter(std::make_index_sequence<dim * 2>{});
-            FA_n  .setFieldBC(vector_bcs);
-            FA_np1.setFieldBC(vector_bcs);
-            FA_nm1.setFieldBC(vector_bcs);
-        }
-        
-        void scatterBunch(){
-            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
-            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
-            assert_isreal(volume);
-            assert_isreal((scalar(1) / volume));
-            auto Jview = J.getView();
-            auto qview = particles.Q.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto orig = mesh_mp->getOrigin();
-            auto hr = mesh_mp->getMeshSpacing();
-            auto dt = this->dt;
-            bool space_charge = m_config.space_charge;
-            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                Vector_t pos = rview(i);
-                Vector_t to = rview(i);
-                Vector_t from = rm1view(i);
-                if(space_charge){
-                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
-                }
-                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
-            });
-            Kokkos::fence();
-            J.accumulateHalo();
-        }
-
-        void fieldStep(){
-            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
-            nondispersive<scalar> ndisp{
-                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
-                .a2 = sq(dt / hr_m[0]),
-                .a4 = sq(dt / hr_m[1]),
-                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0])  - 2 * calA * sq(dt / hr_m[1]),
-                .a8 = sq(dt)
-            };
-            //if(periodic_bc){
-            //    FA_n.getFieldBC().apply(FA_n);
-            //}
-            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
-            auto source = this->J.getView();
-            //FA_nm1.fillHalo();
-            FA_n.fillHalo();
-            ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
-            const auto& ldom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                uint32_t ig = i + ldom.first()[0];
-                uint32_t jg = j + ldom.first()[1];
-                uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2)
-                             + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5);
-                if(!val){
-                    A_np1(i, j, k) =    -A_nm1(i,j,k)
-                            + ndisp.a1 * A_n  (i,j,k)
-                            + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
-                            + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
-                            + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
-                            + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
-                            + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
-                }
-            });
-            Kokkos::fence();
-            
-            if(!periodic_bc){
-                FA_np1.fillHalo();
-                second_order_mur_boundary_conditions bc;
-                
-                
-                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom);
-            }
-            Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
-            Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
-            //std::swap(this->A_n, this->A_nm1);
-            //std::swap(this->A_np1, this->A_n);
-            
-            evaluate_EB();
-        }
-        void evaluate_EB(){
-            FA_n.fillHalo();//FA_nm1.fillHalo();
-            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
-            const scalar idt = scalar(1.0) / dt;
-            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
-            auto source = this->J.getView();
-            auto EBv = this->EB.getView();
-            Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
-                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
-                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
-                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
-
-                ippl::Vector<scalar, 3> grad_phi{
-                    dAdx[0], dAdy[0], dAdz[0]
-                };
-                ippl::Vector<scalar, 3> curlA{
-                    dAdy[3] - dAdz[2],
-                    dAdz[1] - dAdx[3],
-                    dAdx[2] - dAdy[1],
-                };
-                EBv(i,j,k)[0] = -dAdt - grad_phi;
-                EBv(i,j,k)[1] = curlA;
-            });
-            Kokkos::fence();
-        }
-        template<typename callable>
-        void updateBunch(scalar time, UniaxialLorentzframe<scalar> ulb, callable undulator_field){
-            
-            Kokkos::fence();
-            auto gbview = particles.gamma_beta.getView();
-            auto ebview = particles.EB_gather.getView();
-            auto qview = particles.Q.getView();
-            auto mview = particles.mass.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto rp1view = particles.R_np1.getView();
-            scalar bunch_dt = dt / 3;
-            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
-            EB.fillHalo();
-            Kokkos::fence();
-            for(int bts = 0;bts < 3;bts++){
-                
-                particles.EB_gather.gather(EB, particles.R);
-                Kokkos::fence();
-                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
-                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB = ebview(i);
-                    ippl::Vector<scalar, 3> labpos = rview(i);
-
-                    ulb.primedToUnprimed(labpos, time);
-
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_frame = undulator_field(labpos);
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame);
-                    assert_isreal((EB_undulator_bunch.first[0]));
-                    assert_isreal((EB_undulator_bunch.first[1]));
-                    assert_isreal((EB_undulator_bunch.first[2]));
-                    assert_isreal((EB_undulator_bunch.second[0]));
-                    assert_isreal((EB_undulator_bunch.second[1]));
-                    assert_isreal((EB_undulator_bunch.second[2]));
-                    EB[0] += EB_undulator_bunch.first;
-                    EB[1] += EB_undulator_bunch.second;
-
-                    const scalar charge = qview(i);
-                    const scalar mass = mview(i);
-                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
-                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
-                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
-                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-
-                    assert_isreal((ngammabeta[0]));
-                    assert_isreal((ngammabeta[1]));
-                    assert_isreal((ngammabeta[2]));
-                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
-                    gbview(i) = ngammabeta;
-                });
-                Kokkos::fence();
-            }
-            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
-            size_t invalid_count = 0;
-            auto origo = mesh_mp->getOrigin();
-            ippl::Vector<scalar, 3> extenz;//
-            extenz[0] = nr_global[0] * hr_m[0];
-            extenz[1] = nr_global[1] * hr_m[1];
-            extenz[2] = nr_global[2] * hr_m[2];
-            Kokkos::parallel_reduce(
-                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
-                KOKKOS_LAMBDA(size_t i, size_t& ref){
-                    bool out_of_bounds = false;
-                    ippl::Vector<scalar, dim> ppos = rview(i);
-                    for(size_t d = 0;d < dim;d++){
-                        out_of_bounds |= (ppos[d] <= origo[d]);
-                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
-                    }
-                    invalid(i) = out_of_bounds;
-                    ref += out_of_bounds;
-                }, 
-                invalid_count);
-            particles.destroy(invalid, invalid_count);
-            Kokkos::fence();
-            
-        }
-    };
-    // clang-format on
-}  // namespace ippl
-bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
-    const int channels = 3; // RGB
-    const int stride = width * channels;
-    std::vector<unsigned char> flippedData(data, data + stride * height);
-
-    // Use stb_image_write to write the BMP image to the file descriptor
-    if (!stbi_write_bmp_to_func(
-            [](void* context, void* data, int size) {
-                FILE* f = reinterpret_cast<FILE*>(context);
-                fwrite(data, 1, size, f);
-            },
-            fd, width, height, channels, flippedData.data())) {
-        return false;
-    }
-
-    return true;
-}
-template<typename View, typename T, unsigned Dim>
-KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector<T, Dim>& pos, const ippl::Vector<T, 3>& origin, const ippl::Vector<T, 3>& hr, const ippl::NDIndex<3>& lDom){
-    using vector_type = ippl::Vector<T, 3>;
-
-    vector_type l;
-    //vector_type origin = v.get_mesh().getOrigin();
-    //auto lDom = v.getLayout().getLocalNDIndex();
-    //vector_type hr = v.get_mesh().getMeshSpacing();
-    for(unsigned k = 0;k < 3;k++){
-        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong
-    }                     
-
-    ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
-    ippl::Vector<T, 3> whi = l - index;
-    ippl::Vector<T, 3> wlo(1.0);
-    wlo -= whi;
-    //TODO: nghost
-    ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
-    for(unsigned k = 0;k < 3;k++){
-        if(args[k] >= v.extent(k) || args[k] == 0){
-            return typename View::value_type(0);
-        }
-    }
-    //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n";
-    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/;
-
-}
-template<typename scalar>
-scalar test_gauss_law(uint32_t n){
-
-    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
-    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
-    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths};
-    std::array<bool, 3> isParallel;
-    isParallel.fill(false);
-    isParallel[2] = true;
-        
-        // all parallel layout, standard domain, normal axis order
-    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
-
-        //[-1, 1] box
-    ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
-    using vector_type = ippl::Vector<scalar, 3>;
-    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
-    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
-
-    uint32_t pcount = 1 << 20;
-    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, config{.space_charge = true});
-    field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
-    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
-    auto pview = field_state.particles.R.getView();
-    auto p1view = field_state.particles.R_nm1.getView();
-
-    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
-    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
-    
-    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
-        //bunch.gammaBeta[i].fill(scalar(0));
-        auto state = random_pool.get_state();
-        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        p1view(i) = pview(i);
-        random_pool.free_state(state);
-    });
-    Kokkos::fence();
-    field_state.J = scalar(0.0);
-    field_state.scatterBunch();
-    for(size_t i = 0;i < 8*n;i++){
-        field_state.fieldStep();
-    }
-    field_state.evaluate_EB();
-    Kokkos::fence();
-    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
-    
-    std::ofstream line("gauss_line.txt");
-    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
-    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
-    for(unsigned i = 1;i < nr[2];i++){
-        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
-        pos[2] += hx[2] * scalar(i);
-        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
-        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
-        line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
-    }
-    return 0.0f;
-}
-template<typename scalar>
-scalar test_amperes_law(uint32_t n){
-
-    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
-    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
-    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths};
-    std::array<bool, 3> isParallel;
-    isParallel.fill(false);
-    isParallel[2] = true;
-        
-        // all parallel layout, standard domain, normal axis order
-    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
-
-        //[-1, 1] box
-    ippl::Vector<scalar, 3> hx;
-    for(unsigned d = 0;d < 3;d++){
-        hx[d] = extents[d] / (scalar)nr[d];
-    }
-    using vector_type = ippl::Vector<scalar, 3>;
-    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
-    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
-
-    uint32_t pcount = 1 << 20;
-    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, config{.space_charge = true});
-    field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
-    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
-    auto pview = field_state.particles.R.getView();
-    auto p1view = field_state.particles.R_nm1.getView();
-    constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
-    scalar timestep = field_state.dt;
-    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
-    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
-    
-    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
-        //bunch.gammaBeta[i].fill(scalar(0));
-        auto state = random_pool.get_state();
-        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
-        p1view(i) = pview(i);
-        p1view(i)[1] -= vy * timestep;
-        random_pool.free_state(state);
-    });
-    Kokkos::fence();
-    field_state.J = scalar(0.0);
-    field_state.scatterBunch();
-    for(size_t i = 0;i < 8*n;i++){
-        field_state.fieldStep();
-    }
-    field_state.evaluate_EB();
-    Kokkos::fence();
-    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
-    
-    std::ofstream line("ampere_line.txt");
-    
-    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
-    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
-    for(unsigned i = 1;i < nr[2];i++){
-        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
-        pos[2] += hx[2] * scalar(i);
-        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
-        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
-        line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n";
-    }
-    return 0.0f;
-}
-int main(int argc, char* argv[]) {
-    using scalar = double;
+int main(int argc, char* argv[]){
     ippl::initialize(argc, argv);
     {
-        
-        test_gauss_law<scalar>(64);
-        test_amperes_law<scalar>(64);
-        goto exit;
-        config cfg = read_config("../config.json");
-        const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
-        cfg.extents[2] *= frame_gamma;
-        cfg.total_time /= frame_gamma;
-        
-        const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
-        const scalar frame_gammabeta = frame_gamma * frame_beta;
-        UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
-        ippl::undulator_parameters<scalar> uparams(cfg);
-        const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
-        const scalar distance_to_entry  = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma);
-        auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector<scalar, 3>& position_in_lab_frame){
-            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
-            ret.first.fill(0);
-            ret.second.fill(0);
-
-            if(position_in_lab_frame[2] < distance_to_entry){
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator < 0);
-                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
-            }
-            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator >= 0);
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
-            }
-            return ret;
-
-        };
-        BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
-        ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
-
+        using scalar = float;
+        const unsigned dim = 3;
+        using vector_type = ippl::Vector<scalar, 3>;
+        using vector4_type = ippl::Vector<scalar, 4>;
+        using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        constexpr size_t n = 160;
+        ippl::Vector<uint32_t, 3> nr{n/2, n/2, 2*n};
+        ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
+        ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
         std::array<bool, 3> isParallel;
         isParallel.fill(false);
         isParallel[2] = true;
-        
+
         // all parallel layout, standard domain, normal axis order
         ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
-        //[-1, 1] box
-        ippl::Vector<scalar, 3> hx     = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])};
-        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)};
-        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
-        std::cout << "hx: " << hx << "\n";
-        std::cout << "origin: " << origin << "\n";
-        std::cout << "extents: " << cfg.extents << std::endl;
-        if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){
-            std::cerr << "Dispersion relation not satisfiable\n";
-            abort();
-        }
-
-        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
-        
-        if(ippl::Comm->rank() == 0){
-            std::cout << "Init particles: " << std::endl;
-            size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
-            fdtd_state.particles.Q = cfg.charge / actual_pc;
-            fdtd_state.particles.mass = cfg.mass / actual_pc;
-        }
-        else{
-            fdtd_state.particles.create(0);
-        }
-        {
-            auto rview = fdtd_state.particles.R.getView();
-            auto rm1view = fdtd_state.particles.R_nm1.getView();
-            ippl::Vector<scalar, 3> meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum());
-    
-            Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                rview(i) -= meanpos;
-                rm1view(i) -= meanpos;
-            });
-        }
-        fdtd_state.particles.setParticleBC(ippl::NO);
-        //fdtd_state.scatterBunch();
-        //std::cout << cfg.charge << "\n";
-        
-        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
-        uint64_t starttime =  nanoTime();
-        std::ofstream rad;
-        FILE* ffmpeg_file = nullptr;
-        if(ippl::Comm->rank() == 0){
-            rad = std::ofstream("radiation.txt");
-            const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
-            if(cfg.output_rhythm != 0){
-                ffmpeg_file = popen(ffmpegCmd, "w");
-            }
+        ippl::Vector<scalar, 3> hx;
+        for(unsigned d = 0;d < 3;d++){
+            hx[d] = extents[d] / (scalar)nr[d];
         }
-
-
-        for(size_t i = 0;i < timesteps_required;i++){
-            fdtd_state.J = scalar(0.0);
-            fdtd_state.playout.update(fdtd_state.particles);
-            fdtd_state.scatterBunch();
-            //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
-            fdtd_state.fieldStep();
-            fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
-            auto ldom = layout.getLocalNDIndex();
-            auto nrg = fdtd_state.nr_global;
-            auto ebv = fdtd_state.EB.getView();
-            double radiation = 0.0;
-            Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
-                //uint32_t ig = i + ldom.first()[0];
-                //uint32_t jg = j + ldom.first()[1];
-                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]};
-                ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
-                ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
-                uint32_t kg = k + ldom.first()[2];
-                if(kg == nrg[2] - 3){
-                    ref += Elab.cross(Blab)[2];
-                }
-
-            }, radiation);
-            double radiation_in_watt_on_this_rank = radiation *
-            double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) *
-            fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
-            double radiation_in_watt_global = 0.0;
-            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
-            if(ippl::Comm->rank() == 0){
-                ippl::Vector<scalar, 3> pos{0,0,0};
-                frame_boost.primedToUnprimed(pos, fdtd_state.dt * i);
-                rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
-            }
-            //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
-            //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
-            int rank = ippl::Comm->rank();
-            int size = ippl::Comm->size();
-            if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
-
-                
-                int img_height = 400;
-                int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]);
-                float* imagedata = new float[img_width * img_height * 3];
-                std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
-                float* idata_recvbuffer = new float[img_width * img_height * 3];
-                int floatcount = img_width * img_height * 3;
-                uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
-                std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
-                auto phmirror = fdtd_state.particles.R.getHostMirror();
-                Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView());
-                for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){
-                    ippl::Vector<scalar, 3> ppos = phmirror(hi);
-                    ppos -= mesh.getOrigin();
-                    ppos /= cfg.extents.cast<scalar>();
-                    int x_imgcoord = ppos[2] * img_width;
-                    int y_imgcoord = ppos[0] * img_height;
-                    //printf("%d, %d\n", x_imgcoord, y_imgcoord);
-                    if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){
-                        const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
-                        //std::cout << intensity << "\n";
-                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = 
-                        std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
-                    }
-                };
-                auto ebh = fdtd_state.EB.getHostMirror();
-                Kokkos::deep_copy(ebh, fdtd_state.EB.getView());
-
-                //double exp_avg = double(exp_sum) / double(acount);
-                {
-                    for(int i = 1;i < img_width;i++){
-                        for(int j = 1;j < img_height;j++){
-                            int i_remap = (double(i) / (img_width  - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
-                            int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
-                            if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
-                                if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
-                                    ippl::Vector<ippl::Vector<scalar, 3>, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
-                                    
-                                    ippl::Vector<scalar, 3> poynting = acc[0].cross(acc[1]);
-                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> eblab =  frame_boost.inverse_transform_EB(
-                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(acc[0], acc[1])
-                                    );
-                                    ippl::Vector<scalar, 3> poyntinglab = eblab.first.cross(eblab.second);
-                                    poynting = poyntinglab;
-                                    if(poynting.norm() > 0){
-                                        //std::cout << poynting.norm() << "\n";
-                                    }
-                                    float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f;
-                                    int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
-                                    imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
-                                    imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
-                                    imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
-                                }
-                            }
-                        }
-                    }
-                }
-                int mask = 1;
-                while (mask < size) {
-                    int partner = rank ^ mask;
-                    //if((rank & (mask - 1)) == 0)
-                    {
-                        if ((rank & mask) == 0) {
-                            // Send data to partner
-                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
-                            // Apply image summation
-                            for(int f = 0;f < floatcount;f++){
-                                imagedata[f] += idata_recvbuffer[f];
-                            }
-                        } else {
-                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator());
-                            // Receive data from partner and apply reduction
+        ippl::Vector<scalar, 3> origin = {0,0,0};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        FourField source(mesh, layout);
+        ThreeField E(mesh, layout);
+        ThreeField B(mesh, layout);
+        source = vector4_type(0);
+        ippl::NonStandardFDTDSolver<ThreeField, FourField> sfdsolver(source, E, B);
+        sfdsolver.setPeriodicBoundaryConditions();
+        auto aview = sfdsolver.A_n.getView();
+        auto am1view = sfdsolver.A_nm1.getView();
+        auto lDom = layout.getLocalNDIndex();
+        size_t nghost = sfdsolver.A_n.getNghost();
+        Kokkos::parallel_for(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+            const size_t ig = i + lDom.first()[0] - nghost;
+            const size_t jg = j + lDom.first()[1] - nghost;
+            const size_t kg = k + lDom.first()[2] - nghost;
+            scalar x = scalar(ig) / nr[0];
+            scalar y = scalar(jg) / nr[1];
+            scalar z = scalar(kg) / nr[2];
+            //std::cout << x << ", " << y << ", " << z << "\n";
+            (void)x;
+            (void)y;
+            (void)z;
+            const scalar magnitude = gauss(scalar(0.5), scalar(0.01), z);
+            aview  (i,j,k) = vector4_type{scalar(0), x, y, z};
+            am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)};
+        });
+        Kokkos::fence();
+        sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n);
+        Kokkos::fence();
+        std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n";
+        std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n";
+        goto exit;
+        sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1);
+        sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1);
+        sfdsolver.A_n.fillHalo();
+        sfdsolver.A_nm1.fillHalo();
+        int img_width  = 500;
+        int img_height = 500;
+        float* imagedata = new float[img_width * img_height * 3];
+        std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
+        uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
+        for(size_t s = 0;s < 2 * n;s++){
+            auto ebh = sfdsolver.A_n.getHostMirror();
+            Kokkos::deep_copy(ebh, sfdsolver.A_n.getView());
+            for(int i = 1;i < img_width;i++){
+                for(int j = 1;j < img_height;j++){
+                    int i_remap = (double(i) / (img_width  - 1)) * (nr[2] - 4) + 2;
+                    int j_remap = (double(j) / (img_height - 1)) * (nr[0] - 4) + 2;
+                    if(i_remap >= lDom.first()[2] && i_remap <= lDom.last()[2]){
+                        if(j_remap >= lDom.first()[0] && j_remap <= lDom.last()[0]){
+                            ippl::Vector<scalar, 4> acc = ebh(j_remap + 1 - lDom.first()[0], nr[1] / 2, i_remap + 1 - lDom.first()[2]);
+                            
                             
+                            float normalized_colorscale_value = acc.norm();
+                            //int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                            imagedata[(j * img_width + i) * 3 + 0] = normalized_colorscale_value * 255.0f;
+                            imagedata[(j * img_width + i) * 3 + 1] = normalized_colorscale_value * 255.0f;
+                            imagedata[(j * img_width + i) * 3 + 2] = normalized_colorscale_value * 255.0f;
                         }
                     }
-                    mask <<= 1;  // Move to next bit position for pairing
                 }
-                if(rank == 0){
-                    char output[1024] = {0};
-                    
-                    snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
-                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
-                    if(ffmpeg_file != nullptr)
-                        writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
-                }
-                delete[] imagedata;
-                delete[] idata_recvbuffer;
-                delete[] imagedata_final;
-            }    
-        }
-        uint64_t endtime = nanoTime();
-        std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
+            }
+            std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
+            char output[1024] = {0};        
+            snprintf(output, 1023, "%soutimage%.05lu.bmp", "data/", s);
+            if(s % 4 == 0)
+                stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+            sfdsolver.solve();
+        }
+        double sum_error = 0;
+        Kokkos::parallel_reduce(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k, double& ref){
+            const size_t ig = i + lDom.first()[0] - nghost;
+            const size_t jg = j + lDom.first()[1] - nghost;
+            const size_t kg = k + lDom.first()[2] - nghost;
+            scalar x = scalar(ig) / nr[0];
+            scalar y = scalar(jg) / nr[1];
+            scalar z = scalar(kg) / nr[2];
+            (void)x;
+            (void)y;
+            (void)z;
+            ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.01), z) - aview(i,j,k)[2]);
+        }, sum_error);
+        std::cout << "Sum: " << sum_error / double(n * n * n) << "\n";
     }
     exit:
     ippl::finalize();

From 68b3111b3ce4320a631232236c90b9a9cf1da379 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Thu, 2 May 2024 20:28:30 +0200
Subject: [PATCH 17/32] Second order ABCs working

---
 src/MaxwellSolvers/AbsorbingBC.h | 427 +++++++++++++++++++++++++++++++
 src/MaxwellSolvers/FDTD.h        |  55 ++--
 test/solver/TestFDTDSolver.cpp   |  24 +-
 3 files changed, 478 insertions(+), 28 deletions(-)
 create mode 100644 src/MaxwellSolvers/AbsorbingBC.h

diff --git a/src/MaxwellSolvers/AbsorbingBC.h b/src/MaxwellSolvers/AbsorbingBC.h
new file mode 100644
index 000000000..4a87df71c
--- /dev/null
+++ b/src/MaxwellSolvers/AbsorbingBC.h
@@ -0,0 +1,427 @@
+#ifndef ABC_H
+#define ABC_H
+#include <Kokkos_Core.hpp>
+#include "Types/Vector.h"
+#include "Field/Field.h"
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto first(){
+    return a;
+}
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto second(){
+    return b;
+}
+template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
+struct second_order_abc_face{
+    using scalar = _scalar;
+    scalar Cweights[5];
+    int sign;
+    constexpr static unsigned main_axis = _main_axis;
+    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
+        constexpr scalar c = 1;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        static_assert(
+            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
+            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
+            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
+        );
+        assert(_main_axis != side_axes[0]);
+        assert(_main_axis != side_axes[1]);
+        assert(side_axes[0] != side_axes[1]);
+        constexpr scalar truncation_order = 2.0;
+        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
+        scalar q      = - 1.0 / ( 1 + 1 );
+
+        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
+
+        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
+        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
+        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
+        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
+        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
+        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
+        return advanceBoundaryS(
+		    A_nm1(i,j,k), A_n(i,j,k),
+		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
+		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
+		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
+		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
+						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
+						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
+						 const value_type& v9 , const value_type& v10, const value_type& v11,
+						 const value_type& v12, const value_type& v13)const noexcept
+    {
+        
+      value_type v0 =
+    	 Cweights[0]  * (v1 + v5) +
+    	(Cweights[1]) * v3 +
+    	(Cweights[2]) * ( v2 + v4 ) +
+    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
+    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
+      return v0;
+    }
+};
+template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
+struct second_order_abc_edge{
+    using scalar = _scalar;
+    //
+    scalar Eweights[5];
+    
+    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
+        static_assert(normal_axis1 != normal_axis2);
+        static_assert(edge_axis != normal_axis2);
+        static_assert(edge_axis != normal_axis1);
+        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
+        constexpr scalar c0_ = scalar(1);
+        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
+        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else{
+            assert(false);
+        }
+        
+
+
+        
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
+        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
+        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
+        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
+        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
+        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
+        //return A_n(i, j, k);
+        return advanceEdgeS(
+                                        A_n(i, j, k),      A_nm1(i, j, k),
+            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
+            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
+            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
+            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
+            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
+            (              value_type v1 , value_type v2 ,
+                           value_type v3 , value_type v4 , value_type v5 ,
+                           value_type v6 , value_type v7 , value_type v8 ,
+                           value_type v9 , value_type v10, value_type v11,
+                           value_type v12, value_type v13, value_type v14,
+                           value_type v15, value_type v16, value_type v17,
+                           value_type v18, value_type v19)const noexcept{
+    value_type v0 =
+    Eweights[0] * (v3 + v8) +
+    Eweights[1] * (v5 + v6) +
+    Eweights[2] * (v2 + v9) +
+    Eweights[3] * (v1 + v4 + v7 + v10) +
+    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
+    return v0;
+  }
+};
+template<typename _scalar, bool x0, bool y0, bool z0>
+struct second_order_abc_corner{
+    using scalar = _scalar;
+    scalar Cweights[17];
+    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
+        constexpr scalar c0_ = scalar(1);
+        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        //First implementation: 0,0,0 corner
+        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
+        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
+        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
+        using ippl::apply;
+        const ippl::Vector<uint32_t, 3> offsets[8] = {
+            ippl::Vector<uint32_t, 3>{0,0,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,0},
+            ippl::Vector<uint32_t, 3>{0,yoff,0},
+            ippl::Vector<uint32_t, 3>{0,0,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
+            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
+        };
+        return advanceCornerS(
+                                          apply(A_n, c), apply(A_nm1, c),
+            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
+            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
+            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
+            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
+            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
+            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
+            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
+                            (       value_type v1 , value_type v2 ,
+                             value_type v3 , value_type v4 , value_type v5 ,
+                             value_type v6 , value_type v7 , value_type v8 ,
+                             value_type v9 , value_type v10, value_type v11,
+                             value_type v12, value_type v13, value_type v14,
+                             value_type v15, value_type v16, value_type v17,
+                             value_type v18, value_type v19, value_type v20,
+                             value_type v21, value_type v22, value_type v23)const noexcept{
+    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
+    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
+    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
+    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
+    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
+    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
+    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
+    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
+  }
+};
+
+
+
+
+
+
+struct second_order_mur_boundary_conditions{
+    template<typename field_type, typename dt_type>
+    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
+        using scalar = decltype(dt);
+        //TODO: tbh don't know
+        //const unsigned nghost = 1;
+        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
+        //assert_isreal((betaMur[0]));
+        //assert_isreal((betaMur[1]));
+        //assert_isreal((betaMur[2]));
+        auto A_n   = FA_n.getView();
+        auto A_np1 = FA_np1.getView();
+        auto A_nm1 = FA_nm1.getView();
+        ippl::Vector<uint32_t, 3> local_nr{
+            uint32_t(A_n.extent(0)),
+            uint32_t(A_n.extent(1)),
+            uint32_t(A_n.extent(2))
+        };
+        constexpr uint32_t min_abc_boundary = 1;
+        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+            
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+
+            if(Kokkos::popcount(val) == 1){
+                if(ig == min_abc_boundary){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == min_abc_boundary){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == min_abc_boundary){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(ig == true_nr[0] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 2)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            if(Kokkos::popcount(val) == 2){ //Edge
+                if(ig == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            //if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            
+            if(Kokkos::popcount(val) == 3){
+                //printf("Corner: %d, %d, %d\n", i, j, k);
+                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+    }
+};
+#endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 827ed7fef..772ff538c 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -4,11 +4,12 @@
 using std::size_t;
 #include "Types/Vector.h"
 
-#include "Field/Field.h"
+
 
 #include "FieldLayout/FieldLayout.h"
 #include "Meshes/UniformCartesian.h"
 #include "MaxwellSolvers/Maxwell.h"
+#include "MaxwellSolvers/AbsorbingBC.h"
 constexpr double sqrt_4pi = 3.54490770181103205459;
 constexpr double alpha_scaling_factor = 1e30;
 constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
@@ -102,6 +103,19 @@ namespace ippl {
             Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
             Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
         }
+        void applyBCs(){
+            if constexpr(boundary_conditions == periodic){
+                A_n.getFieldBC().apply(A_n);
+                A_nm1.getFieldBC().apply(A_nm1);
+                A_np1.getFieldBC().apply(A_np1);
+            }
+            else{
+                Vector<uint32_t, Dim> true_nr = nr_m;
+                true_nr += (A_n.getNghost() * 2);
+                second_order_mur_boundary_conditions bcs{};
+                bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
+            }
+        }
 
         void step(){
             const auto& ldom    = layout_mp->getLocalNDIndex();
@@ -118,18 +132,16 @@ namespace ippl {
             const scalar a8 = sq(dt);
             Vector<uint32_t, Dim> true_nr = nr_m;
             true_nr += (nghost * 2);
-            A_n.getFieldBC().apply(A_n);
-            A_np1.getFieldBC().apply(A_np1);
-            A_nm1.getFieldBC().apply(A_nm1);
+            constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0;
             Kokkos::parallel_for(
-            "Scalar potential update", ippl::getRangePolicy(aview, nghost),
+            "Four potential update", ippl::getRangePolicy(aview, nghost),
             KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
                 // global indices
                 const uint32_t ig = i + ldom.first()[0];
                 const uint32_t jg = j + ldom.first()[1];
                 const uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
-                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+                uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                             + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
                 
                 if(val == 0){
                     FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k)
@@ -144,8 +156,8 @@ namespace ippl {
                 }
             });
             Kokkos::fence();
+            applyBCs();
             A_np1.fillHalo();
-            A_np1.getFieldBC().apply(A_np1);
         }
         void evaluate_EB(){
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
@@ -266,6 +278,20 @@ namespace ippl {
             Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
             Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
         }
+
+        void applyBCs(){
+            if constexpr(boundary_conditions == periodic){
+                A_n.getFieldBC().apply(A_n);
+                A_nm1.getFieldBC().apply(A_nm1);
+                A_np1.getFieldBC().apply(A_np1);
+            }
+            else{
+                Vector<uint32_t, Dim> true_nr = nr_m;
+                true_nr += (A_n.getNghost() * 2);
+                second_order_mur_boundary_conditions bcs{};
+                bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
+            }
+        }
         template<typename scalar>
         struct nondispersive{
             scalar a1;
@@ -292,17 +318,14 @@ namespace ippl {
             };
             Vector<uint32_t, Dim> true_nr = nr_m;
             true_nr += (nghost * 2);
-            std::cout << hr_m << "\n";
-            std::cout << dt << "\n";
-            A_n.getFieldBC().apply(A_n);
-            A_np1.getFieldBC().apply(A_np1);
-            A_nm1.getFieldBC().apply(A_nm1);
+            constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0;
             Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
                 uint32_t ig = i + ldom.first()[0];
                 uint32_t jg = j + ldom.first()[1];
                 uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2)
-                             + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5);
+                uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                             + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
+                
                 if(!val){
                     anp1view(i, j, k) = -anm1view(i,j,k)
                             + ndisp.a1 * aview(i,j,k)
@@ -315,7 +338,7 @@ namespace ippl {
             });
             Kokkos::fence();
             A_np1.fillHalo();
-            A_np1.getFieldBC().apply(A_np1);
+            applyBCs();
         }
         void evaluate_EB(){
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index 6cf9d4d15..b45c96298 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -32,7 +32,7 @@ int main(int argc, char* argv[]){
         using vector4_type = ippl::Vector<scalar, 4>;
         using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        constexpr size_t n = 160;
+        constexpr size_t n = 100;
         ippl::Vector<uint32_t, 3> nr{n/2, n/2, 2*n};
         ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
         ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
@@ -53,7 +53,7 @@ int main(int argc, char* argv[]){
         ThreeField E(mesh, layout);
         ThreeField B(mesh, layout);
         source = vector4_type(0);
-        ippl::NonStandardFDTDSolver<ThreeField, FourField> sfdsolver(source, E, B);
+        ippl::StandardFDTDSolver<ThreeField, FourField, ippl::absorbing> sfdsolver(source, E, B);
         sfdsolver.setPeriodicBoundaryConditions();
         auto aview = sfdsolver.A_n.getView();
         auto am1view = sfdsolver.A_nm1.getView();
@@ -70,16 +70,16 @@ int main(int argc, char* argv[]){
             (void)x;
             (void)y;
             (void)z;
-            const scalar magnitude = gauss(scalar(0.5), scalar(0.01), z);
-            aview  (i,j,k) = vector4_type{scalar(0), x, y, z};
+            const scalar magnitude = gauss(scalar(0.5), scalar(0.05), z);
+            aview  (i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)};
             am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)};
         });
         Kokkos::fence();
         sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n);
-        Kokkos::fence();
-        std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n";
-        std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n";
-        goto exit;
+        //Kokkos::fence();
+        //std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n";
+        //std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n";
+        //goto exit;
         sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1);
         sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1);
         sfdsolver.A_n.fillHalo();
@@ -89,7 +89,7 @@ int main(int argc, char* argv[]){
         float* imagedata = new float[img_width * img_height * 3];
         std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
         uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
-        for(size_t s = 0;s < 2 * n;s++){
+        for(size_t s = 0;s < 4 * n;s++){
             auto ebh = sfdsolver.A_n.getHostMirror();
             Kokkos::deep_copy(ebh, sfdsolver.A_n.getView());
             for(int i = 1;i < img_width;i++){
@@ -112,7 +112,7 @@ int main(int argc, char* argv[]){
             }
             std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
             char output[1024] = {0};        
-            snprintf(output, 1023, "%soutimage%.05lu.bmp", "data/", s);
+            snprintf(output, 1023, "%soutimage%.05lu.bmp", "renderdata/", s);
             if(s % 4 == 0)
                 stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
             sfdsolver.solve();
@@ -128,10 +128,10 @@ int main(int argc, char* argv[]){
             (void)x;
             (void)y;
             (void)z;
-            ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.01), z) - aview(i,j,k)[2]);
+            ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]);
         }, sum_error);
         std::cout << "Sum: " << sum_error / double(n * n * n) << "\n";
     }
-    exit:
+    //exit:
     ippl::finalize();
 }
\ No newline at end of file

From 10495c7db0196996452d02ad0ba3ba5fe3997710 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Fri, 3 May 2024 10:17:22 +0200
Subject: [PATCH 18/32] Add Solver with particles

---
 alpine/FreeElectronLaser.cpp                | 446 +-------------------
 src/Expression/IpplOperations.h             |   3 +
 src/MaxwellSolvers/FDTD.h                   | 336 ++++++++++++++-
 test/solver/CMakeLists.txt                  |   6 +
 test/solver/TestFDTDSolver.cpp              |   4 +-
 test/solver/TestFDTDSolverWithParticles.cpp |  59 +++
 6 files changed, 408 insertions(+), 446 deletions(-)
 create mode 100644 test/solver/TestFDTDSolverWithParticles.cpp

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index d15dd6e76..5c014e96a 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -311,7 +311,6 @@ KOKKOS_INLINE_FUNCTION bool isINF(double x){
     return std::isinf(x);
     #endif
 }
-#define assert_isreal(X) assert(!isNaN(X) && !isINF(X))
 template<unsigned int Dim, typename callable, typename... Ts>
 KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from, ippl::Vector<uint32_t, Dim> to, Ts... args){
     if constexpr(sizeof...(Ts) == Dim){
@@ -1074,17 +1073,6 @@ KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
 }
 
 
-
-
-template<unsigned a, unsigned b>
-constexpr KOKKOS_INLINE_FUNCTION auto first(){
-    return a;
-}
-template<unsigned a, unsigned b>
-constexpr KOKKOS_INLINE_FUNCTION auto second(){
-    return b;
-}
-
 template <typename T>
 KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
                                                      const ippl::Vector<T, 3>& b) {
@@ -1250,430 +1238,6 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view
 // END PREAMBLE
 
 
-//BEGIN ABSORBING BOUNDARY CONDITION
-template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
-struct second_order_abc_face{
-    using scalar = _scalar;
-    scalar Cweights[5];
-    int sign;
-    constexpr static unsigned main_axis = _main_axis;
-    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
-        constexpr scalar c = 1;
-        constexpr unsigned side_axes[2] = {_side_axes...};
-        static_assert(
-            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
-            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
-            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
-        );
-        assert(_main_axis != side_axes[0]);
-        assert(_main_axis != side_axes[1]);
-        assert(side_axes[0] != side_axes[1]);
-        constexpr scalar truncation_order = 2.0;
-        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
-        scalar q      = - 1.0 / ( 1 + 1 );
-
-        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
-
-        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
-        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
-        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
-        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
-        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
-        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        uint32_t i = c[0];
-        uint32_t j = c[1];
-        uint32_t k = c[2];
-        using ippl::apply;
-        constexpr unsigned side_axes[2] = {_side_axes...};
-        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
-        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
-        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
-        return advanceBoundaryS(
-		    A_nm1(i,j,k), A_n(i,j,k),
-		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
-		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
-		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
-		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
-        );
-    }
-    template<typename value_type>
-    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
-						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
-						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
-						 const value_type& v9 , const value_type& v10, const value_type& v11,
-						 const value_type& v12, const value_type& v13)const noexcept
-    {
-        
-      value_type v0 =
-    	 Cweights[0]  * (v1 + v5) +
-    	(Cweights[1]) * v3 +
-    	(Cweights[2]) * ( v2 + v4 ) +
-    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
-    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
-      return v0;
-    }
-};
-template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
-struct second_order_abc_edge{
-    using scalar = _scalar;
-    //
-    scalar Eweights[5];
-    
-    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
-        static_assert(normal_axis1 != normal_axis2);
-        static_assert(edge_axis != normal_axis2);
-        static_assert(edge_axis != normal_axis1);
-        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
-        constexpr scalar c0_ = scalar(1);
-        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
-        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
-            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
-            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
-            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
-        }
-        else{
-            assert(false);
-        }
-        
-
-
-        
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        uint32_t i = c[0];
-        uint32_t j = c[1];
-        uint32_t k = c[2];
-        using ippl::apply;
-        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
-        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
-        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
-        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
-        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
-        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
-        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
-        //return A_n(i, j, k);
-        return advanceEdgeS(
-                                        A_n(i, j, k),      A_nm1(i, j, k),
-            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
-            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
-            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
-            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
-            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
-        );
-    }
-    template<typename value_type>
-    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
-            (              value_type v1 , value_type v2 ,
-                           value_type v3 , value_type v4 , value_type v5 ,
-                           value_type v6 , value_type v7 , value_type v8 ,
-                           value_type v9 , value_type v10, value_type v11,
-                           value_type v12, value_type v13, value_type v14,
-                           value_type v15, value_type v16, value_type v17,
-                           value_type v18, value_type v19)const noexcept{
-    value_type v0 =
-    Eweights[0] * (v3 + v8) +
-    Eweights[1] * (v5 + v6) +
-    Eweights[2] * (v2 + v9) +
-    Eweights[3] * (v1 + v4 + v7 + v10) +
-    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
-    return v0;
-  }
-};
-template<typename _scalar, bool x0, bool y0, bool z0>
-struct second_order_abc_corner{
-    using scalar = _scalar;
-    scalar Cweights[17];
-    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
-        constexpr scalar c0_ = scalar(1);
-        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
-        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
-    }
-    template<typename view_type, typename Coords>
-    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
-        //First implementation: 0,0,0 corner
-        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
-        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
-        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
-        using ippl::apply;
-        const ippl::Vector<uint32_t, 3> offsets[8] = {
-            ippl::Vector<uint32_t, 3>{0,0,0},
-            ippl::Vector<uint32_t, 3>{xoff,0,0},
-            ippl::Vector<uint32_t, 3>{0,yoff,0},
-            ippl::Vector<uint32_t, 3>{0,0,zoff},
-            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
-            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
-            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
-            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
-        };
-        return advanceCornerS(
-                                          apply(A_n, c), apply(A_nm1, c),
-            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
-            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
-            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
-            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
-            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
-            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
-            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
-        );
-    }
-    template<typename value_type>
-    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
-                            (       value_type v1 , value_type v2 ,
-                             value_type v3 , value_type v4 , value_type v5 ,
-                             value_type v6 , value_type v7 , value_type v8 ,
-                             value_type v9 , value_type v10, value_type v11,
-                             value_type v12, value_type v13, value_type v14,
-                             value_type v15, value_type v16, value_type v17,
-                             value_type v18, value_type v19, value_type v20,
-                             value_type v21, value_type v22, value_type v23)const noexcept{
-    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
-    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
-    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
-    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
-    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
-    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
-    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
-    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
-  }
-};
-
-
-
-
-
-
-struct second_order_mur_boundary_conditions{
-    template<typename field_type, typename dt_type>
-    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
-        using scalar = decltype(dt);
-        //TODO: tbh don't know
-        //const unsigned nghost = 1;
-        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
-        //assert_isreal((betaMur[0]));
-        //assert_isreal((betaMur[1]));
-        //assert_isreal((betaMur[2]));
-        auto A_n   = FA_n.getView();
-        auto A_np1 = FA_np1.getView();
-        auto A_nm1 = FA_nm1.getView();
-        ippl::Vector<uint32_t, 3> local_nr{
-            uint32_t(A_n.extent(0)),
-            uint32_t(A_n.extent(1)),
-            uint32_t(A_n.extent(2))
-        };
-        constexpr uint32_t min_abc_boundary = 1;
-        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-            
-            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-
-            if(Kokkos::popcount(val) == 1){
-                if(ig == min_abc_boundary){
-                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(jg == min_abc_boundary){
-                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(kg == min_abc_boundary){
-                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(ig == true_nr[0] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                if(kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-            }
-        });
-        Kokkos::fence();
-        //FA_np1.fillHalo();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-
-            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            if(Kokkos::popcount(lval) > 2)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-            if(Kokkos::popcount(val) == 2){ //Edge
-                if(ig == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-
-                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
-                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else{
-                    assert(false);
-                }
-            }
-        });
-        Kokkos::fence();
-        //FA_np1.fillHalo();
-        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
-            uint32_t ig = i + lDom.first()[0];
-            uint32_t jg = j + lDom.first()[1];
-            uint32_t kg = k + lDom.first()[2];
-
-            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
-            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
-
-            //if(Kokkos::popcount(lval) > 1)return;
-            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
-                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
-            
-            if(Kokkos::popcount(val) == 3){
-                //printf("Corner: %d, %d, %d\n", i, j, k);
-                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
-                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
-                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
-                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
-                }
-                else{
-                    assert(false);
-                }
-            }
-        });
-        Kokkos::fence();
-    }
-};
-//END ABSORBING BOUNDARY CONDITION
-
-
-
-
-
-
-
-
-
 namespace ippl {
     
     template<typename scalar>
@@ -1700,11 +1264,11 @@ namespace ippl {
         scalar a8;
     };
     template <typename _scalar, class PLayout>
-    struct  Bunch : public ippl::ParticleBase<PLayout> {
+    struct  Bunch_eb : public ippl::ParticleBase<PLayout> {
         using scalar = _scalar;
 
         // Constructor for the Bunch class, taking a PLayout reference
-        Bunch(PLayout& playout)
+        Bunch_eb(PLayout& playout)
             : ippl::ParticleBase<PLayout>(playout) {
             // Add attributes to the particle bunch
             this->addAttribute(Q);          // Charge attribute
@@ -1716,7 +1280,7 @@ namespace ippl {
         }
 
         // Destructor for the Bunch class
-        ~Bunch() {}
+        ~Bunch_eb() {}
 
         // Define container types for various attributes
         using charge_container_type   = ippl::ParticleAttrib<scalar>;
@@ -1770,8 +1334,8 @@ namespace ippl {
         FieldLayout<dim>* layout_mp;
         using playout_type = ParticleSpatialLayout<scalar, 3>;
         playout_type playout;
-        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
-        using bunch_type =  Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        using bunch_type =  Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>>;
         config m_config;
 
         /**
diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h
index 198f20b44..3aa3177d2 100644
--- a/src/Expression/IpplOperations.h
+++ b/src/Expression/IpplOperations.h
@@ -276,6 +276,9 @@ namespace ippl {
                 }
                 return res;
             }
+            KOKKOS_INLINE_FUNCTION operator typename E1::value_type() const{
+                return apply();
+            }
 
             /*
              * This is required for BareField::dot
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 772ff538c..6ba94672e 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -9,6 +9,7 @@ using std::size_t;
 #include "FieldLayout/FieldLayout.h"
 #include "Meshes/UniformCartesian.h"
 #include "MaxwellSolvers/Maxwell.h"
+#include "Particle/ParticleBase.h"
 #include "MaxwellSolvers/AbsorbingBC.h"
 constexpr double sqrt_4pi = 3.54490770181103205459;
 constexpr double alpha_scaling_factor = 1e30;
@@ -51,7 +52,7 @@ constexpr double sq(double x){
 constexpr float sq(float x){
     return x * x;
 }
-
+#define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X))
 namespace ippl {
     enum fdtd_bc{
         periodic, absorbing
@@ -95,7 +96,8 @@ namespace ippl {
         FourField A_n;
         FourField A_np1;
         FourField A_nm1;
-        
+        scalar dt;
+
         private:
         void timeShift(){
 
@@ -116,7 +118,7 @@ namespace ippl {
                 bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
             }
         }
-
+        public:
         void step(){
             const auto& ldom    = layout_mp->getLocalNDIndex();
             const int nghost    = A_n.getNghost();
@@ -194,7 +196,8 @@ namespace ippl {
         FieldLayout<Dim>* layout_mp;
         NDIndex<Dim> domain_m;
         Vector_t hr_m;
-        scalar dt;
+        
+        
         Vector<int, Dim> nr_m;
         
         void initialize() {
@@ -300,6 +303,7 @@ namespace ippl {
             scalar a6;
             scalar a8;
         };
+        public:
         void step(){
             const auto& ldom    = layout_mp->getLocalNDIndex();
             const int nghost    = A_n.getNghost();
@@ -401,6 +405,330 @@ namespace ippl {
             A_np1 = 0.0;
         };
     };
+    
+    template <typename _scalar, class PLayout>
+    struct  Bunch : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);          // Charge attribute
+            this->addAttribute(mass);       // Mass attribute
+            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);      // Position attribute for the next time step
+            this->addAttribute(R_nm1);      // Position attribute for the next time step
+            this->addAttribute(E_gather);   // Electric field attribute for particle gathering
+            this->addAttribute(B_gather);   // Magnedit field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;          // Charge container
+        charge_container_type mass;       // Mass container
+        velocity_container_type gamma_beta; // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>> E_gather;   // Electric field container for particle gathering
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>> B_gather;   // Magnetio field container for particle gathering
+
+    };
+    template<typename T>
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
+        //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
+        //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+        //pos -= spacing * T(0.5);
+        ippl::Vector<T, 3> relpos = pos - origin;
+        ippl::Vector<T, 3> gridpos = relpos / hr;
+        ippl::Vector<int, 3> ipos;
+        ipos = gridpos.template cast<int>();
+        ippl::Vector<T, 3> fracpos;
+        for(unsigned k = 0;k < 3;k++){
+            fracpos[k] = gridpos[k] - (int)ipos[k];
+        }
+        //TODO: NGHOST!!!!!!!
+        ipos += ippl::Vector<int, 3>(1);
+        ret.first = ipos;
+        ret.second = fracpos;
+        return ret;
+    }
+    template<typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
+        if(
+            ipos[0] < 0
+            ||ipos[1] < 0
+            ||ipos[2] < 0
+            ||size_t(ipos[0]) >= view.extent(0) - 1
+            ||size_t(ipos[1]) >= view.extent(1) - 1
+            ||size_t(ipos[2]) >= view.extent(2) - 1
+            ||fracpos[0] < 0
+            ||fracpos[1] < 0
+            ||fracpos[2] < 0
+        ){
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for(unsigned i = 0;i < 8;i++){
+            scalar weight = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for(unsigned d = 0;d < 3;d++){
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(value);
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template<typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
+        //std::cout << "Value: " << value << "\n";
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        if(
+            ipos[0] < 0
+            ||ipos[1] < 0
+            ||ipos[2] < 0
+            ||size_t(ipos[0]) >= view.extent(0) - 1
+            ||size_t(ipos[1]) >= view.extent(1) - 1
+            ||size_t(ipos[2]) >= view.extent(2) - 1
+            ||fracpos[0] < 0
+            ||fracpos[1] < 0
+            ||fracpos[2] < 0
+        ){
+            //std::cout << "out of bounds\n";
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for(unsigned i = 0;i < 8;i++){
+            scalar weight = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for(unsigned d = 0;d < 3;d++){
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template<typename view_type, typename scalar>
+    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
+
+
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
+        //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
+        for(int d = 0;d < 3;d++){
+            //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
+            //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
+            //}
+            //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+        }
+        //const uint32_t nghost = g.nghost();
+        //from_ipos += ippl::Vector<int, 3>(nghost);
+        //to_ipos += ippl::Vector<int, 3>(nghost);
+
+        if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
+            scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
+
+            return;
+        }
+        ippl::Vector<scalar, 3> relay;
+        const int nghost = 1;
+        const ippl::Vector<scalar, 3> orig = origin;
+        using Kokkos::max;
+        using Kokkos::min;
+        for (unsigned int i = 0; i < 3; i++) {
+            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
+                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
+                               scalar(0.5) * (to[i] + from[i])));
+        }
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+    }
+    template<typename scalar, fdtd_bc boundary_conditions>
+    class NSFDSolverWithParticles{
+        public:
+        constexpr static unsigned dim = 3;
+        using vector_type = ippl::Vector<scalar, 3>;
+        using vector4_type = ippl::Vector<scalar, 4>;
+        using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using playout_type = ParticleSpatialLayout<scalar, 3>;
+        using bunch_type = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        using Mesh_t = ippl::UniformCartesian<scalar, dim>;
+        FieldLayout<dim>* layout_mp;
+        Mesh_t* mesh_mp;
+        playout_type playout;
+        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        FourField J;
+        ThreeField E;
+        ThreeField B;
+        NonStandardFDTDSolver<ThreeField, FourField> field_solver;
+
+        ippl::Vector<uint32_t, 3> nr_global;
+        ippl::Vector<scalar, 3> hr_m;
+        size_t steps_taken;
+        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : layout_mp(&layout), mesh_mp(&mesch), playout(layout, mesch), particles(playout), J(mesch, layout), E(mesch, layout), B(mesch, layout), field_solver(J, E, B){
+            particles.create(nparticles);
+            nr_global = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
+            };
+            hr_m = mesh_mp->getMeshSpacing();
+            steps_taken = 0;
+            //field_solver.setEMFields(E, B);
+        }
+        template<bool space_charge = true>
+        void scatterBunch(){
+            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
+            auto hr_m = mesh_mp->getMeshSpacing();
+            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
+            assert_isreal(volume);
+            assert_isreal((scalar(1) / volume));
+            J = typename decltype(J)::value_type(0);
+            auto Jview = J.getView();
+            auto qview = particles.Q.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto orig = mesh_mp->getOrigin();
+            auto hr = mesh_mp->getMeshSpacing();
+            auto dt = field_solver.dt;
+            bool sc = space_charge;
+            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
+            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                vector_type pos = rview(i);
+                vector_type to = rview(i);
+                vector_type from = rm1view(i);
+                if(sc){
+                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
+                }
+                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
+            });
+            Kokkos::fence();
+            J.accumulateHalo();
+        }
+        template<typename callable>
+        void updateBunch(scalar /*time*/){
+            
+            Kokkos::fence();
+            auto gbview = particles.gamma_beta.getView();
+            auto eview = particles.E_gather.getView();
+            auto bview = particles.B_gather.getView();
+            auto qview = particles.Q.getView();
+            auto mview = particles.mass.getView();
+            auto rview = particles.R.getView();
+            auto rm1view = particles.R_nm1.getView();
+            auto rp1view = particles.R_np1.getView();
+            scalar bunch_dt = field_solver.dt / 3;
+            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            E.fillHalo();
+            B.fillHalo();
+            Kokkos::fence();
+            for(int bts = 0;bts < 3;bts++){
+                
+                particles.E_gather.gather(E, particles.R);
+                particles.B_gather.gather(B, particles.R);
+                Kokkos::fence();
+                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                    ippl::Vector<scalar, 3> E = eview(i);
+                    ippl::Vector<scalar, 3> B = bview(i);
+                    ippl::Vector<scalar, 3> labpos = rview(i);
+                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{E, B};
+
+                    const scalar charge = qview(i);
+                    const scalar mass = mview(i);
+                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
+                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                    gbview(i) = ngammabeta;
+                });
+                Kokkos::fence();
+            }
+            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
+            size_t invalid_count = 0;
+            auto origo = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;//
+            extenz[0] = nr_global[0] * hr_m[0];
+            extenz[1] = nr_global[1] * hr_m[1];
+            extenz[2] = nr_global[2] * hr_m[2];
+            Kokkos::parallel_reduce(
+                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t& ref){
+                    bool out_of_bounds = false;
+                    ippl::Vector<scalar, dim> ppos = rview(i);
+                    for(size_t d = 0;d < dim;d++){
+                        out_of_bounds |= (ppos[d] <= origo[d]);
+                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
+                    }
+                    invalid(i) = out_of_bounds;
+                    ref += out_of_bounds;
+                }, 
+                invalid_count);
+            particles.destroy(invalid, invalid_count);
+            Kokkos::fence();
+            
+        }
+        void solve(){
+            scatterBunch();
+            field_solver.solve();
+            updateBunch<float>(field_solver.dt * steps_taken);
+
+        }
+    };
 }
 
 #endif
\ No newline at end of file
diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt
index dda292fff..7bf9f853b 100644
--- a/test/solver/CMakeLists.txt
+++ b/test/solver/CMakeLists.txt
@@ -31,6 +31,12 @@ target_link_libraries (
     ${IPPL_LIBS}
     ${MPI_CXX_LIBRARIES}
 )
+add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp)
+target_link_libraries (
+    TestFDTDSolverWithParticles
+    ${IPPL_LIBS}
+    ${MPI_CXX_LIBRARIES}
+)
 
 if (ENABLE_FFT)
     add_executable (TestGaussian_convergence TestGaussian_convergence.cpp)
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
index b45c96298..62088acc9 100644
--- a/test/solver/TestFDTDSolver.cpp
+++ b/test/solver/TestFDTDSolver.cpp
@@ -33,7 +33,7 @@ int main(int argc, char* argv[]){
         using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         constexpr size_t n = 100;
-        ippl::Vector<uint32_t, 3> nr{n/2, n/2, 2*n};
+        ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
         ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
         ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
         std::array<bool, 3> isParallel;
@@ -131,6 +131,8 @@ int main(int argc, char* argv[]){
             ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]);
         }, sum_error);
         std::cout << "Sum: " << sum_error / double(n * n * n) << "\n";
+
+        
     }
     //exit:
     ippl::finalize();
diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp
new file mode 100644
index 000000000..f7b6dbcfd
--- /dev/null
+++ b/test/solver/TestFDTDSolverWithParticles.cpp
@@ -0,0 +1,59 @@
+#include <cstddef>
+using std::size_t;
+#include <Kokkos_Core.hpp>
+#include "Ippl.h"
+#include "Types/Vector.h"
+#include "Field/Field.h"
+#include "MaxwellSolvers/FDTD.h"
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.h>
+template<typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    scalar1 vecsum(0);
+    for(unsigned d = 0;d < dim;d++){
+        vecsum += vec[d] * vec[d];
+        
+    }
+    #ifndef __CUDA_ARCH__
+    using std::exp;
+    #endif
+    return exp(-(vecsum) / (stddev * stddev)); 
+}
+
+int main(int argc, char* argv[]){
+    ippl::initialize(argc, argv);
+    {
+        using scalar = float;
+        //const unsigned dim = 3;
+        //using vector_type = ippl::Vector<scalar, 3>;
+        //using vector4_type = ippl::Vector<scalar, 4>;
+        //using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        //using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        constexpr size_t n = 100;
+        ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+        ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
+        ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
+        std::array<bool, 3> isParallel;
+        isParallel.fill(false);
+        isParallel[2] = true;
+
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        ippl::Vector<scalar, 3> hx;
+        for(unsigned d = 0;d < 3;d++){
+            hx[d] = extents[d] / (scalar)nr[d];
+        }
+        ippl::Vector<scalar, 3> origin = {0,0,0};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        
+        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1 << 17);
+        for(int i = 0;i < 10;i++)
+            solver.solve();
+    }
+    //exit:
+    ippl::finalize();
+}
\ No newline at end of file

From ae9472c9df5fe9171424a0e919e2fb81e7453b12 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Fri, 3 May 2024 10:34:26 +0200
Subject: [PATCH 19/32] Support for external fields

---
 src/MaxwellSolvers/FDTD.h | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 6ba94672e..5ad333171 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -657,7 +657,7 @@ namespace ippl {
             J.accumulateHalo();
         }
         template<typename callable>
-        void updateBunch(scalar /*time*/){
+        void updateBunch(scalar time, callable external_field){
             
             Kokkos::fence();
             auto gbview = particles.gamma_beta.getView();
@@ -680,10 +680,16 @@ namespace ippl {
                 Kokkos::fence();
                 Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                     const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
-                    ippl::Vector<scalar, 3> E = eview(i);
-                    ippl::Vector<scalar, 3> B = bview(i);
-                    ippl::Vector<scalar, 3> labpos = rview(i);
-                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{E, B};
+                    ippl::Vector<scalar, 3> E_grid = eview(i);
+                    ippl::Vector<scalar, 3> B_grid = bview(i);
+                    
+                    ippl::Vector<scalar, 3> bunchpos = rview(i);
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb = external_field(bunchpos, time);
+                    
+                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
+                        ippl::Vector<scalar, 3>(E_grid + external_eb.first), 
+                        ippl::Vector<scalar, 3>(B_grid + external_eb.second)
+                    };
 
                     const scalar charge = qview(i);
                     const scalar mass = mview(i);
@@ -725,8 +731,16 @@ namespace ippl {
         void solve(){
             scatterBunch();
             field_solver.solve();
-            updateBunch<float>(field_solver.dt * steps_taken);
-
+            updateBunch(field_solver.dt * steps_taken, /*no external field*/[]KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/){return Kokkos::pair<vector_type, vector_type>{
+                vector_type(0),
+                vector_type(0)
+            };});
+        }
+        template<typename callable>
+        void solve(callable external_field){
+            scatterBunch();
+            field_solver.solve();
+            updateBunch(field_solver.dt * steps_taken, external_field);
         }
     };
 }

From a72e92ffd2ccaa803cbb7ee0b0b509fc3df6fb78 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 4 May 2024 01:04:56 +0200
Subject: [PATCH 20/32] Particle solwer

---
 alpine/FreeElectronLaser.cpp                |  4 ++--
 src/MaxwellSolvers/FDTD.h                   |  3 ++-
 src/Particle/ParticleAttrib.hpp             |  1 +
 test/solver/TestFDTDSolverWithParticles.cpp | 26 +++++++++++++++++++--
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index 5c014e96a..fc26b93b8 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -1774,7 +1774,7 @@ int main(int argc, char* argv[]) {
         
         test_gauss_law<scalar>(64);
         test_amperes_law<scalar>(64);
-        goto exit;
+        //goto exit;
         config cfg = read_config("../config.json");
         const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
         cfg.extents[2] *= frame_gamma;
@@ -1873,7 +1873,7 @@ int main(int argc, char* argv[]) {
             fdtd_state.J = scalar(0.0);
             fdtd_state.playout.update(fdtd_state.particles);
             fdtd_state.scatterBunch();
-            //std::cout << fdtd_state.J.getVolumeIntegral() << "\n";
+            std::cout << i << "\n";
             fdtd_state.fieldStep();
             fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
             auto ldom = layout.getLocalNDIndex();
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 5ad333171..c3187579c 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -682,7 +682,8 @@ namespace ippl {
                     const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
                     ippl::Vector<scalar, 3> E_grid = eview(i);
                     ippl::Vector<scalar, 3> B_grid = bview(i);
-                    
+                    //std::cout << "E_grid: " << E_grid << "\n";
+                    //std::cout << "B_grid: " << B_grid << "\n";
                     ippl::Vector<scalar, 3> bunchpos = rview(i);
                     Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb = external_field(bunchpos, time);
                     
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 3b37f920a..e7ee0b12c 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -194,6 +194,7 @@ namespace ippl {
                 Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
 
                 // gather
+                //std::cout << index << std::endl;
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
                                                        view, wlo, whi, args);
             });
diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp
index f7b6dbcfd..4b58a1f6c 100644
--- a/test/solver/TestFDTDSolverWithParticles.cpp
+++ b/test/solver/TestFDTDSolverWithParticles.cpp
@@ -7,6 +7,8 @@ using std::size_t;
 #include "MaxwellSolvers/FDTD.h"
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.h>
+
+#include <Kokkos_Random.hpp>
 template<typename scalar1, typename... scalar>
     requires((std::is_floating_point_v<scalar1>))
 KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
@@ -35,7 +37,9 @@ int main(int argc, char* argv[]){
         constexpr size_t n = 100;
         ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
         ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
-        ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
+        ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,
+                                        meter_in_unit_lengths,
+                                        meter_in_unit_lengths};
         std::array<bool, 3> isParallel;
         isParallel.fill(false);
         isParallel[2] = true;
@@ -50,7 +54,25 @@ int main(int argc, char* argv[]){
         ippl::Vector<scalar, 3> origin = {0,0,0};
         ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
         
-        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1 << 17);
+        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1);
+        
+        auto pview = solver.particles.R.getView();
+        auto p1view = solver.particles.R_nm1.getView();
+        auto gbview = solver.particles.gamma_beta.getView();
+
+        Kokkos::Random_XorShift64_Pool<> random_pool(12345);
+        Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+            auto state = random_pool.get_state();
+            pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.01 * extents[0]);
+            pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.01 * extents[1]);
+            pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.01 * extents[2]);
+            p1view(i) = pview(i);
+            gbview(i) = 0;
+            random_pool.free_state(state);
+        });
+        solver.playout.update(solver.particles);
+        solver.particles.Q = electron_charge_in_unit_charges;
+        solver.particles.mass = electron_mass_in_unit_masses;
         for(int i = 0;i < 10;i++)
             solver.solve();
     }

From c4ff2762930e05c0ba7fe056586e9677a11e9a7b Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 4 May 2024 09:44:04 +0200
Subject: [PATCH 21/32] ParticleAttribute: add custom reduction loop

---
 src/Particle/ParticleAttrib.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h
index 70168a79b..093e2b519 100644
--- a/src/Particle/ParticleAttrib.h
+++ b/src/Particle/ParticleAttrib.h
@@ -130,6 +130,17 @@ namespace ippl {
         T max();
         T min();
         T prod();
+        template<typename returnType, typename reductionLambda>
+            requires (std::is_invocable_v<reductionLambda, T, returnType&>)
+        returnType customReduction(reductionLambda lambda, returnType identity){
+            auto dview = this->dview_m;
+            Kokkos::parallel_reduce(*(this->localNum_mp), KOKKOS_LAMBDA(size_t i, returnType& ref){
+                lambda(dview(i), ref);
+            }, identity);
+            returnType globaltemp;
+            Comm->allreduce(identity, globaltemp, 1, std::plus<returnType>());
+            return globaltemp;
+        }
 
     private:
         view_type dview_m;

From fd86bdc12dcd018a1a06fc7dc667a1c83dcbe4a5 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 4 May 2024 09:54:18 +0200
Subject: [PATCH 22/32] fdtd particle test

---
 src/MaxwellSolvers/FDTD.h                   |  1 +
 test/solver/TestFDTDSolverWithParticles.cpp | 28 +++++++++++++++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index c3187579c..26ebfbbb6 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -727,6 +727,7 @@ namespace ippl {
                 invalid_count);
             particles.destroy(invalid, invalid_count);
             Kokkos::fence();
+            playout.update(particles);
             
         }
         void solve(){
diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp
index 4b58a1f6c..a48a829e1 100644
--- a/test/solver/TestFDTDSolverWithParticles.cpp
+++ b/test/solver/TestFDTDSolverWithParticles.cpp
@@ -54,7 +54,7 @@ int main(int argc, char* argv[]){
         ippl::Vector<scalar, 3> origin = {0,0,0};
         ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
         
-        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1);
+        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1 << 15);
         
         auto pview = solver.particles.R.getView();
         auto p1view = solver.particles.R_nm1.getView();
@@ -63,18 +63,36 @@ int main(int argc, char* argv[]){
         Kokkos::Random_XorShift64_Pool<> random_pool(12345);
         Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
             auto state = random_pool.get_state();
-            pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.01 * extents[0]);
-            pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.01 * extents[1]);
-            pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.01 * extents[2]);
+            pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.04 * extents[0]);
+            pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.04 * extents[1]);
+            pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.04 * extents[2]);
             p1view(i) = pview(i);
             gbview(i) = 0;
             random_pool.free_state(state);
         });
+        {
+            double var = 0;
+            Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){
+                ippl::Vector<scalar, 3> pd(pview(i) - (origin + extents * 0.5));
+                ref += pd.dot(pd);
+            }, var);
+            std::cout << ippl::Vector<double, 3>(var * (1.0 / solver.particles.getLocalNum())) << "\n";
+        }
         solver.playout.update(solver.particles);
         solver.particles.Q = electron_charge_in_unit_charges;
         solver.particles.mass = electron_mass_in_unit_masses;
-        for(int i = 0;i < 10;i++)
+        for(int i = 0;i < 200;i++){
             solver.solve();
+        }
+        {
+            double var = 0;
+            Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){
+                ippl::Vector<scalar, 3> pd(pview(i) - (origin + extents * 0.5));
+                ref += pd.dot(pd);
+            }, var);
+            std::cout << ippl::Vector<double, 3>(var * (1.0 / solver.particles.getLocalNum())) << "\n";
+        }
+        
     }
     //exit:
     ippl::finalize();

From 5731b7f5042f73c98744bd124dcf62e53ed491af Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 4 May 2024 16:03:17 +0200
Subject: [PATCH 23/32] Hack gather routine

---
 src/MaxwellSolvers/FDTD.h       | 4 +++-
 src/Particle/ParticleAttrib.hpp | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 26ebfbbb6..32bbdec3f 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -345,6 +345,8 @@ namespace ippl {
             applyBCs();
         }
         void evaluate_EB(){
+            *(Maxwell<EMField, FourField>::En_mp) = typename EMField::value_type(0);
+            *(Maxwell<EMField, FourField>::Bn_mp) = typename EMField::value_type(0);
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
             const scalar idt = scalar(1.0) / dt;
             auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView();
@@ -611,7 +613,7 @@ namespace ippl {
         FourField J;
         ThreeField E;
         ThreeField B;
-        NonStandardFDTDSolver<ThreeField, FourField> field_solver;
+        NonStandardFDTDSolver<ThreeField, FourField, absorbing> field_solver;
 
         ippl::Vector<uint32_t, 3> nr_global;
         ippl::Vector<scalar, 3> hr_m;
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index e7ee0b12c..4a8f0010a 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -186,7 +186,7 @@ namespace ippl {
             "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
+                vector_type l                        = (pp(idx) - origin) * invdx + 1.0;
                 Vector<int, Field::dim> index        = l;
                 Vector<PositionType, Field::dim> whi = l - index;
                 Vector<PositionType, Field::dim> wlo = 1.0 - whi;

From 427b5f78263655c0d2ae907cb625d3978324232f Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 5 May 2024 10:40:33 +0200
Subject: [PATCH 24/32] helper structs

---
 alpine/FreeElectronLaser.cpp | 79 +++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 14 deletions(-)

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index fc26b93b8..e4d679b4e 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -661,10 +661,30 @@ struct UniaxialLorentzframe{
     scalar beta_m;
     scalar gammaBeta_m;
     scalar gamma_m;
+    KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma){
+
+        UniaxialLorentzframe ret;
+        ret.gamma_m = gamma;
+        scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
+        scalar gammabeta = gamma * beta;
+        ret.beta_m = beta;
+        ret.gammaBeta_m = gammabeta;
+        return ret;
+    }
+    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative()const noexcept{
+        UniaxialLorentzframe ret;
+        ret.beta_m = -beta_m;
+        ret.gammaBeta_m = -gammaBeta_m;
+        ret.gamma_m = gamma_m;
+        return ret;
+    }
+
+    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
     KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){
+        using Kokkos::sqrt;
         gammaBeta_m = gammaBeta;
-        beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta));
-        gamma_m = sqrt(1 + gammaBeta * (gammaBeta));
+        beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
+        gamma_m = sqrt(1 + gammaBeta * gammaBeta);
     }
     KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{
         arg[axis] = gamma_m * (arg[axis] + beta_m * time); 
@@ -680,7 +700,7 @@ struct UniaxialLorentzframe{
         return ret;
     }
     KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> inverse_transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB)const noexcept{
-        return UniaxialLorentzframe<T, 2>(-gammaBeta_m).transform_EB(primedEB);
+        return negative().transform_EB(primedEB);
     }
 };
 template<typename scalar>
@@ -748,7 +768,7 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
     FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
     FieldVector<Double> r  (0.0);
     FieldVector<Double> t  (0.0);
-    Double            	t0, g;
+    Double            	t0;//, g;
     Double		zmin = 1e100;
     Double		Ne, bF, bFi;
     unsigned int	bmi;
@@ -1297,11 +1317,40 @@ namespace ippl {
         ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
 
     };
+    template<typename scalar>
+    struct Undulator{
+        undulator_parameters<scalar> uparams;
+        scalar distance_to_entry;
+        scalar k_u;
+        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){}
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame)const noexcept{
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
+            ret.first.fill(0);
+            ret.second.fill(0);
+            if(position_in_lab_frame[2] < distance_to_entry){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
+            }
+            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);
+                ret.second[0] = 0;
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
+            }
+            return ret;
+        };
+    };
+    
 
 
     template <typename scalar>
     // clang-format off
-    struct FDTDFieldState{
+    struct FELSimulationState{
         
         //Sorry, can't do more than 3d
 
@@ -1337,16 +1386,18 @@ namespace ippl {
         Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>> particles;
         using bunch_type =  Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>>;
         config m_config;
-
+        UniaxialLorentzframe<scalar, 2 /*along z*/> ulb;
+        undulator_parameters<scalar> uparams;
+        Undulator<scalar> undulator;
         /**
          * @brief Construct a new FDTDFieldState object
          * Mesh and resolution parameter are technically redundant
-         * 
+         * @details ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5) is the frame's gamma factor
          * @param resolution 
          * @param layout 
          * @param mesch 
          */
-        FDTDFieldState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){
+        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg), ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){
             FA_np1.initialize(mesch, layout, 1);
             FA_n.initialize(mesch, layout, 1);
             FA_nm1.initialize(mesch, layout, 1);
@@ -1658,7 +1709,7 @@ scalar test_gauss_law(uint32_t n){
     uint32_t pcount = 1 << 20;
     config cfg{};
     cfg.space_charge = true;
-    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, cfg);
+    ippl::FELSimulationState<scalar> field_state(layout, mesh, pcount, cfg);
     field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
     field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
     auto pview = field_state.particles.R.getView();
@@ -1688,7 +1739,7 @@ scalar test_gauss_law(uint32_t n){
     auto lDom = field_state.EB.getLayout().getLocalNDIndex();
     
     std::ofstream line("gauss_line.txt");
-    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    typename ippl::FELSimulationState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
     //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
     for(unsigned i = 1;i < nr[2];i++){
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
@@ -1723,7 +1774,7 @@ scalar test_amperes_law(uint32_t n){
 
     uint32_t pcount = 1 << 20;
     config cfg{};cfg.space_charge = true;
-    ippl::FDTDFieldState<scalar> field_state(layout, mesh, pcount, cfg);
+    ippl::FELSimulationState<scalar> field_state(layout, mesh, pcount, cfg);
     field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
     field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
     auto pview = field_state.particles.R.getView();
@@ -1756,7 +1807,7 @@ scalar test_amperes_law(uint32_t n){
     
     std::ofstream line("ampere_line.txt");
     
-    typename ippl::FDTDFieldState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    typename ippl::FELSimulationState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
     //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
     for(unsigned i = 1;i < nr[2];i++){
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
@@ -1831,7 +1882,7 @@ int main(int argc, char* argv[]) {
             abort();
         }
 
-        ippl::FDTDFieldState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
+        ippl::FELSimulationState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
         
         if(ippl::Comm->rank() == 0){
             std::cout << "Init particles: " << std::endl;
@@ -2001,6 +2052,6 @@ int main(int argc, char* argv[]) {
         uint64_t endtime = nanoTime();
         std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
     }
-    exit:
+    //exit:
     ippl::finalize();
 }
\ No newline at end of file

From 059807ac100669d5910effa6c78b12affeaab6ce Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sun, 5 May 2024 13:37:53 +0200
Subject: [PATCH 25/32] FEL-IR working

---
 alpine/FreeElectronLaser.cpp | 363 ++++++++---------------------------
 config.json                  |   5 +-
 src/MaxwellSolvers/FDTD.h    |   5 +-
 3 files changed, 85 insertions(+), 288 deletions(-)

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index e4d679b4e..10b66f94f 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -1324,6 +1324,11 @@ namespace ippl {
         scalar k_u;
         KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){}
         KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame)const noexcept{
+            using Kokkos::sin;
+            using Kokkos::sinh;
+            using Kokkos::cos;
+            using Kokkos::cosh;
+            using Kokkos::exp;
             Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
             ret.first.fill(0);
             ret.second.fill(0);
@@ -1365,26 +1370,18 @@ namespace ippl {
         using ThreeField = ippl::Field<Vector_t, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         using EBField_t = ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using view_type = VField_t::view_type;
-        using ev_view_type = EBField_t::view_type;
+        using view_type = typename VField_t::view_type;
+        using ev_view_type = typename EBField_t::view_type;
+        using e_view_type = typename ThreeField::view_type;
+        using b_view_type = typename ThreeField::view_type;
         //Fields
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_np1;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_n;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> FA_nm1;
-        ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> J;
-        ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering> EB;
+        ippl::NSFDSolverWithParticles<scalar, absorbing> fieldsAndParticles;
         
         //Discretization options
         Vector_t hr_m;
         ippl::Vector<uint32_t, 3> nr_global;
         ippl::Vector<uint32_t, 3> nr_local;
-        scalar dt;
-        Mesh_t* mesh_mp;
-        FieldLayout<dim>* layout_mp;
-        using playout_type = ParticleSpatialLayout<scalar, 3>;
-        playout_type playout;
-        Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>> particles;
-        using bunch_type =  Bunch_eb<scalar, ParticleSpatialLayout<scalar, 3>>;
+        //scalar dt;
         config m_config;
         UniaxialLorentzframe<scalar, 2 /*along z*/> ulb;
         undulator_parameters<scalar> uparams;
@@ -1397,18 +1394,8 @@ namespace ippl {
          * @param layout 
          * @param mesch 
          */
-        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg), ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){
-            FA_np1.initialize(mesch, layout, 1);
-            FA_n.initialize(mesch, layout, 1);
-            FA_nm1.initialize(mesch, layout, 1);
-            J.initialize(mesch, layout, 1);
-            EB.initialize(mesch, layout, 1);
-            ThreeField Edummy;
-            Edummy.initialize(mesch, layout, 1);
-            FA_n   = value_type(0);
-            FA_np1 = value_type(0);
-            FA_nm1 = value_type(0);
-            ippl::StandardFDTDSolver<ThreeField, FourField> solver(J, Edummy, Edummy);
+        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : fieldsAndParticles(layout, mesch, nparticles), m_config(cfg), ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){
+
             hr_m = mesch.getMeshSpacing();
             nr_global = ippl::Vector<uint32_t, 3>{
                 uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
@@ -1422,223 +1409,22 @@ namespace ippl {
             };
             //std::cout << "NR_M_g: " << nr_global << "\n";
             //std::cout << "NR_M_l: " << nr_local << "\n";
-            dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2]));
-            particles.create(nparticles);
-            setNoBoundaryConditions();
-            
-        }
-        void setNoBoundaryConditions() {
-            periodic_bc = false;
-            typename VField_t::BConds_t vector_bcs;
-            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
-                vector_bcs[Idx] = std::make_shared<ippl::NoBcFace<VField_t>>(Idx);
-                return 0;
-            };
-            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
-                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
-                (void)x;
-            };
-            bcsetter(std::make_index_sequence<dim * 2>{});
-            FA_n  .setFieldBC(vector_bcs);
-            FA_np1.setFieldBC(vector_bcs);
-            FA_nm1.setFieldBC(vector_bcs);
         }
-        void setPeriodicBoundaryConditions() {
-            periodic_bc = true;
-            typename VField_t::BConds_t vector_bcs;
-            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
-                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<VField_t>>(Idx);
-                return 0;
-            };
-            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
-                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
-                (void)x;
-            };
-            bcsetter(std::make_index_sequence<dim * 2>{});
-            FA_n  .setFieldBC(vector_bcs);
-            FA_np1.setFieldBC(vector_bcs);
-            FA_nm1.setFieldBC(vector_bcs);
+        scalar dt()const noexcept{
+            return fieldsAndParticles.field_solver.dt;
         }
-        
-        void scatterBunch(){
-            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
-            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
-            assert_isreal(volume);
-            assert_isreal((scalar(1) / volume));
-            auto Jview = J.getView();
-            auto qview = particles.Q.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto orig = mesh_mp->getOrigin();
-            auto hr = mesh_mp->getMeshSpacing();
-            auto dt = this->dt;
-            bool space_charge = m_config.space_charge;
-            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                Vector_t pos = rview(i);
-                Vector_t to = rview(i);
-                Vector_t from = rm1view(i);
-                if(space_charge){
-                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
-                }
-                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
+        void step(){
+            //scalar time = fieldsAndParticles.steps_taken * fieldsAndParticles.field_solver.dt;
+            auto und = this->undulator;
+            auto lb = this->ulb;
+            fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector<scalar, 3> pos, scalar time){
+                lb.primedToUnprimed(pos, time);
+                auto eb = und(pos);
+                return lb.transform_EB(eb);
             });
-            Kokkos::fence();
-            J.accumulateHalo();
         }
+        void computeRadiation(){
 
-        void fieldStep(){
-            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
-            nondispersive<scalar> ndisp{
-                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
-                .a2 = sq(dt / hr_m[0]),
-                .a4 = sq(dt / hr_m[1]),
-                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0])  - 2 * calA * sq(dt / hr_m[1]),
-                .a8 = sq(dt)
-            };
-            //if(periodic_bc){
-            //    FA_n.getFieldBC().apply(FA_n);
-            //}
-            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
-            auto source = this->J.getView();
-            //FA_nm1.fillHalo();
-            FA_n.fillHalo();
-            ippl::Vector<uint32_t, 3> true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2};
-            const auto& ldom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                uint32_t ig = i + ldom.first()[0];
-                uint32_t jg = j + ldom.first()[1];
-                uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2)
-                             + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5);
-                if(!val){
-                    A_np1(i, j, k) =    -A_nm1(i,j,k)
-                            + ndisp.a1 * A_n  (i,j,k)
-                            + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1))
-                            + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1))
-                            + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1))
-                            + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1))
-                            + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k);
-                }
-            });
-            Kokkos::fence();
-            
-            if(!periodic_bc){
-                FA_np1.fillHalo();
-                second_order_mur_boundary_conditions bc;
-                
-                
-                bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom);
-            }
-            Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView());
-            Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView());
-            //std::swap(this->A_n, this->A_nm1);
-            //std::swap(this->A_np1, this->A_n);
-            
-            evaluate_EB();
-        }
-        void evaluate_EB(){
-            FA_n.fillHalo();//FA_nm1.fillHalo();
-            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
-            const scalar idt = scalar(1.0) / dt;
-            auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView();
-            auto source = this->J.getView();
-            auto EBv = this->EB.getView();
-            Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
-                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
-                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
-                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
-
-                ippl::Vector<scalar, 3> grad_phi{
-                    dAdx[0], dAdy[0], dAdz[0]
-                };
-                ippl::Vector<scalar, 3> curlA{
-                    dAdy[3] - dAdz[2],
-                    dAdz[1] - dAdx[3],
-                    dAdx[2] - dAdy[1],
-                };
-                EBv(i,j,k)[0] = -dAdt - grad_phi;
-                EBv(i,j,k)[1] = curlA;
-            });
-            Kokkos::fence();
-        }
-        template<typename callable>
-        void updateBunch(scalar time, UniaxialLorentzframe<scalar> ulb, callable undulator_field){
-            
-            Kokkos::fence();
-            auto gbview = particles.gamma_beta.getView();
-            auto ebview = particles.EB_gather.getView();
-            auto qview = particles.Q.getView();
-            auto mview = particles.mass.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto rp1view = particles.R_np1.getView();
-            scalar bunch_dt = dt / 3;
-            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
-            EB.fillHalo();
-            Kokkos::fence();
-            for(int bts = 0;bts < 3;bts++){
-                
-                particles.EB_gather.gather(EB, particles.R);
-                Kokkos::fence();
-                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
-                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB = ebview(i);
-                    ippl::Vector<scalar, 3> labpos = rview(i);
-
-                    ulb.primedToUnprimed(labpos, time);
-
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_frame = undulator_field(labpos);
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame);
-                    assert_isreal((EB_undulator_bunch.first[0]));
-                    assert_isreal((EB_undulator_bunch.first[1]));
-                    assert_isreal((EB_undulator_bunch.first[2]));
-                    assert_isreal((EB_undulator_bunch.second[0]));
-                    assert_isreal((EB_undulator_bunch.second[1]));
-                    assert_isreal((EB_undulator_bunch.second[2]));
-                    EB[0] += EB_undulator_bunch.first;
-                    EB[1] += EB_undulator_bunch.second;
-
-                    const scalar charge = qview(i);
-                    const scalar mass = mview(i);
-                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
-                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
-                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
-                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-
-                    assert_isreal((ngammabeta[0]));
-                    assert_isreal((ngammabeta[1]));
-                    assert_isreal((ngammabeta[2]));
-                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
-                    gbview(i) = ngammabeta;
-                });
-                Kokkos::fence();
-            }
-            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
-            size_t invalid_count = 0;
-            auto origo = mesh_mp->getOrigin();
-            ippl::Vector<scalar, 3> extenz;//
-            extenz[0] = nr_global[0] * hr_m[0];
-            extenz[1] = nr_global[1] * hr_m[1];
-            extenz[2] = nr_global[2] * hr_m[2];
-            Kokkos::parallel_reduce(
-                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
-                KOKKOS_LAMBDA(size_t i, size_t& ref){
-                    bool out_of_bounds = false;
-                    ippl::Vector<scalar, dim> ppos = rview(i);
-                    for(size_t d = 0;d < dim;d++){
-                        out_of_bounds |= (ppos[d] <= origo[d]);
-                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
-                    }
-                    invalid(i) = out_of_bounds;
-                    ref += out_of_bounds;
-                }, 
-                invalid_count);
-            particles.destroy(invalid, invalid_count);
-            Kokkos::fence();
-            
         }
     };
     // clang-format on
@@ -1707,9 +1493,9 @@ scalar test_gauss_law(uint32_t n){
     ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
 
     uint32_t pcount = 1 << 20;
-    config cfg{};
-    cfg.space_charge = true;
-    ippl::FELSimulationState<scalar> field_state(layout, mesh, pcount, cfg);
+    //config cfg{};
+    //cfg.space_charge = true;
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh, pcount);
     field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
     field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
     auto pview = field_state.particles.R.getView();
@@ -1732,21 +1518,21 @@ scalar test_gauss_law(uint32_t n){
     field_state.J = scalar(0.0);
     field_state.scatterBunch();
     for(size_t i = 0;i < 8*n;i++){
-        field_state.fieldStep();
+        field_state.field_solver.solve();
     }
-    field_state.evaluate_EB();
+
     Kokkos::fence();
-    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    auto lDom = field_state.E.getLayout().getLocalNDIndex();
     
     std::ofstream line("gauss_line.txt");
-    typename ippl::FELSimulationState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    typename ippl::FELSimulationState<scalar>::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView());
     //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
     for(unsigned i = 1;i < nr[2];i++){
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
         pos[2] += hx[2] * scalar(i);
-        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
         //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
-        line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
     }
     return 0.0f;
 }
@@ -1773,14 +1559,13 @@ scalar test_amperes_law(uint32_t n){
     ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
 
     uint32_t pcount = 1 << 20;
-    config cfg{};cfg.space_charge = true;
-    ippl::FELSimulationState<scalar> field_state(layout, mesh, pcount, cfg);
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh, pcount);
     field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
     field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
     auto pview = field_state.particles.R.getView();
     auto p1view = field_state.particles.R_nm1.getView();
     constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
-    scalar timestep = field_state.dt;
+    scalar timestep = field_state.field_solver.dt;
     //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
     //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
@@ -1799,22 +1584,22 @@ scalar test_amperes_law(uint32_t n){
     field_state.J = scalar(0.0);
     field_state.scatterBunch();
     for(size_t i = 0;i < 8*n;i++){
-        field_state.fieldStep();
+        field_state.field_solver.solve();
     }
-    field_state.evaluate_EB();
+    field_state.field_solver.evaluate_EB();
     Kokkos::fence();
-    auto lDom = field_state.EB.getLayout().getLocalNDIndex();
+    auto lDom = field_state.B.getLayout().getLocalNDIndex();
     
     std::ofstream line("ampere_line.txt");
     
-    typename ippl::FELSimulationState<scalar>::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView());
+    typename ippl::FELSimulationState<scalar>::b_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.B.getView());
     //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
     for(unsigned i = 1;i < nr[2];i++){
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
         pos[2] += hx[2] * scalar(i);
-        ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, pos, origin, hx, lDom);
+        ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
         //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
-        line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n";
+        line << pos.norm() * unit_length_in_meters << " " << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n";
     }
     return 0.0f;
 }
@@ -1835,7 +1620,7 @@ int main(int argc, char* argv[]) {
         const scalar frame_gammabeta = frame_gamma * frame_beta;
         UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
         ippl::undulator_parameters<scalar> uparams(cfg);
-        const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
+        /*const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
         const scalar distance_to_entry  = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma);
         auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector<scalar, 3>& position_in_lab_frame){
             Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
@@ -1859,7 +1644,7 @@ int main(int argc, char* argv[]) {
             }
             return ret;
 
-        };
+        };*/
         BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
         ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
 
@@ -1886,28 +1671,28 @@ int main(int argc, char* argv[]) {
         
         if(ippl::Comm->rank() == 0){
             std::cout << "Init particles: " << std::endl;
-            size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma);
-            fdtd_state.particles.Q = cfg.charge / actual_pc;
-            fdtd_state.particles.mass = cfg.mass / actual_pc;
+            size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, mithra_config, frame_gamma);
+            fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc;
+            fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc;
         }
         else{
-            fdtd_state.particles.create(0);
+            fdtd_state.fieldsAndParticles.particles.create(0);
         }
         {
-            auto rview = fdtd_state.particles.R.getView();
-            auto rm1view = fdtd_state.particles.R_nm1.getView();
-            ippl::Vector<scalar, 3> meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum());
+            auto rview = fdtd_state.fieldsAndParticles.particles.R.getView();
+            auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView();
+            ippl::Vector<scalar, 3> meanpos = fdtd_state.fieldsAndParticles.particles.R.sum() * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum());
     
-            Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+            Kokkos::parallel_for(fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                 rview(i) -= meanpos;
                 rm1view(i) -= meanpos;
             });
         }
-        fdtd_state.particles.setParticleBC(ippl::NO);
+        fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO);
         //fdtd_state.scatterBunch();
         //std::cout << cfg.charge << "\n";
         
-        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt);
+        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt());
         uint64_t starttime =  nanoTime();
         std::ofstream rad;
         FILE* ffmpeg_file = nullptr;
@@ -1921,20 +1706,24 @@ int main(int argc, char* argv[]) {
 
 
         for(size_t i = 0;i < timesteps_required;i++){
-            fdtd_state.J = scalar(0.0);
-            fdtd_state.playout.update(fdtd_state.particles);
-            fdtd_state.scatterBunch();
+
+            //fdtd_state.J = scalar(0.0);
+            //fdtd_state.playout.update(fdtd_state.particles);
+            //fdtd_state.scatterBunch();
             std::cout << i << "\n";
-            fdtd_state.fieldStep();
-            fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
+            fdtd_state.step();
+            //fdtd_state.fieldStep();
+            //fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
             auto ldom = layout.getLocalNDIndex();
             auto nrg = fdtd_state.nr_global;
-            auto ebv = fdtd_state.EB.getView();
+            auto eview = fdtd_state.fieldsAndParticles.E.getView();
+            auto bview = fdtd_state.fieldsAndParticles.B.getView();
+            //auto ebv = fdtd_state.EB.getView();
             double radiation = 0.0;
-            Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
+            Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
                 //uint32_t ig = i + ldom.first()[0];
                 //uint32_t jg = j + ldom.first()[1];
-                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]};
+                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{eview(i,j,k), bview(i,j,k)};
                 ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
                 ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
                 uint32_t kg = k + ldom.first()[2];
@@ -1950,7 +1739,7 @@ int main(int argc, char* argv[]) {
             MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
             if(ippl::Comm->rank() == 0){
                 ippl::Vector<scalar, 3> pos{0,0,0};
-                frame_boost.primedToUnprimed(pos, fdtd_state.dt * i);
+                frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i);
                 rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
             }
             //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
@@ -1968,9 +1757,9 @@ int main(int argc, char* argv[]) {
                 int floatcount = img_width * img_height * 3;
                 uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
                 std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
-                auto phmirror = fdtd_state.particles.R.getHostMirror();
-                Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView());
-                for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){
+                auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror();
+                Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView());
+                for(size_t hi = 0;hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();hi++){
                     ippl::Vector<scalar, 3> ppos = phmirror(hi);
                     ppos -= mesh.getOrigin();
                     ppos /= cfg.extents.cast<scalar>();
@@ -1984,8 +1773,11 @@ int main(int argc, char* argv[]) {
                         std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
                     }
                 };
-                auto ebh = fdtd_state.EB.getHostMirror();
-                Kokkos::deep_copy(ebh, fdtd_state.EB.getView());
+
+                auto eh = fdtd_state.fieldsAndParticles.E.getHostMirror();
+                auto bh = fdtd_state.fieldsAndParticles.B.getHostMirror();
+                Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView());
+                Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView());
 
                 //double exp_avg = double(exp_sum) / double(acount);
                 {
@@ -1995,11 +1787,12 @@ int main(int argc, char* argv[]) {
                             int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
                             if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
                                 if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
-                                    ippl::Vector<ippl::Vector<scalar, 3>, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
+                                    ippl::Vector<scalar, 3> E = eh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
+                                    ippl::Vector<scalar, 3> B = bh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
                                     
-                                    ippl::Vector<scalar, 3> poynting = acc[0].cross(acc[1]);
+                                    ippl::Vector<scalar, 3> poynting = E.cross(B);
                                     Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> eblab =  frame_boost.inverse_transform_EB(
-                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(acc[0], acc[1])
+                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(E, B)
                                     );
                                     ippl::Vector<scalar, 3> poyntinglab = eblab.first.cross(eblab.second);
                                     poynting = poyntinglab;
diff --git a/config.json b/config.json
index 0948650ee..f5cd3b99f 100644
--- a/config.json
+++ b/config.json
@@ -4,7 +4,7 @@
       "length-scale" : "micrometers",
       "time-scale"   : "picoseconds",
       "extents": [3400.0, 3400.0, 280.0],
-      "resolution": [32, 32, 800],
+      "resolution": [64, 64, 2800],
       "mesh-center": [0.0, 0.0, 0.0],
       "total-time": 30000.0,
       "bunch-time-step": 1.6,
@@ -19,7 +19,7 @@
       "_mass": 1,
       "charge": 1.846e8,
       "mass": 1.846e8,
-      "number-of-particles": 50000,
+      "number-of-particles": 1000000,
       "gamma": 100.41,
       "_gamma": 1.0,
       "direction (ignored)": [0.0, 0.0, 1.0],
@@ -61,6 +61,7 @@
     "output":{
       "rhythm" : 30,
       "count" : 20,
+      "path": "../renderdata",
       "type" : "E",
       "track" : {
         "radiation" : "boundary_radiation.txt",
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 32bbdec3f..574026d73 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -687,7 +687,7 @@ namespace ippl {
                     //std::cout << "E_grid: " << E_grid << "\n";
                     //std::cout << "B_grid: " << B_grid << "\n";
                     ippl::Vector<scalar, 3> bunchpos = rview(i);
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb = external_field(bunchpos, time);
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb = external_field(bunchpos, time + bunch_dt * bts);
                     
                     ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
                         ippl::Vector<scalar, 3>(E_grid + external_eb.first), 
@@ -739,12 +739,15 @@ namespace ippl {
                 vector_type(0),
                 vector_type(0)
             };});
+            ++steps_taken;
         }
         template<typename callable>
         void solve(callable external_field){
             scatterBunch();
             field_solver.solve();
+            //std::cout << field_solver.dt * steps_taken << "\n";
             updateBunch(field_solver.dt * steps_taken, external_field);
+            ++steps_taken;
         }
     };
 }

From b1ed73b19bc7c9354751d182707f9e0df00056bc Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Tue, 7 May 2024 15:03:52 +0200
Subject: [PATCH 26/32] Fix FDTD visibility

---
 src/MaxwellSolvers/FDTD.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 574026d73..da6c727e2 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -295,6 +295,8 @@ namespace ippl {
                 bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
             }
         }
+        
+        public:
         template<typename scalar>
         struct nondispersive{
             scalar a1;
@@ -303,7 +305,6 @@ namespace ippl {
             scalar a6;
             scalar a8;
         };
-        public:
         void step(){
             const auto& ldom    = layout_mp->getLocalNDIndex();
             const int nghost    = A_n.getNghost();

From e164a96930d204d4e97911cc0fa41761ecc25b16 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Tue, 7 May 2024 16:20:07 +0200
Subject: [PATCH 27/32] hack and fix

---
 alpine/FreeElectronLaser.cpp | 11 ++++++-----
 src/MaxwellSolvers/FDTD.h    |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index 10b66f94f..aaafd3b65 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -1604,12 +1604,12 @@ scalar test_amperes_law(uint32_t n){
     return 0.0f;
 }
 int main(int argc, char* argv[]) {
-    using scalar = double;
+    using scalar = float;
     ippl::initialize(argc, argv);
     {
         
-        test_gauss_law<scalar>(64);
-        test_amperes_law<scalar>(64);
+        //test_gauss_law<scalar>(64);
+        //test_amperes_law<scalar>(64);
         //goto exit;
         config cfg = read_config("../config.json");
         const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
@@ -1738,7 +1738,7 @@ int main(int argc, char* argv[]) {
             double radiation_in_watt_global = 0.0;
             MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
             if(ippl::Comm->rank() == 0){
-                ippl::Vector<scalar, 3> pos{0,0,0};
+                ippl::Vector<scalar, 3> pos{0,0,(float)cfg.extents[2]};
                 frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i);
                 rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
             }
@@ -1843,7 +1843,8 @@ int main(int argc, char* argv[]) {
             }    
         }
         uint64_t endtime = nanoTime();
-        std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
+        if(ippl::Comm->rank() == 0)
+            std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
     }
     //exit:
     ippl::finalize();
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index da6c727e2..a1cecb131 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -630,7 +630,7 @@ namespace ippl {
             steps_taken = 0;
             //field_solver.setEMFields(E, B);
         }
-        template<bool space_charge = true>
+        template<bool space_charge = false>
         void scatterBunch(){
             //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
             auto hr_m = mesh_mp->getMeshSpacing();

From 7f1f3963b438b4e5a60530359ca98b0c45156379 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 18 May 2024 13:35:02 +0200
Subject: [PATCH 28/32] Add comments, remove dead lines

---
 alpine/FreeElectronLaser.cpp    | 397 ++++++++++++++++----------------
 src/MaxwellSolvers/FDTD.h       |   6 +-
 src/Particle/ParticleAttrib.h   |   2 +-
 src/Particle/ParticleAttrib.hpp |   7 +-
 4 files changed, 210 insertions(+), 202 deletions(-)

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index aaafd3b65..f03190583 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -329,39 +329,39 @@ KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> f
 
 struct config {
     using scalar = double;
-
-    //using length_unit = funits::length<scalar, funits::planck_base>;
-    //using duration_unit = funits::time<scalar, funits::planck_base>;
+    
     // GRID PARAMETERS
-    ippl::Vector<uint32_t, 3> resolution;
-
-    ippl::Vector<scalar, 3> extents;
-    scalar total_time;
-    scalar timestep_ratio;
-
-    scalar length_scale_in_jobfile, temporal_scale_in_jobfile;
-
-        // All in unit_charge, or unit_mass
-    scalar charge, mass;
-
-    uint64_t num_particles;
-    bool space_charge;
-
-        // BUNCH PARAMETERS
-    ippl::Vector<scalar, 3> mean_position;
-    ippl::Vector<scalar, 3> sigma_position;
-    ippl::Vector<scalar, 3> position_truncations;
-    ippl::Vector<scalar, 3> sigma_momentum;
-    scalar bunch_gamma;
-
-    scalar undulator_K;
-    scalar undulator_period;
-    scalar undulator_length;
-
-    uint32_t output_rhythm;
-    std::string output_path;
-    std::unordered_map<std::string, double> experiment_options;
+    ippl::Vector<uint32_t, 3> resolution;               // Grid resolution in 3D
+    ippl::Vector<scalar, 3> extents;                    // Physical extents of the grid in each dimension
+    scalar total_time;                                  // Total simulation time
+    scalar timestep_ratio;                              // Ratio of timestep to some reference value
+
+    scalar length_scale_in_jobfile;                     // Length scale defined in the jobfile
+    scalar temporal_scale_in_jobfile;                   // Temporal scale defined in the jobfile
+
+    // PARTICLE PARAMETERS
+    scalar charge;                                      // Particle charge in unit_charge
+    scalar mass;                                        // Particle mass in unit_mass
+    uint64_t num_particles;                             // Number of particles in the simulation
+    bool space_charge;                                  // Flag for considering space charge effects
+
+    // BUNCH PARAMETERS
+    ippl::Vector<scalar, 3> mean_position;              // Mean initial position of the particle bunch
+    ippl::Vector<scalar, 3> sigma_position;             // Standard deviation of the initial position distribution
+    ippl::Vector<scalar, 3> position_truncations;       // Truncations of the position distribution
+    ippl::Vector<scalar, 3> sigma_momentum;             // Standard deviation of the initial momentum distribution
+    scalar bunch_gamma;                                 // Relativistic gamma factor of the bunch
+
+    // UNDULATOR PARAMETERS
+    scalar undulator_K;                                 // Undulator parameter K
+    scalar undulator_period;                            // Period of the undulator
+    scalar undulator_length;                            // Length of the undulator
+
+    uint32_t output_rhythm;                             // Frequency of output in timesteps
+    std::string output_path;                            // Path to output files
+    std::unordered_map<std::string, double> experiment_options; // Additional experimental options
 };
+
 template<typename scalar, unsigned Dim>
 ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j){
     if(j.is_array()){
@@ -646,12 +646,6 @@ struct BunchInitialize {
     // BunchInitialize ();
 };
 
-
-
-
-
-//END CONFIG
-
 //LORENTZ FRAME AND UNDULATOR
 template<typename T, unsigned axis = 2>
 struct UniaxialLorentzframe{
@@ -849,7 +843,7 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
               zmin = std::min(  (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin );
             else
               {
-                //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
+                std::cout << std::string("The longitudinal type is not correctly given to the code !!!\n");
                 exit(1);
               }
           }
@@ -863,7 +857,6 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
               zmin = std::min(   t0 , zmin );
             }
 
-        //zmin = zmin + bunchInit.position_[ia][2];
         zmin = zmin + bunchInit.position_[2];
 
         /* Obtain the average number of electrons per FEL beamlet.					*/
@@ -872,7 +865,6 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
         /* Set the bunching factor level for the shot noise depending on the given values.		*/
         bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
 
-        //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) );
       }
 
     /* Determine the properties of each charge point and add them to the charge vector.               	*/
@@ -889,7 +881,6 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
           r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
         else
           {
-            //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") );
             exit(1);
           }
 
@@ -901,15 +892,13 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
         if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
           {
             /* Shift the generated charge to the center position and momentum space.			*/
-            //charge.rnp    = bunchInit.position_[ia];
             charge.rnp    = bunchInit.position_;
             charge.rnp   += r;
 
             charge.gb   = gb;
             charge.gb  += t;
-            //std::cout << gb << "\n";
             if(std::isinf(gb[2])){
-                std::cerr << "it klonked here\n";
+                std::cerr << "[Warning] Gammabeta obtained an klonked here\n";
             }
 
             /* Insert this charge and the mirrored ones into the charge vector.				*/
@@ -934,10 +923,8 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
           t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
           t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
           t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
-          //std::cerr << "DOING UNIFORM tapering!!!\n";
           if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
             {
-                //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n";
               /* Shift the generated charge to the center position and momentum space.			*/
               charge.rnp   = bunchInit.position_[ia];
               charge.rnp  += r;
@@ -945,10 +932,6 @@ void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<D
               charge.gb  = gb;
               
               charge.gb += t;
-              //std::cout << gb[0] << "\n";
-              //if(std::isinf(gb.squaredNorm())){
-              //    std::cerr << "it klonked here\n";
-              //}
               /* Insert this charge and the mirrored ones into the charge vector.			*/
               insertCharge(charge);
             }
@@ -1103,11 +1086,8 @@ KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a
     return ret;
 }
 template<typename T>
-KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
-    //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
-    //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos, int nghost = 1){
     Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-    //pos -= spacing * T(0.5);
     ippl::Vector<T, 3> relpos = pos - origin;
     ippl::Vector<T, 3> gridpos = relpos / hr;
     ippl::Vector<int, 3> ipos;
@@ -1116,17 +1096,15 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gr
     for(unsigned k = 0;k < 3;k++){
         fracpos[k] = gridpos[k] - (int)ipos[k];
     }
-    //TODO: NGHOST!!!!!!!
-    ipos += ippl::Vector<int, 3>(1);
+    ipos += ippl::Vector<int, 3>(nghost);
     ret.first = ipos;
     ret.second = fracpos;
     return ret;
 }
 template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
-    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value, int nghost = 1){
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
     ipos -= ldom.first();
-    //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
     if(
         ipos[0] < 0
         ||ipos[1] < 0
@@ -1170,9 +1148,9 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(abs(accum - 1.0f) < 1e-6f);
 }
 template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value, int nghost = 1){
     //std::cout << "Value: " << value << "\n";
-    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
     ipos -= ldom.first();
     if(
         ipos[0] < 0
@@ -1220,21 +1198,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
 }
 
 template<typename view_type, typename scalar>
-KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor, int nghost = 1){ 
 
     
     Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
     Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
-    //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
-    for(int d = 0;d < 3;d++){
-        //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
-        //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
-        //}
-        //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
-    }
-    //const uint32_t nghost = g.nghost();
-    //from_ipos += ippl::Vector<int, 3>(nghost);
-    //to_ipos += ippl::Vector<int, 3>(nghost);
     
     if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
         scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
@@ -1242,7 +1210,6 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view
         return;
     }
     ippl::Vector<scalar, 3> relay;
-    const int nghost = 1;
     const ippl::Vector<scalar, 3> orig = origin;
     using Kokkos::max;
     using Kokkos::min;
@@ -1255,23 +1222,21 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view
     scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
 }
 
-// END PREAMBLE
 
 
 namespace ippl {
     
     template<typename scalar>
     struct undulator_parameters{
-        scalar lambda; //MITHRA: lambda_u
+        scalar lambda_u; //MITHRA: lambda_u
         scalar K; //Undulator parameter
         scalar length;
         scalar B_magnitude;
-        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda_u(lambda_u), K(K_undulator_parameter), length(_length){
             B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
-            //std::cout << "Setting bmag: " << B_magnitude << "\n";
         }
-        undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda);
+        undulator_parameters(const config& cfg): lambda_u(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
         }
     };
 
@@ -1317,115 +1282,179 @@ namespace ippl {
         ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
 
     };
+
+
+    /**
+     * @brief Struct representing an undulator.
+     * 
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
     template<typename scalar>
-    struct Undulator{
-        undulator_parameters<scalar> uparams;
-        scalar distance_to_entry;
-        scalar k_u;
-        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){}
-        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame)const noexcept{
+    struct Undulator {
+        undulator_parameters<scalar> uparams; ///< Parameters of the undulator.
+        scalar distance_to_entry;             ///< Distance to the entry of the undulator.
+        scalar k_u;                           ///< Wavenumber of the undulator.
+
+        /**
+         * @brief Constructor to initialize undulator parameters and calculate k_u.
+         * 
+         * @param p Parameters of the undulator.
+         * @param dte Distance to the entry of the undulator.
+         */
+        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte) 
+            : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda_u) {}
+
+        /**
+         * @brief Overloaded operator() to compute magnetic field components.
+         * 
+         * @param position_in_lab_frame Position vector in the lab frame.
+         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> 
+         *         Pair containing magnetic field and its derivative.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> 
+        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
             using Kokkos::sin;
             using Kokkos::sinh;
             using Kokkos::cos;
             using Kokkos::cosh;
             using Kokkos::exp;
-            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
-            ret.first.fill(0);
-            ret.second.fill(0);
-            if(position_in_lab_frame[2] < distance_to_entry){
+
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret; // Return pair containing magnetic field and its derivative.
+            ret.first.fill(0);  // Initialize magnetic field vector.
+            ret.second.fill(0); // Initialize derivative vector.
+
+            // If the position is before the undulator entry.
+            if (position_in_lab_frame[2] < distance_to_entry) {
                 scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator < 0);
-                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
+                assert(z_in_undulator < 0); // Ensure we are in the correct region.
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); // Gaussian decay factor.
+
+                ret.second[0] = 0; // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; // z-component.
             }
-            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
+            // If the position is within the undulator.
+            else if (position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
                 scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator >= 0);
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
+                assert(z_in_undulator >= 0); // Ensure we are in the correct region.
+
+                ret.second[0] = 0; // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); // z-component.
             }
             return ret;
-        };
+        }
     };
-    
 
 
+    /**
+     * @brief Struct representing the state of an FEL (Free Electron Laser) simulation.
+     * 
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
     template <typename scalar>
-    // clang-format off
-    struct FELSimulationState{
-        
-        //Sorry, can't do more than 3d
+    struct FELSimulationState {
+        // clang-format off
 
-        constexpr static unsigned int dim = 3;
-        using Vector_t = ippl::Vector<scalar, 3>;
-        using value_type = ippl::Vector<scalar, 4>;
-        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>;
-        using Mesh_t               = ippl::UniformCartesian<scalar, dim>;
+        constexpr static unsigned int dim = 3; ///< Dimensionality of the simulation (3D).
+        using Vector_t = ippl::Vector<scalar, dim>; ///< Type alias for a 3D vector.
+        using value_type = ippl::Vector<scalar, 4>; ///< Type alias for a 4D vector.
+        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>; ///< Type alias for an electric and magnetic field vector pair.
+        using Mesh_t = ippl::UniformCartesian<scalar, dim>; ///< Type alias for a uniform Cartesian mesh.
 
-        bool periodic_bc;
+        bool periodic_bc; ///< Flag indicating if periodic boundary conditions are used.
+
+        /// Type alias for a field with 4D vector values.
         using FourField = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with 3D vector values.
         using ThreeField = ippl::Field<Vector_t, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with 4D vector values (repeated for consistency).
         using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using EBField_t = ippl::Field<EB_type   , dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with electric and magnetic field vector pairs.
+        using EBField_t = ippl::Field<EB_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for the view type of a field with 4D vector values.
         using view_type = typename VField_t::view_type;
+
+        /// Type alias for the view type of a field with electric and magnetic field vector pairs.
         using ev_view_type = typename EBField_t::view_type;
+
+        /// Type alias for the view type of a field with 3D vector values (electric field).
         using e_view_type = typename ThreeField::view_type;
+
+        /// Type alias for the view type of a field with 3D vector values (magnetic field).
         using b_view_type = typename ThreeField::view_type;
-        //Fields
+
+        /// Solver and particle handler.
         ippl::NSFDSolverWithParticles<scalar, absorbing> fieldsAndParticles;
-        
-        //Discretization options
-        Vector_t hr_m;
-        ippl::Vector<uint32_t, 3> nr_global;
-        ippl::Vector<uint32_t, 3> nr_local;
-        //scalar dt;
-        config m_config;
-        UniaxialLorentzframe<scalar, 2 /*along z*/> ulb;
-        undulator_parameters<scalar> uparams;
-        Undulator<scalar> undulator;
+
+        Vector_t hr_m; ///< Mesh spacing vector.
+        ippl::Vector<uint32_t, dim> nr_global; ///< Global number of cells in each dimension.
+        ippl::Vector<uint32_t, dim> nr_local; ///< Local number of cells in each dimension.
+        config m_config; ///< Configuration object.
+        UniaxialLorentzframe<scalar, 2> ulb; ///< Uniaxial Lorentz frame transformation along z-axis.
+        undulator_parameters<scalar> uparams; ///< Undulator parameters.
+        Undulator<scalar> undulator; ///< Undulator object.
+
         /**
-         * @brief Construct a new FDTDFieldState object
-         * Mesh and resolution parameter are technically redundant
-         * @details ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5) is the frame's gamma factor
-         * @param resolution 
-         * @param layout 
-         * @param mesch 
+         * @brief Construct a new FELSimulationState object.
+         * 
+         * Initializes the simulation state with the given field layout, mesh, number of particles, and configuration.
+         * 
+         * @details 
+         * - `ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5)` is the frame's gamma factor.
+         * 
+         * @param layout Field layout.
+         * @param mesch Mesh object.
+         * @param nparticles Number of particles.
+         * @param cfg Configuration object.
          */
-        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) : fieldsAndParticles(layout, mesch, nparticles), m_config(cfg), ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){
+        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) 
+            : fieldsAndParticles(layout, mesch, nparticles), 
+              m_config(cfg), 
+              ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), 
+              uparams(cfg), 
+              undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) {
 
             hr_m = mesch.getMeshSpacing();
-            nr_global = ippl::Vector<uint32_t, 3>{
+            nr_global = ippl::Vector<uint32_t, dim>{
                 uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
                 uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
                 uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
             };
-            nr_local = ippl::Vector<uint32_t, 3>{
+            nr_local = ippl::Vector<uint32_t, dim>{
                 uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1),
                 uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
                 uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
             };
-            //std::cout << "NR_M_g: " << nr_global << "\n";
-            //std::cout << "NR_M_l: " << nr_local << "\n";
         }
-        scalar dt()const noexcept{
+
+        /**
+         * @brief Get the time step size.
+         * 
+         * @return scalar Time step size.
+         */
+        scalar dt() const noexcept {
             return fieldsAndParticles.field_solver.dt;
         }
-        void step(){
-            //scalar time = fieldsAndParticles.steps_taken * fieldsAndParticles.field_solver.dt;
+
+        /**
+         * @brief Perform a simulation step.
+         * 
+         * Solves the fields and updates the state using the undulator and Lorentz frame transformations.
+         */
+        void step() {
             auto und = this->undulator;
             auto lb = this->ulb;
-            fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector<scalar, 3> pos, scalar time){
+            fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector<scalar, 3> pos, scalar time) {
                 lb.primedToUnprimed(pos, time);
                 auto eb = und(pos);
                 return lb.transform_EB(eb);
             });
         }
-        void computeRadiation(){
-
-        }
     };
     // clang-format on
 }  // namespace ippl
@@ -1446,22 +1475,41 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
 
     return true;
 }
+/**
+ * @brief Helper function to gather data from a field using interpolation.
+ * 
+ * This function computes the value at a given position by interpolating the values in the field.
+ * The position is transformed from physical coordinates to grid coordinates and then used to 
+ * gather data from the field. The interpolation weights are computed based on the relative 
+ * positions within the grid cells.
+ * 
+ * @tparam View Type of the field view.
+ * @tparam T Scalar type for coordinates and grid spacing.
+ * @tparam Dim Dimensionality of the position vector.
+ * 
+ * @param v The field view from which data is to be gathered.
+ * @param pos The position vector in physical coordinates where the data is to be gathered.
+ * @param origin The origin of the grid in physical coordinates.
+ * @param hr The grid spacing vector.
+ * @param lDom The local domain indices of the grid.
+ * 
+ * @return typename View::value_type The interpolated value at the specified position.
+ */
 template<typename View, typename T, unsigned Dim>
 KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector<T, Dim>& pos, const ippl::Vector<T, 3>& origin, const ippl::Vector<T, 3>& hr, const ippl::NDIndex<3>& lDom){
     using vector_type = ippl::Vector<T, 3>;
 
     vector_type l;
-    //vector_type origin = v.get_mesh().getOrigin();
-    //auto lDom = v.getLayout().getLocalNDIndex();
-    //vector_type hr = v.get_mesh().getMeshSpacing();
+
     for(unsigned k = 0;k < 3;k++){
-        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong
+        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented in a way such that this 1 is necessary here
     }                     
 
     ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
     ippl::Vector<T, 3> whi = l - index;
     ippl::Vector<T, 3> wlo(1.0);
     wlo -= whi;
+
     //TODO: nghost
     ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
     for(unsigned k = 0;k < 3;k++){
@@ -1469,7 +1517,6 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, co
             return typename View::value_type(0);
         }
     }
-    //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n";
     return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/;
 
 }
@@ -1501,12 +1548,9 @@ scalar test_gauss_law(uint32_t n){
     auto pview = field_state.particles.R.getView();
     auto p1view = field_state.particles.R_nm1.getView();
 
-    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
     
     Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
-        //bunch.gammaBeta[i].fill(scalar(0));
         auto state = random_pool.get_state();
         pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
         pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
@@ -1526,12 +1570,10 @@ scalar test_gauss_law(uint32_t n){
     
     std::ofstream line("gauss_line.txt");
     typename ippl::FELSimulationState<scalar>::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView());
-    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
     for(unsigned i = 1;i < nr[2];i++){
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
         pos[2] += hx[2] * scalar(i);
         ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
-        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
         line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
     }
     return 0.0f;
@@ -1546,10 +1588,10 @@ scalar test_amperes_law(uint32_t n){
     isParallel.fill(false);
     isParallel[2] = true;
         
-        // all parallel layout, standard domain, normal axis order
+    // all parallel layout, standard domain, normal axis order
     ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
-        //[-1, 1] box
+    //[-1, 1] box
     ippl::Vector<scalar, 3> hx;
     for(unsigned d = 0;d < 3;d++){
         hx[d] = extents[d] / (scalar)nr[d];
@@ -1566,9 +1608,7 @@ scalar test_amperes_law(uint32_t n){
     auto p1view = field_state.particles.R_nm1.getView();
     constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
     scalar timestep = field_state.field_solver.dt;
-    //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end());
     
     Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
         //bunch.gammaBeta[i].fill(scalar(0));
@@ -1620,31 +1660,7 @@ int main(int argc, char* argv[]) {
         const scalar frame_gammabeta = frame_gamma * frame_beta;
         UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
         ippl::undulator_parameters<scalar> uparams(cfg);
-        /*const scalar k_u  = scalar(2.0 * M_PI) / uparams.lambda;
-        const scalar distance_to_entry  = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma);
-        auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector<scalar, 3>& position_in_lab_frame){
-            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret;
-            ret.first.fill(0);
-            ret.second.fill(0);
-
-            if(position_in_lab_frame[2] < distance_to_entry){
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator < 0);
-                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5));
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal;
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal;
-            }
-            else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator >= 0);
-                ret.second[0] = 0;
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator);
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator);
-            }
-            return ret;
 
-        };*/
         BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
         ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
 
@@ -1689,8 +1705,6 @@ int main(int argc, char* argv[]) {
             });
         }
         fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO);
-        //fdtd_state.scatterBunch();
-        //std::cout << cfg.charge << "\n";
         
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt());
         uint64_t starttime =  nanoTime();
@@ -1706,23 +1720,16 @@ int main(int argc, char* argv[]) {
 
 
         for(size_t i = 0;i < timesteps_required;i++){
-
-            //fdtd_state.J = scalar(0.0);
-            //fdtd_state.playout.update(fdtd_state.particles);
-            //fdtd_state.scatterBunch();
-            std::cout << i << "\n";
+            if(ippl::Comm->rank() == 0){
+                std::cout << "Doing step: " << i << std::endl;
+            }
             fdtd_state.step();
-            //fdtd_state.fieldStep();
-            //fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field);
             auto ldom = layout.getLocalNDIndex();
             auto nrg = fdtd_state.nr_global;
             auto eview = fdtd_state.fieldsAndParticles.E.getView();
             auto bview = fdtd_state.fieldsAndParticles.B.getView();
-            //auto ebv = fdtd_state.EB.getView();
             double radiation = 0.0;
             Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
-                //uint32_t ig = i + ldom.first()[0];
-                //uint32_t jg = j + ldom.first()[1];
                 Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{eview(i,j,k), bview(i,j,k)};
                 ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
                 ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
@@ -1742,8 +1749,7 @@ int main(int argc, char* argv[]) {
                 frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i);
                 rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
             }
-            //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n";
-            //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n";
+            
             int rank = ippl::Comm->rank();
             int size = ippl::Comm->size();
             if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
@@ -1779,7 +1785,6 @@ int main(int argc, char* argv[]) {
                 Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView());
                 Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView());
 
-                //double exp_avg = double(exp_sum) / double(acount);
                 {
                     for(int i = 1;i < img_width;i++){
                         for(int j = 1;j < img_height;j++){
@@ -1809,10 +1814,12 @@ int main(int argc, char* argv[]) {
                         }
                     }
                 }
+                /*
+                 * This code should be replaced by the Image Gathering anyway, but that's a different PR 
+                 */
                 int mask = 1;
                 while (mask < size) {
                     int partner = rank ^ mask;
-                    //if((rank & (mask - 1)) == 0)
                     {
                         if ((rank & mask) == 0) {
                             // Send data to partner
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index a1cecb131..52ec14529 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -57,7 +57,7 @@ namespace ippl {
     enum fdtd_bc{
         periodic, absorbing
     };
-
+    
     template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
     class StandardFDTDSolver : Maxwell<EMField, FourField> {
         public:
@@ -678,8 +678,8 @@ namespace ippl {
             Kokkos::fence();
             for(int bts = 0;bts < 3;bts++){
                 
-                particles.E_gather.gather(E, particles.R);
-                particles.B_gather.gather(B, particles.R);
+                particles.E_gather.gather(E, particles.R, /*offset = */0.0);
+                particles.B_gather.gather(B, particles.R, /*offset = */0.0);
                 Kokkos::fence();
                 Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
                     const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h
index 093e2b519..0702489f3 100644
--- a/src/Particle/ParticleAttrib.h
+++ b/src/Particle/ParticleAttrib.h
@@ -124,7 +124,7 @@ namespace ippl {
                      const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp) const;
 
         template <typename Field, typename P2>
-        void gather(Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp);
+        void gather(Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp, const typename Field::Mesh_t::vector_type::value_type offset = 0.5);
 
         T sum();
         T max();
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 4a8f0010a..7e0a58fbf 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -155,7 +155,8 @@ namespace ippl {
     template <typename T, class... Properties>
     template <typename Field, typename P2>
     void ParticleAttrib<T, Properties...>::gather(
-        Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp) {
+        Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp,
+        const typename Field::Mesh_t::vector_type::value_type offset) {
         constexpr unsigned Dim = Field::dim;
         using PositionType     = typename Field::Mesh_t::value_type;
 
@@ -186,7 +187,7 @@ namespace ippl {
             "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + 1.0;
+                vector_type l                        = (pp(idx) - origin) * invdx + (1.0 - offset);
                 Vector<int, Field::dim> index        = l;
                 Vector<PositionType, Field::dim> whi = l - index;
                 Vector<PositionType, Field::dim> wlo = 1.0 - whi;
@@ -194,7 +195,7 @@ namespace ippl {
                 Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
 
                 // gather
-                //std::cout << index << std::endl;
+                // std::cout << index << std::endl;
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
                                                        view, wlo, whi, args);
             });

From 5d6c9743095c0643f548532e17389288e358f026 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Sat, 18 May 2024 14:49:50 +0200
Subject: [PATCH 29/32] File Split and clang-format

---
 alpine/FreeElectronLaser.cpp                | 1924 +++++++++----------
 alpine/Undulator.h                          |  238 +++
 alpine/units.h                              |   37 +
 src/MaxwellSolvers/FDTD.h                   |  784 ++++----
 test/solver/TestFDTDSolverWithParticles.cpp |    2 +-
 5 files changed, 1551 insertions(+), 1434 deletions(-)
 create mode 100644 alpine/Undulator.h
 create mode 100644 alpine/units.h

diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp
index f03190583..e2a0c8551 100644
--- a/alpine/FreeElectronLaser.cpp
+++ b/alpine/FreeElectronLaser.cpp
@@ -1,412 +1,238 @@
 #include "Ippl.h"
 
+#include <chrono>
+#include <cstdio>  // For popen
+
 #include "Types/Vector.h"
 
 #include "Field/Field.h"
+
 #include "MaxwellSolvers/FDTD.h"
-#include <chrono>
-#include <cstdio> // For popen
-#define JSON_HAS_RANGES 0 //Merlin compilation
-#include <json.hpp>
+#define JSON_HAS_RANGES 0  // Merlin compilation
+#include <Kokkos_Random.hpp>
 #include <fstream>
+#include <json.hpp>
 #include <list>
-#include <Kokkos_Random.hpp>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.hpp>
 
+#include "Undulator.h"
+#include "units.h"
 
 constexpr float turbo_cm[256][3] = {
-  {0.18995,0.07176,0.23217},
-  {0.19483,0.08339,0.26149},
-  {0.19956,0.09498,0.29024},
-  {0.20415,0.10652,0.31844},
-  {0.20860,0.11802,0.34607},
-  {0.21291,0.12947,0.37314},
-  {0.21708,0.14087,0.39964},
-  {0.22111,0.15223,0.42558},
-  {0.22500,0.16354,0.45096},
-  {0.22875,0.17481,0.47578},
-  {0.23236,0.18603,0.50004},
-  {0.23582,0.19720,0.52373},
-  {0.23915,0.20833,0.54686},
-  {0.24234,0.21941,0.56942},
-  {0.24539,0.23044,0.59142},
-  {0.24830,0.24143,0.61286},
-  {0.25107,0.25237,0.63374},
-  {0.25369,0.26327,0.65406},
-  {0.25618,0.27412,0.67381},
-  {0.25853,0.28492,0.69300},
-  {0.26074,0.29568,0.71162},
-  {0.26280,0.30639,0.72968},
-  {0.26473,0.31706,0.74718},
-  {0.26652,0.32768,0.76412},
-  {0.26816,0.33825,0.78050},
-  {0.26967,0.34878,0.79631},
-  {0.27103,0.35926,0.81156},
-  {0.27226,0.36970,0.82624},
-  {0.27334,0.38008,0.84037},
-  {0.27429,0.39043,0.85393},
-  {0.27509,0.40072,0.86692},
-  {0.27576,0.41097,0.87936},
-  {0.27628,0.42118,0.89123},
-  {0.27667,0.43134,0.90254},
-  {0.27691,0.44145,0.91328},
-  {0.27701,0.45152,0.92347},
-  {0.27698,0.46153,0.93309},
-  {0.27680,0.47151,0.94214},
-  {0.27648,0.48144,0.95064},
-  {0.27603,0.49132,0.95857},
-  {0.27543,0.50115,0.96594},
-  {0.27469,0.51094,0.97275},
-  {0.27381,0.52069,0.97899},
-  {0.27273,0.53040,0.98461},
-  {0.27106,0.54015,0.98930},
-  {0.26878,0.54995,0.99303},
-  {0.26592,0.55979,0.99583},
-  {0.26252,0.56967,0.99773},
-  {0.25862,0.57958,0.99876},
-  {0.25425,0.58950,0.99896},
-  {0.24946,0.59943,0.99835},
-  {0.24427,0.60937,0.99697},
-  {0.23874,0.61931,0.99485},
-  {0.23288,0.62923,0.99202},
-  {0.22676,0.63913,0.98851},
-  {0.22039,0.64901,0.98436},
-  {0.21382,0.65886,0.97959},
-  {0.20708,0.66866,0.97423},
-  {0.20021,0.67842,0.96833},
-  {0.19326,0.68812,0.96190},
-  {0.18625,0.69775,0.95498},
-  {0.17923,0.70732,0.94761},
-  {0.17223,0.71680,0.93981},
-  {0.16529,0.72620,0.93161},
-  {0.15844,0.73551,0.92305},
-  {0.15173,0.74472,0.91416},
-  {0.14519,0.75381,0.90496},
-  {0.13886,0.76279,0.89550},
-  {0.13278,0.77165,0.88580},
-  {0.12698,0.78037,0.87590},
-  {0.12151,0.78896,0.86581},
-  {0.11639,0.79740,0.85559},
-  {0.11167,0.80569,0.84525},
-  {0.10738,0.81381,0.83484},
-  {0.10357,0.82177,0.82437},
-  {0.10026,0.82955,0.81389},
-  {0.09750,0.83714,0.80342},
-  {0.09532,0.84455,0.79299},
-  {0.09377,0.85175,0.78264},
-  {0.09287,0.85875,0.77240},
-  {0.09267,0.86554,0.76230},
-  {0.09320,0.87211,0.75237},
-  {0.09451,0.87844,0.74265},
-  {0.09662,0.88454,0.73316},
-  {0.09958,0.89040,0.72393},
-  {0.10342,0.89600,0.71500},
-  {0.10815,0.90142,0.70599},
-  {0.11374,0.90673,0.69651},
-  {0.12014,0.91193,0.68660},
-  {0.12733,0.91701,0.67627},
-  {0.13526,0.92197,0.66556},
-  {0.14391,0.92680,0.65448},
-  {0.15323,0.93151,0.64308},
-  {0.16319,0.93609,0.63137},
-  {0.17377,0.94053,0.61938},
-  {0.18491,0.94484,0.60713},
-  {0.19659,0.94901,0.59466},
-  {0.20877,0.95304,0.58199},
-  {0.22142,0.95692,0.56914},
-  {0.23449,0.96065,0.55614},
-  {0.24797,0.96423,0.54303},
-  {0.26180,0.96765,0.52981},
-  {0.27597,0.97092,0.51653},
-  {0.29042,0.97403,0.50321},
-  {0.30513,0.97697,0.48987},
-  {0.32006,0.97974,0.47654},
-  {0.33517,0.98234,0.46325},
-  {0.35043,0.98477,0.45002},
-  {0.36581,0.98702,0.43688},
-  {0.38127,0.98909,0.42386},
-  {0.39678,0.99098,0.41098},
-  {0.41229,0.99268,0.39826},
-  {0.42778,0.99419,0.38575},
-  {0.44321,0.99551,0.37345},
-  {0.45854,0.99663,0.36140},
-  {0.47375,0.99755,0.34963},
-  {0.48879,0.99828,0.33816},
-  {0.50362,0.99879,0.32701},
-  {0.51822,0.99910,0.31622},
-  {0.53255,0.99919,0.30581},
-  {0.54658,0.99907,0.29581},
-  {0.56026,0.99873,0.28623},
-  {0.57357,0.99817,0.27712},
-  {0.58646,0.99739,0.26849},
-  {0.59891,0.99638,0.26038},
-  {0.61088,0.99514,0.25280},
-  {0.62233,0.99366,0.24579},
-  {0.63323,0.99195,0.23937},
-  {0.64362,0.98999,0.23356},
-  {0.65394,0.98775,0.22835},
-  {0.66428,0.98524,0.22370},
-  {0.67462,0.98246,0.21960},
-  {0.68494,0.97941,0.21602},
-  {0.69525,0.97610,0.21294},
-  {0.70553,0.97255,0.21032},
-  {0.71577,0.96875,0.20815},
-  {0.72596,0.96470,0.20640},
-  {0.73610,0.96043,0.20504},
-  {0.74617,0.95593,0.20406},
-  {0.75617,0.95121,0.20343},
-  {0.76608,0.94627,0.20311},
-  {0.77591,0.94113,0.20310},
-  {0.78563,0.93579,0.20336},
-  {0.79524,0.93025,0.20386},
-  {0.80473,0.92452,0.20459},
-  {0.81410,0.91861,0.20552},
-  {0.82333,0.91253,0.20663},
-  {0.83241,0.90627,0.20788},
-  {0.84133,0.89986,0.20926},
-  {0.85010,0.89328,0.21074},
-  {0.85868,0.88655,0.21230},
-  {0.86709,0.87968,0.21391},
-  {0.87530,0.87267,0.21555},
-  {0.88331,0.86553,0.21719},
-  {0.89112,0.85826,0.21880},
-  {0.89870,0.85087,0.22038},
-  {0.90605,0.84337,0.22188},
-  {0.91317,0.83576,0.22328},
-  {0.92004,0.82806,0.22456},
-  {0.92666,0.82025,0.22570},
-  {0.93301,0.81236,0.22667},
-  {0.93909,0.80439,0.22744},
-  {0.94489,0.79634,0.22800},
-  {0.95039,0.78823,0.22831},
-  {0.95560,0.78005,0.22836},
-  {0.96049,0.77181,0.22811},
-  {0.96507,0.76352,0.22754},
-  {0.96931,0.75519,0.22663},
-  {0.97323,0.74682,0.22536},
-  {0.97679,0.73842,0.22369},
-  {0.98000,0.73000,0.22161},
-  {0.98289,0.72140,0.21918},
-  {0.98549,0.71250,0.21650},
-  {0.98781,0.70330,0.21358},
-  {0.98986,0.69382,0.21043},
-  {0.99163,0.68408,0.20706},
-  {0.99314,0.67408,0.20348},
-  {0.99438,0.66386,0.19971},
-  {0.99535,0.65341,0.19577},
-  {0.99607,0.64277,0.19165},
-  {0.99654,0.63193,0.18738},
-  {0.99675,0.62093,0.18297},
-  {0.99672,0.60977,0.17842},
-  {0.99644,0.59846,0.17376},
-  {0.99593,0.58703,0.16899},
-  {0.99517,0.57549,0.16412},
-  {0.99419,0.56386,0.15918},
-  {0.99297,0.55214,0.15417},
-  {0.99153,0.54036,0.14910},
-  {0.98987,0.52854,0.14398},
-  {0.98799,0.51667,0.13883},
-  {0.98590,0.50479,0.13367},
-  {0.98360,0.49291,0.12849},
-  {0.98108,0.48104,0.12332},
-  {0.97837,0.46920,0.11817},
-  {0.97545,0.45740,0.11305},
-  {0.97234,0.44565,0.10797},
-  {0.96904,0.43399,0.10294},
-  {0.96555,0.42241,0.09798},
-  {0.96187,0.41093,0.09310},
-  {0.95801,0.39958,0.08831},
-  {0.95398,0.38836,0.08362},
-  {0.94977,0.37729,0.07905},
-  {0.94538,0.36638,0.07461},
-  {0.94084,0.35566,0.07031},
-  {0.93612,0.34513,0.06616},
-  {0.93125,0.33482,0.06218},
-  {0.92623,0.32473,0.05837},
-  {0.92105,0.31489,0.05475},
-  {0.91572,0.30530,0.05134},
-  {0.91024,0.29599,0.04814},
-  {0.90463,0.28696,0.04516},
-  {0.89888,0.27824,0.04243},
-  {0.89298,0.26981,0.03993},
-  {0.88691,0.26152,0.03753},
-  {0.88066,0.25334,0.03521},
-  {0.87422,0.24526,0.03297},
-  {0.86760,0.23730,0.03082},
-  {0.86079,0.22945,0.02875},
-  {0.85380,0.22170,0.02677},
-  {0.84662,0.21407,0.02487},
-  {0.83926,0.20654,0.02305},
-  {0.83172,0.19912,0.02131},
-  {0.82399,0.19182,0.01966},
-  {0.81608,0.18462,0.01809},
-  {0.80799,0.17753,0.01660},
-  {0.79971,0.17055,0.01520},
-  {0.79125,0.16368,0.01387},
-  {0.78260,0.15693,0.01264},
-  {0.77377,0.15028,0.01148},
-  {0.76476,0.14374,0.01041},
-  {0.75556,0.13731,0.00942},
-  {0.74617,0.13098,0.00851},
-  {0.73661,0.12477,0.00769},
-  {0.72686,0.11867,0.00695},
-  {0.71692,0.11268,0.00629},
-  {0.70680,0.10680,0.00571},
-  {0.69650,0.10102,0.00522},
-  {0.68602,0.09536,0.00481},
-  {0.67535,0.08980,0.00449},
-  {0.66449,0.08436,0.00424},
-  {0.65345,0.07902,0.00408},
-  {0.64223,0.07380,0.00401},
-  {0.63082,0.06868,0.00401},
-  {0.61923,0.06367,0.00410},
-  {0.60746,0.05878,0.00427},
-  {0.59550,0.05399,0.00453},
-  {0.58336,0.04931,0.00486},
-  {0.57103,0.04474,0.00529},
-  {0.55852,0.04028,0.00579},
-  {0.54583,0.03593,0.00638},
-  {0.53295,0.03169,0.00705},
-  {0.51989,0.02756,0.00780},
-  {0.50664,0.02354,0.00863},
-  {0.49321,0.01963,0.00955},
-  {0.47960,0.01583,0.01055}
-};
-
-
-
-
-uint64_t nanoTime(){
+    {0.18995, 0.07176, 0.23217}, {0.19483, 0.08339, 0.26149}, {0.19956, 0.09498, 0.29024},
+    {0.20415, 0.10652, 0.31844}, {0.20860, 0.11802, 0.34607}, {0.21291, 0.12947, 0.37314},
+    {0.21708, 0.14087, 0.39964}, {0.22111, 0.15223, 0.42558}, {0.22500, 0.16354, 0.45096},
+    {0.22875, 0.17481, 0.47578}, {0.23236, 0.18603, 0.50004}, {0.23582, 0.19720, 0.52373},
+    {0.23915, 0.20833, 0.54686}, {0.24234, 0.21941, 0.56942}, {0.24539, 0.23044, 0.59142},
+    {0.24830, 0.24143, 0.61286}, {0.25107, 0.25237, 0.63374}, {0.25369, 0.26327, 0.65406},
+    {0.25618, 0.27412, 0.67381}, {0.25853, 0.28492, 0.69300}, {0.26074, 0.29568, 0.71162},
+    {0.26280, 0.30639, 0.72968}, {0.26473, 0.31706, 0.74718}, {0.26652, 0.32768, 0.76412},
+    {0.26816, 0.33825, 0.78050}, {0.26967, 0.34878, 0.79631}, {0.27103, 0.35926, 0.81156},
+    {0.27226, 0.36970, 0.82624}, {0.27334, 0.38008, 0.84037}, {0.27429, 0.39043, 0.85393},
+    {0.27509, 0.40072, 0.86692}, {0.27576, 0.41097, 0.87936}, {0.27628, 0.42118, 0.89123},
+    {0.27667, 0.43134, 0.90254}, {0.27691, 0.44145, 0.91328}, {0.27701, 0.45152, 0.92347},
+    {0.27698, 0.46153, 0.93309}, {0.27680, 0.47151, 0.94214}, {0.27648, 0.48144, 0.95064},
+    {0.27603, 0.49132, 0.95857}, {0.27543, 0.50115, 0.96594}, {0.27469, 0.51094, 0.97275},
+    {0.27381, 0.52069, 0.97899}, {0.27273, 0.53040, 0.98461}, {0.27106, 0.54015, 0.98930},
+    {0.26878, 0.54995, 0.99303}, {0.26592, 0.55979, 0.99583}, {0.26252, 0.56967, 0.99773},
+    {0.25862, 0.57958, 0.99876}, {0.25425, 0.58950, 0.99896}, {0.24946, 0.59943, 0.99835},
+    {0.24427, 0.60937, 0.99697}, {0.23874, 0.61931, 0.99485}, {0.23288, 0.62923, 0.99202},
+    {0.22676, 0.63913, 0.98851}, {0.22039, 0.64901, 0.98436}, {0.21382, 0.65886, 0.97959},
+    {0.20708, 0.66866, 0.97423}, {0.20021, 0.67842, 0.96833}, {0.19326, 0.68812, 0.96190},
+    {0.18625, 0.69775, 0.95498}, {0.17923, 0.70732, 0.94761}, {0.17223, 0.71680, 0.93981},
+    {0.16529, 0.72620, 0.93161}, {0.15844, 0.73551, 0.92305}, {0.15173, 0.74472, 0.91416},
+    {0.14519, 0.75381, 0.90496}, {0.13886, 0.76279, 0.89550}, {0.13278, 0.77165, 0.88580},
+    {0.12698, 0.78037, 0.87590}, {0.12151, 0.78896, 0.86581}, {0.11639, 0.79740, 0.85559},
+    {0.11167, 0.80569, 0.84525}, {0.10738, 0.81381, 0.83484}, {0.10357, 0.82177, 0.82437},
+    {0.10026, 0.82955, 0.81389}, {0.09750, 0.83714, 0.80342}, {0.09532, 0.84455, 0.79299},
+    {0.09377, 0.85175, 0.78264}, {0.09287, 0.85875, 0.77240}, {0.09267, 0.86554, 0.76230},
+    {0.09320, 0.87211, 0.75237}, {0.09451, 0.87844, 0.74265}, {0.09662, 0.88454, 0.73316},
+    {0.09958, 0.89040, 0.72393}, {0.10342, 0.89600, 0.71500}, {0.10815, 0.90142, 0.70599},
+    {0.11374, 0.90673, 0.69651}, {0.12014, 0.91193, 0.68660}, {0.12733, 0.91701, 0.67627},
+    {0.13526, 0.92197, 0.66556}, {0.14391, 0.92680, 0.65448}, {0.15323, 0.93151, 0.64308},
+    {0.16319, 0.93609, 0.63137}, {0.17377, 0.94053, 0.61938}, {0.18491, 0.94484, 0.60713},
+    {0.19659, 0.94901, 0.59466}, {0.20877, 0.95304, 0.58199}, {0.22142, 0.95692, 0.56914},
+    {0.23449, 0.96065, 0.55614}, {0.24797, 0.96423, 0.54303}, {0.26180, 0.96765, 0.52981},
+    {0.27597, 0.97092, 0.51653}, {0.29042, 0.97403, 0.50321}, {0.30513, 0.97697, 0.48987},
+    {0.32006, 0.97974, 0.47654}, {0.33517, 0.98234, 0.46325}, {0.35043, 0.98477, 0.45002},
+    {0.36581, 0.98702, 0.43688}, {0.38127, 0.98909, 0.42386}, {0.39678, 0.99098, 0.41098},
+    {0.41229, 0.99268, 0.39826}, {0.42778, 0.99419, 0.38575}, {0.44321, 0.99551, 0.37345},
+    {0.45854, 0.99663, 0.36140}, {0.47375, 0.99755, 0.34963}, {0.48879, 0.99828, 0.33816},
+    {0.50362, 0.99879, 0.32701}, {0.51822, 0.99910, 0.31622}, {0.53255, 0.99919, 0.30581},
+    {0.54658, 0.99907, 0.29581}, {0.56026, 0.99873, 0.28623}, {0.57357, 0.99817, 0.27712},
+    {0.58646, 0.99739, 0.26849}, {0.59891, 0.99638, 0.26038}, {0.61088, 0.99514, 0.25280},
+    {0.62233, 0.99366, 0.24579}, {0.63323, 0.99195, 0.23937}, {0.64362, 0.98999, 0.23356},
+    {0.65394, 0.98775, 0.22835}, {0.66428, 0.98524, 0.22370}, {0.67462, 0.98246, 0.21960},
+    {0.68494, 0.97941, 0.21602}, {0.69525, 0.97610, 0.21294}, {0.70553, 0.97255, 0.21032},
+    {0.71577, 0.96875, 0.20815}, {0.72596, 0.96470, 0.20640}, {0.73610, 0.96043, 0.20504},
+    {0.74617, 0.95593, 0.20406}, {0.75617, 0.95121, 0.20343}, {0.76608, 0.94627, 0.20311},
+    {0.77591, 0.94113, 0.20310}, {0.78563, 0.93579, 0.20336}, {0.79524, 0.93025, 0.20386},
+    {0.80473, 0.92452, 0.20459}, {0.81410, 0.91861, 0.20552}, {0.82333, 0.91253, 0.20663},
+    {0.83241, 0.90627, 0.20788}, {0.84133, 0.89986, 0.20926}, {0.85010, 0.89328, 0.21074},
+    {0.85868, 0.88655, 0.21230}, {0.86709, 0.87968, 0.21391}, {0.87530, 0.87267, 0.21555},
+    {0.88331, 0.86553, 0.21719}, {0.89112, 0.85826, 0.21880}, {0.89870, 0.85087, 0.22038},
+    {0.90605, 0.84337, 0.22188}, {0.91317, 0.83576, 0.22328}, {0.92004, 0.82806, 0.22456},
+    {0.92666, 0.82025, 0.22570}, {0.93301, 0.81236, 0.22667}, {0.93909, 0.80439, 0.22744},
+    {0.94489, 0.79634, 0.22800}, {0.95039, 0.78823, 0.22831}, {0.95560, 0.78005, 0.22836},
+    {0.96049, 0.77181, 0.22811}, {0.96507, 0.76352, 0.22754}, {0.96931, 0.75519, 0.22663},
+    {0.97323, 0.74682, 0.22536}, {0.97679, 0.73842, 0.22369}, {0.98000, 0.73000, 0.22161},
+    {0.98289, 0.72140, 0.21918}, {0.98549, 0.71250, 0.21650}, {0.98781, 0.70330, 0.21358},
+    {0.98986, 0.69382, 0.21043}, {0.99163, 0.68408, 0.20706}, {0.99314, 0.67408, 0.20348},
+    {0.99438, 0.66386, 0.19971}, {0.99535, 0.65341, 0.19577}, {0.99607, 0.64277, 0.19165},
+    {0.99654, 0.63193, 0.18738}, {0.99675, 0.62093, 0.18297}, {0.99672, 0.60977, 0.17842},
+    {0.99644, 0.59846, 0.17376}, {0.99593, 0.58703, 0.16899}, {0.99517, 0.57549, 0.16412},
+    {0.99419, 0.56386, 0.15918}, {0.99297, 0.55214, 0.15417}, {0.99153, 0.54036, 0.14910},
+    {0.98987, 0.52854, 0.14398}, {0.98799, 0.51667, 0.13883}, {0.98590, 0.50479, 0.13367},
+    {0.98360, 0.49291, 0.12849}, {0.98108, 0.48104, 0.12332}, {0.97837, 0.46920, 0.11817},
+    {0.97545, 0.45740, 0.11305}, {0.97234, 0.44565, 0.10797}, {0.96904, 0.43399, 0.10294},
+    {0.96555, 0.42241, 0.09798}, {0.96187, 0.41093, 0.09310}, {0.95801, 0.39958, 0.08831},
+    {0.95398, 0.38836, 0.08362}, {0.94977, 0.37729, 0.07905}, {0.94538, 0.36638, 0.07461},
+    {0.94084, 0.35566, 0.07031}, {0.93612, 0.34513, 0.06616}, {0.93125, 0.33482, 0.06218},
+    {0.92623, 0.32473, 0.05837}, {0.92105, 0.31489, 0.05475}, {0.91572, 0.30530, 0.05134},
+    {0.91024, 0.29599, 0.04814}, {0.90463, 0.28696, 0.04516}, {0.89888, 0.27824, 0.04243},
+    {0.89298, 0.26981, 0.03993}, {0.88691, 0.26152, 0.03753}, {0.88066, 0.25334, 0.03521},
+    {0.87422, 0.24526, 0.03297}, {0.86760, 0.23730, 0.03082}, {0.86079, 0.22945, 0.02875},
+    {0.85380, 0.22170, 0.02677}, {0.84662, 0.21407, 0.02487}, {0.83926, 0.20654, 0.02305},
+    {0.83172, 0.19912, 0.02131}, {0.82399, 0.19182, 0.01966}, {0.81608, 0.18462, 0.01809},
+    {0.80799, 0.17753, 0.01660}, {0.79971, 0.17055, 0.01520}, {0.79125, 0.16368, 0.01387},
+    {0.78260, 0.15693, 0.01264}, {0.77377, 0.15028, 0.01148}, {0.76476, 0.14374, 0.01041},
+    {0.75556, 0.13731, 0.00942}, {0.74617, 0.13098, 0.00851}, {0.73661, 0.12477, 0.00769},
+    {0.72686, 0.11867, 0.00695}, {0.71692, 0.11268, 0.00629}, {0.70680, 0.10680, 0.00571},
+    {0.69650, 0.10102, 0.00522}, {0.68602, 0.09536, 0.00481}, {0.67535, 0.08980, 0.00449},
+    {0.66449, 0.08436, 0.00424}, {0.65345, 0.07902, 0.00408}, {0.64223, 0.07380, 0.00401},
+    {0.63082, 0.06868, 0.00401}, {0.61923, 0.06367, 0.00410}, {0.60746, 0.05878, 0.00427},
+    {0.59550, 0.05399, 0.00453}, {0.58336, 0.04931, 0.00486}, {0.57103, 0.04474, 0.00529},
+    {0.55852, 0.04028, 0.00579}, {0.54583, 0.03593, 0.00638}, {0.53295, 0.03169, 0.00705},
+    {0.51989, 0.02756, 0.00780}, {0.50664, 0.02354, 0.00863}, {0.49321, 0.01963, 0.00955},
+    {0.47960, 0.01583, 0.01055}};
+
+uint64_t nanoTime() {
     using namespace std;
     using namespace chrono;
     return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
 }
-//CONFIG
-KOKKOS_INLINE_FUNCTION bool isNaN(float x){
-    #ifdef __CUDA_ARCH__
+// CONFIG
+KOKKOS_INLINE_FUNCTION bool isNaN(float x) {
+#ifdef __CUDA_ARCH__
     return isnan(x);
-    #else
+#else
     return std::isnan(x);
-    #endif
+#endif
 }
-KOKKOS_INLINE_FUNCTION bool isINF(float x){
-    #ifdef __CUDA_ARCH__
+KOKKOS_INLINE_FUNCTION bool isINF(float x) {
+#ifdef __CUDA_ARCH__
     return isinf(x);
-    #else
+#else
     return std::isinf(x);
-    #endif
+#endif
 }
-KOKKOS_INLINE_FUNCTION bool isNaN(double x){
-    #ifdef __CUDA_ARCH__
+KOKKOS_INLINE_FUNCTION bool isNaN(double x) {
+#ifdef __CUDA_ARCH__
     return isnan(x);
-    #else
+#else
     return std::isnan(x);
-    #endif
+#endif
 }
-KOKKOS_INLINE_FUNCTION bool isINF(double x){
-    #ifdef __CUDA_ARCH__
+KOKKOS_INLINE_FUNCTION bool isINF(double x) {
+#ifdef __CUDA_ARCH__
     return isinf(x);
-    #else
+#else
     return std::isinf(x);
-    #endif
+#endif
 }
-template<unsigned int Dim, typename callable, typename... Ts>
-KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from, ippl::Vector<uint32_t, Dim> to, Ts... args){
-    if constexpr(sizeof...(Ts) == Dim){
+template <unsigned int Dim, typename callable, typename... Ts>
+KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from,
+                                       ippl::Vector<uint32_t, Dim> to, Ts... args) {
+    if constexpr (sizeof...(Ts) == Dim) {
         c(args...);
-    }
-    else{
-        for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){
+    } else {
+        for (uint32_t i = from[sizeof...(Ts)]; i < to[sizeof...(Ts)]; i++) {
             serial_for(c, from, to, args..., i);
         }
     }
 }
 
-
-
-
-
 struct config {
     using scalar = double;
-    
+
     // GRID PARAMETERS
-    ippl::Vector<uint32_t, 3> resolution;               // Grid resolution in 3D
-    ippl::Vector<scalar, 3> extents;                    // Physical extents of the grid in each dimension
-    scalar total_time;                                  // Total simulation time
-    scalar timestep_ratio;                              // Ratio of timestep to some reference value
+    ippl::Vector<uint32_t, 3> resolution;  // Grid resolution in 3D
+    ippl::Vector<scalar, 3> extents;       // Physical extents of the grid in each dimension
+    scalar total_time;                     // Total simulation time
+    scalar timestep_ratio;                 // Ratio of timestep to some reference value
 
-    scalar length_scale_in_jobfile;                     // Length scale defined in the jobfile
-    scalar temporal_scale_in_jobfile;                   // Temporal scale defined in the jobfile
+    scalar length_scale_in_jobfile;    // Length scale defined in the jobfile
+    scalar temporal_scale_in_jobfile;  // Temporal scale defined in the jobfile
 
     // PARTICLE PARAMETERS
-    scalar charge;                                      // Particle charge in unit_charge
-    scalar mass;                                        // Particle mass in unit_mass
-    uint64_t num_particles;                             // Number of particles in the simulation
-    bool space_charge;                                  // Flag for considering space charge effects
+    scalar charge;           // Particle charge in unit_charge
+    scalar mass;             // Particle mass in unit_mass
+    uint64_t num_particles;  // Number of particles in the simulation
+    bool space_charge;       // Flag for considering space charge effects
 
     // BUNCH PARAMETERS
-    ippl::Vector<scalar, 3> mean_position;              // Mean initial position of the particle bunch
-    ippl::Vector<scalar, 3> sigma_position;             // Standard deviation of the initial position distribution
-    ippl::Vector<scalar, 3> position_truncations;       // Truncations of the position distribution
-    ippl::Vector<scalar, 3> sigma_momentum;             // Standard deviation of the initial momentum distribution
-    scalar bunch_gamma;                                 // Relativistic gamma factor of the bunch
+    ippl::Vector<scalar, 3> mean_position;  // Mean initial position of the particle bunch
+    ippl::Vector<scalar, 3>
+        sigma_position;  // Standard deviation of the initial position distribution
+    ippl::Vector<scalar, 3> position_truncations;  // Truncations of the position distribution
+    ippl::Vector<scalar, 3>
+        sigma_momentum;  // Standard deviation of the initial momentum distribution
+    scalar bunch_gamma;  // Relativistic gamma factor of the bunch
 
     // UNDULATOR PARAMETERS
-    scalar undulator_K;                                 // Undulator parameter K
-    scalar undulator_period;                            // Period of the undulator
-    scalar undulator_length;                            // Length of the undulator
+    scalar undulator_K;       // Undulator parameter K
+    scalar undulator_period;  // Period of the undulator
+    scalar undulator_length;  // Length of the undulator
 
-    uint32_t output_rhythm;                             // Frequency of output in timesteps
-    std::string output_path;                            // Path to output files
-    std::unordered_map<std::string, double> experiment_options; // Additional experimental options
+    uint32_t output_rhythm;                                      // Frequency of output in timesteps
+    std::string output_path;                                     // Path to output files
+    std::unordered_map<std::string, double> experiment_options;  // Additional experimental options
 };
 
-template<typename scalar, unsigned Dim>
-ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j){
-    if(j.is_array()){
+template <typename scalar, unsigned Dim>
+ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j) {
+    if (j.is_array()) {
         assert(j.size() == Dim);
         ippl::Vector<scalar, Dim> ret;
-        for(unsigned i = 0;i < Dim;i++)
+        for (unsigned i = 0; i < Dim; i++)
             ret[i] = (scalar)j[i];
         return ret;
-    }
-    else{
+    } else {
         std::cerr << "Warning: Obtaining Vector from scalar json\n";
         ippl::Vector<scalar, Dim> ret;
         ret.fill((scalar)j);
         return ret;
     }
 }
-template<size_t N, typename T>
+template <size_t N, typename T>
 struct DefaultedStringLiteral {
-    constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) {
+    constexpr DefaultedStringLiteral(const char (&str)[N], const T val)
+        : value(val) {
         std::copy_n(str, N, key);
     }
-    
+
     T value;
     char key[N];
 };
-template<size_t N>
+template <size_t N>
 struct StringLiteral {
-    constexpr StringLiteral(const char (&str)[N]) {
-        std::copy_n(str, N, value);
-    }
-    
+    constexpr StringLiteral(const char (&str)[N]) { std::copy_n(str, N, value); }
+
     char value[N];
-    constexpr DefaultedStringLiteral<N, int> operator>>(int t)const noexcept{
+    constexpr DefaultedStringLiteral<N, int> operator>>(int t) const noexcept {
         return DefaultedStringLiteral<N, int>(value, t);
     }
-    constexpr size_t size()const noexcept{return N - 1;}
+    constexpr size_t size() const noexcept { return N - 1; }
 };
-template<StringLiteral lit>
-constexpr size_t chash(){
+template <StringLiteral lit>
+constexpr size_t chash() {
     size_t hash = 5381;
     int c;
 
-    for(size_t i = 0;i < lit.size();i++){
-        c = lit.value[i];
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+    for (size_t i = 0; i < lit.size(); i++) {
+        c    = lit.value[i];
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
     }
 
     return hash;
@@ -416,18 +242,18 @@ size_t chash(const char* val) {
     int c;
 
     while ((c = *val++)) {
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
     }
 
     return hash;
 }
 size_t chash(const std::string& _val) {
-    size_t hash = 5381;
+    size_t hash     = 5381;
     const char* val = _val.c_str();
     int c;
 
     while ((c = *val++)) {
-        hash = ((hash << 5) + hash) + c; // hash * 33 + c
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
     }
 
     return hash;
@@ -443,39 +269,41 @@ std::string lowercase_singular(std::string str) {
 
     return str;
 }
-double get_time_multiplier(const nlohmann::json& j){
+double get_time_multiplier(const nlohmann::json& j) {
     std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]);
-    double time_factor = 1.0;
+    double time_factor              = 1.0;
     switch (chash(length_scale_string)) {
         case chash<"planck-time">():
         case chash<"plancktime">():
         case chash<"pt">():
         case chash<"natural">():
             time_factor = unit_time_in_seconds;
-        break;
+            break;
         case chash<"picosecond">():
             time_factor = 1e-12;
-        break;
+            break;
         case chash<"nanosecond">():
             time_factor = 1e-9;
-        break;
+            break;
         case chash<"microsecond">():
             time_factor = 1e-6;
-        break;
+            break;
         case chash<"millisecond">():
             time_factor = 1e-3;
-        break;
+            break;
         case chash<"second">():
             time_factor = 1.0;
-        break;
+            break;
         default:
-            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n";
-        break;
+            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"]
+                      << "\n";
+            break;
     }
     return time_factor;
 }
-double get_length_multiplier(const nlohmann::json& options){
-    std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]);
+double get_length_multiplier(const nlohmann::json& options) {
+    std::string length_scale_string =
+        lowercase_singular((std::string)options["mesh"]["length-scale"]);
     double length_factor = 1.0;
     switch (chash(length_scale_string)) {
         case chash<"planck-length">():
@@ -483,29 +311,30 @@ double get_length_multiplier(const nlohmann::json& options){
         case chash<"pl">():
         case chash<"natural">():
             length_factor = unit_length_in_meters;
-        break;
+            break;
         case chash<"picometer">():
             length_factor = 1e-12;
-        break;
+            break;
         case chash<"nanometer">():
             length_factor = 1e-9;
-        break;
+            break;
         case chash<"micrometer">():
             length_factor = 1e-6;
-        break;
+            break;
         case chash<"millimeter">():
             length_factor = 1e-3;
-        break;
+            break;
         case chash<"meter">():
             length_factor = 1.0;
-        break;
+            break;
         default:
-            std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n";
-        break;
+            std::cerr << "Unrecognized length scale: "
+                      << (std::string)options["mesh"]["length-scale"] << "\n";
+            break;
     }
     return length_factor;
 }
-config read_config(const char *filepath){
+config read_config(const char* filepath) {
     std::ifstream cfile(filepath);
     nlohmann::json j;
     cfile >> j;
@@ -518,493 +347,492 @@ config read_config(const char *filepath){
     ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters;
     ret.resolution = getVector<uint32_t, 3>(j["mesh"]["resolution"]);
 
-    //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
-    //std::cerr << "Tmult: " << tmult << "\n";
-    if(j.contains("timestep-ratio")){
+    // std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
+    // std::cerr << "Tmult: " << tmult << "\n";
+    if (j.contains("timestep-ratio")) {
         ret.timestep_ratio = (config::scalar)j["timestep-ratio"];
     }
 
-    else{
+    else {
         ret.timestep_ratio = 1;
     }
-    ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
+    ret.total_time   = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
     ret.space_charge = (bool)(j["mesh"]["space-charge"]);
-    ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]);
-    if(ret.bunch_gamma < config::scalar(1)){
+    ret.bunch_gamma  = (config::scalar)(j["bunch"]["gamma"]);
+    if (ret.bunch_gamma < config::scalar(1)) {
         std::cerr << "Gamma must be >= 1\n";
         exit(1);
     }
     assert(j.contains("undulator"));
     assert(j["undulator"].contains("static-undulator"));
 
-    ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"];
-    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters;
-    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters;
+    ret.undulator_K      = j["undulator"]["static-undulator"]["undulator-parameter"];
+    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult)
+                           / unit_length_in_meters;
+    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult)
+                           / unit_length_in_meters;
     assert(!std::isnan(ret.undulator_length));
     assert(!std::isnan(ret.undulator_period));
     assert(!std::isnan(ret.extents[0]));
     assert(!std::isnan(ret.extents[1]));
     assert(!std::isnan(ret.extents[2]));
     assert(!std::isnan(ret.total_time));
-    ret.length_scale_in_jobfile = get_length_multiplier(j);
+    ret.length_scale_in_jobfile   = get_length_multiplier(j);
     ret.temporal_scale_in_jobfile = get_time_multiplier(j);
-    ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
-    ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
+    ret.charge        = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
+    ret.mass          = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
     ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"];
-    ret.mean_position  = getVector<config::scalar, 3>(j["bunch"]["position"])                       * lmult / unit_length_in_meters;
-    ret.sigma_position = getVector<config::scalar, 3>(j["bunch"]["sigma-position"])                 * lmult / unit_length_in_meters;
-    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters;
+    ret.mean_position =
+        getVector<config::scalar, 3>(j["bunch"]["position"]) * lmult / unit_length_in_meters;
+    ret.sigma_position =
+        getVector<config::scalar, 3>(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters;
+    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"])
+                               * lmult / unit_length_in_meters;
     ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
-    ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
-    ret.output_path = "../data/";
-    if(j["output"].contains("path")){
+    ret.output_rhythm  = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
+    ret.output_path    = "../data/";
+    if (j["output"].contains("path")) {
         ret.output_path = j["output"]["path"];
-        if(!ret.output_path.ends_with('/')){
+        if (!ret.output_path.ends_with('/')) {
             ret.output_path.push_back('/');
         }
     }
-    if(j.contains("experimentation")){
+    if (j.contains("experimentation")) {
         nlohmann::json je = j["experimentation"];
-        for(auto it = je.begin(); it!= je.end();it++){
+        for (auto it = je.begin(); it != je.end(); it++) {
             ret.experiment_options[it.key()] = double(it.value());
         }
     }
     return ret;
 }
-template<typename scalar>
+template <typename scalar>
 using FieldVector = ippl::Vector<scalar, 3>;
-template<typename scalar>
+template <typename scalar>
 struct BunchInitialize {
-
-    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is
-     * manual the charge at points of the position vector will be produced.    				*/
+    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If
+     * it is manual the charge at points of the position vector will be produced.
+     */
     // std::string     			bunchType_;
 
-    /* Type of the distributions (transverse or longitudinal) in the bunch.				*/
+    /* Type of the distributions (transverse or longitudinal) in the bunch.
+     */
     std::string distribution_;
 
-    /* Type of the generator for creating the bunch distribution.					*/
+    /* Type of the generator for creating the bunch distribution.
+     */
     std::string generator_;
 
-    /* Total number of macroparticles in the bunch.                                                     */
+    /* Total number of macroparticles in the bunch. */
     unsigned int numberOfParticles_;
 
-    /* Total charge of the bunch in pC.                                                                 */
+    /* Total charge of the bunch in pC. */
     scalar cloudCharge_;
 
-    /* Initial energy of the bunch in MeV.                                                              */
+    /* Initial energy of the bunch in MeV. */
     scalar initialGamma_;
 
-    /* Initial normalized speed of the bunch.                                                           */
+    /* Initial normalized speed of the bunch. */
     scalar initialBeta_;
 
-    /* Initial movement direction of the bunch, which is a unit vector.                                 */
+    /* Initial movement direction of the bunch, which is a unit vector. */
     FieldVector<scalar> initialDirection_;
 
-    /* Position of the center of the bunch in the unit of length scale.                           	*/
+    /* Position of the center of the bunch in the unit of length scale. */
     // std::vector<FieldVector<scalar> >	position_;
     FieldVector<scalar> position_;
 
-    /* Number of macroparticles in each direction for 3Dcrystal type.                                   */
+    /* Number of macroparticles in each direction for 3Dcrystal type. */
     FieldVector<unsigned int> numbers_;
 
-    /* Lattice constant in x, y, and z directions for 3D crystal type.                                  */
+    /* Lattice constant in x, y, and z directions for 3D crystal type. */
     FieldVector<scalar> latticeConstants_;
 
     /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal
-     * type, it will be the spread in position for each micro-bunch of the crystal.			*/
+     * type, it will be the spread in position for each micro-bunch of the crystal.
+     */
     FieldVector<scalar> sigmaPosition_;
 
-    /* Spread in energy in each direction.                                                              */
+    /* Spread in energy in each direction. */
     FieldVector<scalar> sigmaGammaBeta_;
 
-    /* Store the truncation transverse distance for the electron generation.				*/
+    /* Store the truncation transverse distance for the electron generation.
+     */
     scalar tranTrun_;
 
-    /* Store the truncation longitudinal distance for the electron generation.				*/
+    /* Store the truncation longitudinal distance for the electron generation.
+     */
     scalar longTrun_;
 
-    /* Name of the file for reading the electrons distribution from.					*/
+    /* Name of the file for reading the electrons distribution from.
+     */
     std::string fileName_;
 
-    /* The radiation wavelength corresponding to the bunch length outside the undulator			*/
+    /* The radiation wavelength corresponding to the bunch length outside the undulator
+     */
     scalar lambda_;
 
-    /* Bunching factor for the initialization of the bunch.						*/
+    /* Bunching factor for the initialization of the bunch.
+     */
     scalar bF_;
 
-    /* Phase of the bunching factor for the initialization of the bunch.				*/
+    /* Phase of the bunching factor for the initialization of the bunch.
+     */
     scalar bFP_;
 
-    /* Boolean flag determining the activation of shot-noise.						*/
+    /* Boolean flag determining the activation of shot-noise.
+     */
     bool shotNoise_;
 
-    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.	*/
+    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.
+     */
     FieldVector<scalar> betaVector_;
 
-    /* Initialize the parameters for the bunch initialization to some first values.                     */
+    /* Initialize the parameters for the bunch initialization to some first values. */
     // BunchInitialize ();
 };
 
-//LORENTZ FRAME AND UNDULATOR
-template<typename T, unsigned axis = 2>
-struct UniaxialLorentzframe{
-    constexpr static T c = 1.0;
-    using scalar = T;
-    using Vector3 = ippl::Vector<T, 3>;
-    scalar beta_m;
-    scalar gammaBeta_m;
-    scalar gamma_m;
-    KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma){
-
-        UniaxialLorentzframe ret;
-        ret.gamma_m = gamma;
-        scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
-        scalar gammabeta = gamma * beta;
-        ret.beta_m = beta;
-        ret.gammaBeta_m = gammabeta;
-        return ret;
-    }
-    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative()const noexcept{
-        UniaxialLorentzframe ret;
-        ret.beta_m = -beta_m;
-        ret.gammaBeta_m = -gammaBeta_m;
-        ret.gamma_m = gamma_m;
-        return ret;
-    }
+// LORENTZ FRAME AND UNDULATOR
 
-    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
-    KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){
-        using Kokkos::sqrt;
-        gammaBeta_m = gammaBeta;
-        beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
-        gamma_m = sqrt(1 + gammaBeta * gammaBeta);
-    }
-    KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{
-        arg[axis] = gamma_m * (arg[axis] + beta_m * time); 
-    }
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB)const noexcept{
-        
-        Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
-        ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
-        ret.first  = ippl::Vector<T, 3>(unprimedEB.first  + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
-        ret.second = ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
-        ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
-        ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
-        return ret;
-    }
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> inverse_transform_EB(const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB)const noexcept{
-        return negative().transform_EB(primedEB);
-    }
-};
-template<typename scalar>
-BunchInitialize<scalar> generate_mithra_config(const config& cfg, const UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
-    using vec3 = ippl::Vector<scalar, 3>;
+template <typename scalar>
+BunchInitialize<scalar> generate_mithra_config(
+    const config& cfg, const ippl::UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
+    using vec3         = ippl::Vector<scalar, 3>;
     scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K);
     BunchInitialize<scalar> init;
-    init.generator_ = "random";
-    init.distribution_ = "uniform";
+    init.generator_        = "random";
+    init.distribution_     = "uniform";
     init.initialDirection_ = vec3{0, 0, 1};
-    init.initialGamma_ = cfg.bunch_gamma;
-    init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
-    init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast<scalar>();
-    init.sigmaPosition_ = cfg.sigma_position.template cast<scalar>();
+    init.initialGamma_     = cfg.bunch_gamma;
+    init.initialBeta_      = cfg.bunch_gamma == scalar(1)
+                                 ? 0
+                                 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
+    init.sigmaGammaBeta_   = cfg.sigma_momentum.template cast<scalar>();
+    init.sigmaPosition_    = cfg.sigma_position.template cast<scalar>();
 
     // TODO: Initial bunching factor huh
-    init.bF_ = 0.01;
-    init.bFP_ = 0;
-    init.shotNoise_ = false;
-    init.cloudCharge_ = cfg.charge;
-    init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
-    init.longTrun_ = cfg.position_truncations[2];
-    init.tranTrun_ = cfg.position_truncations[0];
-    init.position_ = cfg.mean_position.cast<scalar>();
-    init.betaVector_ = ippl::Vector<scalar, 3>{0,0,init.initialBeta_};
+    init.bF_                = 0.01;
+    init.bFP_               = 0;
+    init.shotNoise_         = false;
+    init.cloudCharge_       = cfg.charge;
+    init.lambda_            = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
+    init.longTrun_          = cfg.position_truncations[2];
+    init.tranTrun_          = cfg.position_truncations[0];
+    init.position_          = cfg.mean_position.cast<scalar>();
+    init.betaVector_        = ippl::Vector<scalar, 3>{0, 0, init.initialBeta_};
     init.numberOfParticles_ = cfg.num_particles;
 
-    init.numbers_ = 0;                      // UNUSED
-    init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED
+    init.numbers_          = 0;              // UNUSED
+    init.latticeConstants_ = vec3{0, 0, 0};  // UNUSED
 
     return init;
 }
 template <typename Double>
 struct Charge {
-
     Double q;                     /* Charge of the point in the unit of electron charge.	*/
     FieldVector<Double> rnp, rnm; /* Position vector of the charge.			*/
     FieldVector<Double> gb;       /* Normalized velocity vector of the charge.		*/
 
-    /* Double flag determining if the particle is passing the entrance point of the undulator. This flag
-     * can be used for better boosting the bunch to the moving frame. We need to consider it to be double,
-     * because this flag needs to be communicated during bunch update.					*/
+    /* Double flag determining if the particle is passing the entrance point of the undulator. This
+     * flag can be used for better boosting the bunch to the moving frame. We need to consider it to
+     * be double, because this flag needs to be communicated during bunch update.
+     */
     Double e;
 
     // Charge();
 };
 template <typename scalar>
 using ChargeVector = std::list<Charge<scalar>>;
-template<typename Double>
-void initializeBunchEllipsoid (BunchInitialize<Double> bunchInit, ChargeVector<Double> & chargeVector, int rank, int size, int ia){
-    /* Correct the number of particles if it is not a multiple of four.					*/
-    if ( bunchInit.numberOfParticles_ % 4 != 0 )
-      {
+template <typename Double>
+void initializeBunchEllipsoid(BunchInitialize<Double> bunchInit, ChargeVector<Double>& chargeVector,
+                              int rank, int size, int ia) {
+    /* Correct the number of particles if it is not a multiple of four.
+     */
+    if (bunchInit.numberOfParticles_ % 4 != 0) {
         unsigned int n = bunchInit.numberOfParticles_ % 4;
         bunchInit.numberOfParticles_ += 4 - n;
-        //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") +
-        //    std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
+        // printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of
+        // particles in the bunch is not a multiple of four. ") +
+        //     std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
     }
 
-    /* Save the initially given number of particles.							*/
-    unsigned int	Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
+    /* Save the initially given number of particles. */
+    unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
 
-    /* Declare the required parameters for the initialization of charge vectors.                      	*/
-    Charge<Double>         	charge; charge.q  = bunchInit.cloudCharge_ / Np;
+    /* Declare the required parameters for the initialization of charge vectors. */
+    Charge<Double> charge;
+    charge.q               = bunchInit.cloudCharge_ / Np;
     FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
-    FieldVector<Double> r  (0.0);
-    FieldVector<Double> t  (0.0);
-    Double            	t0;//, g;
-    Double		zmin = 1e100;
-    Double		Ne, bF, bFi;
-    unsigned int	bmi;
-    std::vector<Double>	randomNumbers;
-
-    /* The initialization in group of four particles should only be done if there exists an undulator in
-     * the interaction.											*/
-    unsigned int	ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4;
-
-    /* Check the bunching factor.                                                                     	*/
-    if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 )
-      {
-        //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") );
-        //exit(1);
-      }
-
-    /* If the generator is random we should make sure that different processors do not produce the same
-     * random numbers.											*/
-    if 	( bunchInit.generator_ == "random" )
-      {
-        /* Initialize the random number generator.								*/
-        srand ( time(NULL) );
-        /* Np / ng * 20 is the maximum number of particles.							*/
-        randomNumbers.resize( Np / ng * 20, 0.0);
-        for ( unsigned int ri = 0; ri < Np / ng * 20; ri++)
-          randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX));
-      }
-
-    /* Declare the generator function depending on the input.						*/
-    auto generate = [&] (unsigned int n, unsigned int m) {
-      //if 	( bunchInit.generator_ == "random" )
-        return  ( randomNumbers.at( n * 2 * Np/ng + m ) );
-      //else
-      //  return  ( randomNumbers[ n * 2 * Np/ng + m ] );
-    //TODO: Return halton properly
-        //return 	( halton(n,m) );
-    };
+    FieldVector<Double> r(0.0);
+    FieldVector<Double> t(0.0);
+    Double t0;  //, g;
+    Double zmin = 1e100;
+    Double Ne, bF, bFi;
+    unsigned int bmi;
+    std::vector<Double> randomNumbers;
+
+    /* The initialization in group of four particles should only be done if there exists an
+     * undulator in the interaction.
+     */
+    unsigned int ng = (bunchInit.lambda_ == 0.0) ? 1 : 4;
 
-    /* Declare the function for injecting the shot noise.						*/
-    auto insertCharge = [&] (Charge<Double> q) {
+    /* Check the bunching factor. */
+    if (bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0) {
+        // printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be
+        // larger than one or a negative value !!!") ); exit(1);
+    }
 
-      for ( unsigned int ii = 0; ii < ng; ii++ )
-        {
-          /* The random modulation is introduced depending on the shot-noise being activated.		*/
-          if ( bunchInit.shotNoise_ )
-            {
-              /* Obtain the number of beamlet.								*/
-              bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ );
+    /* If the generator is random we should make sure that different processors do not produce the
+     * same random numbers.
+     */
+    if (bunchInit.generator_ == "random") {
+        /* Initialize the random number generator.
+         */
+        srand(time(NULL));
+        /* Np / ng * 20 is the maximum number of particles.
+         */
+        randomNumbers.resize(Np / ng * 20, 0.0);
+        for (unsigned int ri = 0; ri < Np / ng * 20; ri++)
+            randomNumbers[ri] =
+                (float)std::min(1 - 1e-7, std::max(1e-7, ((double)rand()) / RAND_MAX));
+    }
 
-              /* Obtain the phase and amplitude of the modulation.					*/
-              bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) );
+    /* Declare the generator function depending on the input.
+     */
+    auto generate = [&](unsigned int n, unsigned int m) {
+        // if 	( bunchInit.generator_ == "random" )
+        return (randomNumbers.at(n * 2 * Np / ng + m));
+        // else
+        //   return  ( randomNumbers[ n * 2 * Np/ng + m ] );
+        // TODO: Return halton properly
+        // return 	( halton(n,m) );
+    };
 
-              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+    /* Declare the function for injecting the shot noise.
+     */
+    auto insertCharge = [&](Charge<Double> q) {
+        for (unsigned int ii = 0; ii < ng; ii++) {
+            /* The random modulation is introduced depending on the shot-noise being activated.
+             */
+            if (bunchInit.shotNoise_) {
+                /* Obtain the number of beamlet.
+                 */
+                bmi = int((charge.rnp[2] - zmin) / bunchInit.lambda_);
 
-              q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) );
-            }
-          else if ( bunchInit.lambda_ != 0.0)
-            {
-              q.rnp[2]  = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+                /* Obtain the phase and amplitude of the modulation.
+                 */
+                bFi = bF * sqrt(-2.0 * log(generate(8, bmi)));
+
+                q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+                q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi
+                            * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2]
+                                  + 2.0 * M_PI * generate(9, bmi));
+            } else if (bunchInit.lambda_ != 0.0) {
+                q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
 
-              q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 );
+                q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_
+                            * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2]
+                                  + bunchInit.bFP_ * M_PI / 180.0);
             }
 
-          /* Set this charge into the charge vector.							*/
-          chargeVector.push_back(q);
+            /* Set this charge into the charge vector. */
+            chargeVector.push_back(q);
         }
     };
 
     /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to
-     * calculate the FEL bucket number.									*/
-    if ( bunchInit.shotNoise_ )
-      {
-        for (i = 0; i < Np / ng; i++)
-          {
-            if ( bunchInit.distribution_ == "uniform" )
-              zmin = std::min(   Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin );
-            else if ( bunchInit.distribution_ == "gaussian" )
-              zmin = std::min(  (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin );
-            else
-              {
-                std::cout << std::string("The longitudinal type is not correctly given to the code !!!\n");
+     * calculate the FEL bucket number. */
+    if (bunchInit.shotNoise_) {
+        for (i = 0; i < Np / ng; i++) {
+            if (bunchInit.distribution_ == "uniform")
+                zmin = std::min(
+                    Double(2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2], zmin);
+            else if (bunchInit.distribution_ == "gaussian")
+                zmin = std::min(
+                    (Double)(bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0)))
+                             * sin(2.0 * M_PI * generate(3, i + Np0))),
+                    zmin);
+            else {
+                std::cout << std::string(
+                    "The longitudinal type is not correctly given to the code !!!\n");
                 exit(1);
-              }
-          }
-
-        if ( bunchInit.distribution_ == "uniform" )
-          for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++)
-            {
-              t0  = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) );
-              t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
+            }
+        }
 
-              zmin = std::min(   t0 , zmin );
+        if (bunchInit.distribution_ == "uniform")
+            for (; i < unsigned(Np / ng
+                                * (1.0
+                                   + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI)
+                                         / (2.0 * bunchInit.sigmaPosition_[2])));
+                 i++) {
+                t0 = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0)))
+                     * sin(2.0 * M_PI * generate(3, i + Np0));
+                t0 += (t0 < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]);
+
+                zmin = std::min(t0, zmin);
             }
 
         zmin = zmin + bunchInit.position_[2];
 
-        /* Obtain the average number of electrons per FEL beamlet.					*/
-        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] );
-
-        /* Set the bunching factor level for the shot noise depending on the given values.		*/
-        bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
-
-      }
+        /* Obtain the average number of electrons per FEL beamlet.
+         */
+        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / (2.0 * bunchInit.sigmaPosition_[2]);
 
-    /* Determine the properties of each charge point and add them to the charge vector.               	*/
-    for (i = rank; i < Np / ng; i += size)
-      {
-        /* Determine the transverse coordinate.								*/
-        r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
-        r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
+        /* Set the bunching factor level for the shot noise depending on the given values.
+         */
+        bF = (bunchInit.bF_ == 0.0) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
+    }
 
-        /* Determine the longitudinal coordinate.							*/
-        if ( bunchInit.distribution_ == "uniform" )
-          r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2];
-        else if ( bunchInit.distribution_ == "gaussian" )
-          r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
-        else
-          {
+    /* Determine the properties of each charge point and add them to the charge vector. */
+    for (i = rank; i < Np / ng; i += size) {
+        /* Determine the transverse coordinate. */
+        r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0)))
+               * cos(2.0 * M_PI * generate(1, i + Np0));
+        r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0)))
+               * sin(2.0 * M_PI * generate(1, i + Np0));
+
+        /* Determine the longitudinal coordinate. */
+        if (bunchInit.distribution_ == "uniform")
+            r[2] = (2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2];
+        else if (bunchInit.distribution_ == "gaussian")
+            r[2] = bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0)))
+                   * sin(2.0 * M_PI * generate(3, i + Np0));
+        else {
             exit(1);
-          }
-
-        /* Determine the transverse momentum.								*/
-        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
-        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
-        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
-
-        if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
-          {
-            /* Shift the generated charge to the center position and momentum space.			*/
-            charge.rnp    = bunchInit.position_;
-            charge.rnp   += r;
-
-            charge.gb   = gb;
-            charge.gb  += t;
-            if(std::isinf(gb[2])){
+        }
+
+        /* Determine the transverse momentum. */
+        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0)))
+               * cos(2.0 * M_PI * generate(5, i + Np0));
+        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0)))
+               * sin(2.0 * M_PI * generate(5, i + Np0));
+        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0)))
+               * cos(2.0 * M_PI * generate(7, i + Np0));
+
+        if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_
+            && fabs(r[2]) < bunchInit.longTrun_) {
+            /* Shift the generated charge to the center position and momentum space.
+             */
+            charge.rnp = bunchInit.position_;
+            charge.rnp += r;
+
+            charge.gb = gb;
+            charge.gb += t;
+            if (std::isinf(gb[2])) {
                 std::cerr << "[Warning] Gammabeta obtained an klonked here\n";
             }
 
-            /* Insert this charge and the mirrored ones into the charge vector.				*/
+            /* Insert this charge and the mirrored ones into the charge vector.
+             */
             insertCharge(charge);
-          }
-      }
+        }
+    }
 
-    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the
-     * CSE from the tail of the bunch.									*/
-    if ( bunchInit.distribution_ == "uniform" ){
-      for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size)
-        {
-            
-          r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) );
-          r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) );
-
-          /* Determine the longitudinal coordinate.							*/
-          r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) );
-          r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] );
-
-          /* Determine the transverse momentum.								*/
-          t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) );
-          t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) );
-          t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) );
-          if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_)
-            {
-              /* Shift the generated charge to the center position and momentum space.			*/
-              charge.rnp   = bunchInit.position_[ia];
-              charge.rnp  += r;
-
-              charge.gb  = gb;
-              
-              charge.gb += t;
-              /* Insert this charge and the mirrored ones into the charge vector.			*/
-              insertCharge(charge);
+    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove
+     * the CSE from the tail of the bunch.
+     */
+    if (bunchInit.distribution_ == "uniform") {
+        for (; i < unsigned(uint32_t(Np / ng)
+                            * (1.0
+                               + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI)
+                                     / (2.0 * bunchInit.sigmaPosition_[2])));
+             i += size) {
+            r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0)))
+                   * cos(2.0 * M_PI * generate(1, i + Np0));
+            r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0)))
+                   * sin(2.0 * M_PI * generate(1, i + Np0));
+
+            /* Determine the longitudinal coordinate. */
+            r[2] = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0)))
+                   * sin(2.0 * M_PI * generate(3, i + Np0));
+            r[2] += (r[2] < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]);
+
+            /* Determine the transverse momentum.
+             */
+            t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0)))
+                   * cos(2.0 * M_PI * generate(5, i + Np0));
+            t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0)))
+                   * sin(2.0 * M_PI * generate(5, i + Np0));
+            t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0)))
+                   * cos(2.0 * M_PI * generate(7, i + Np0));
+            if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_
+                && fabs(r[2]) < bunchInit.longTrun_) {
+                /* Shift the generated charge to the center position and momentum space.
+                 */
+                charge.rnp = bunchInit.position_[ia];
+                charge.rnp += r;
+
+                charge.gb = gb;
+
+                charge.gb += t;
+                /* Insert this charge and the mirrored ones into the charge vector.
+                 */
+                insertCharge(charge);
             }
         }
     }
 
     /* Reset the value for the number of particle variable according to the installed number of
-     * macro-particles and perform the corresponding changes.                                         	*/
+     * macro-particles and perform the corresponding changes. */
     bunchInit.numberOfParticles_ = chargeVector.size();
 }
 
-template<typename Double>
-void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma){
+template <typename Double>
+void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma) {
     Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
-    Double zmaxL = -1.0e100, zmaxG;
-    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
-      {
-        Double g  	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
-        if(std::isinf(g)){
-            std::cerr << __FILE__  << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n";
+    Double zmaxL      = -1.0e100, zmaxG;
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) {
+        Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        if (std::isinf(g)) {
+            std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g
+                      << "\n";
             abort();
         }
-        Double bz 	= iterQ->gb[2] / g;
-        iterQ->rnp[2]  *= frame_gamma;
-        
-        iterQ->gb[2] 	= frame_gamma * g * ( bz - frame_beta );
-        
-        zmaxL 		= std::max( zmaxL , iterQ->rnp[2] );
-      }
+        Double bz = iterQ->gb[2] / g;
+        iterQ->rnp[2] *= frame_gamma;
+
+        iterQ->gb[2] = frame_gamma * g * (bz - frame_beta);
+
+        zmaxL = std::max(zmaxL, iterQ->rnp[2]);
+    }
     zmaxG = zmaxL;
     struct {
-       Double zu_;
-       Double beta_;
+        Double zu_;
+        Double beta_;
     } bunch_;
-    bunch_.zu_ 		= zmaxG;
-    bunch_.beta_ 	= frame_beta;
+    bunch_.zu_   = zmaxG;
+    bunch_.beta_ = frame_beta;
 
     /****************************************************************************************************/
 
-    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ )
-      {
-        Double g	= std::sqrt(1.0 + iterQ->gb.squaredNorm());
-        iterQ->rnp[0]  += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        iterQ->rnp[1]  += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        iterQ->rnp[2]  += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta;
-        if(std::isnan(iterQ->rnp[2])){
-            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_  << ", " <<  frame_beta << "\n";
-            std::cerr << __FILE__  << ": " << __LINE__ << "   OOOOOF\n";
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) {
+        Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        iterQ->rnp[0] += iterQ->gb[0] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        iterQ->rnp[1] += iterQ->gb[1] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        iterQ->rnp[2] += iterQ->gb[2] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        if (std::isnan(iterQ->rnp[2])) {
+            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_
+                      << ", " << frame_beta << "\n";
+            std::cerr << __FILE__ << ": " << __LINE__ << "   OOOOOF\n";
             abort();
         }
-      }
+    }
 }
 
-
-
-
-template<typename bunch_type, typename scalar>
-size_t initialize_bunch_mithra(
-    bunch_type& bunch,
-    const BunchInitialize<scalar>& bunchInit,
-    scalar frame_gamma){
-
+template <typename bunch_type, typename scalar>
+size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize<scalar>& bunchInit,
+                               scalar frame_gamma) {
     ChargeVector<scalar> oof;
     initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0);
-    for(auto& c : oof){
-        if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
+    for (auto& c : oof) {
+        if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
             std::cout << "Pos before boost: " << c.rnp << "\n";
-        if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
+        if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
             std::cout << "Pos before boost: " << c.rnp << "\n";
     }
     boost_bunch(oof, frame_gamma);
-    for(auto& c : oof){
-         if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){
+    for (auto& c : oof) {
+        if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) {
             std::cout << "Pos after boost: " << c.rnp << "\n";
             break;
         }
@@ -1022,60 +850,52 @@ size_t initialize_bunch_mithra(
         scalar bz = iterQ->gb[2] / g;
         assert_isreal(bz);
         (void)bz;
-        positions(i) = iterQ->rnp;
+        positions(i)  = iterQ->rnp;
         gammabetas(i) = iterQ->gb;
         ++iterQ;
     }
-    if(oof.size() > bunch.getLocalNum()){
+    if (oof.size() > bunch.getLocalNum()) {
         bunch.create(oof.size() - bunch.getLocalNum());
     }
-    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions ("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions("", oof.size());
     Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", oof.size());
-    
+
     Kokkos::deep_copy(dpositions, positions);
     Kokkos::deep_copy(dgammabetas, gammabetas);
     Kokkos::deep_copy(bunch.R_nm1.getView(), positions);
     Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas);
-    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();;
-    Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){
-        rview(i) = dpositions(i);
-        rm1view(i) = dpositions(i);
-        gbview(i) = dgammabetas(i);
-    });
+    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(),
+         gbview = bunch.gamma_beta.getView();
+    ;
+    Kokkos::parallel_for(
+        oof.size(), KOKKOS_LAMBDA(size_t i) {
+            rview(i)   = dpositions(i);
+            rm1view(i) = dpositions(i);
+            gbview(i)  = dgammabetas(i);
+        });
     Kokkos::fence();
-    
+
     return oof.size();
 }
 
+// END LORENTZ FRAME AND UNDULATOR AND BUNCH
 
+// PREAMBLE
 
-
-
-
-
-//END LORENTZ FRAME AND UNDULATOR AND BUNCH
-
-
-
-
-
-//PREAMBLE
-
-template<typename scalar1, typename... scalar>
+template <typename scalar1, typename... scalar>
     requires((std::is_floating_point_v<scalar1>))
-KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x) {
     uint32_t dim = sizeof...(scalar);
     ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
-    for(unsigned d = 0;d < dim;d++){
+    for (unsigned d = 0; d < dim; d++) {
         vec[d] = vec[d] * vec[d];
     }
-    #ifndef __CUDA_ARCH__
+#ifndef __CUDA_ARCH__
     using std::exp;
-    #endif
-    return exp(-(vec.sum()) / (stddev * stddev)); 
+#endif
+    return exp(-(vec.sum()) / (stddev * stddev));
 }
 
-
 template <typename T>
 KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
                                                      const ippl::Vector<T, 3>& b) {
@@ -1085,37 +905,34 @@ KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a
     ret[2] = a[0] * b[1] - a[1] * b[0];
     return ret;
 }
-template<typename T>
-KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos, int nghost = 1){
+template <typename T>
+KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
+    const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos,
+    int nghost = 1) {
     Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-    ippl::Vector<T, 3> relpos = pos - origin;
+    ippl::Vector<T, 3> relpos  = pos - origin;
     ippl::Vector<T, 3> gridpos = relpos / hr;
     ippl::Vector<int, 3> ipos;
     ipos = gridpos.template cast<int>();
     ippl::Vector<T, 3> fracpos;
-    for(unsigned k = 0;k < 3;k++){
+    for (unsigned k = 0; k < 3; k++) {
         fracpos[k] = gridpos[k] - (int)ipos[k];
     }
     ipos += ippl::Vector<int, 3>(nghost);
-    ret.first = ipos;
+    ret.first  = ipos;
     ret.second = fracpos;
     return ret;
 }
-template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value, int nghost = 1){
+template <typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                   ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                   const ippl::Vector<scalar, 3>& pos, const scalar value,
+                                   int nghost = 1) {
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
     ipos -= ldom.first();
-    if(
-        ipos[0] < 0
-        ||ipos[1] < 0
-        ||ipos[2] < 0
-        ||size_t(ipos[0]) >= view.extent(0) - 1
-        ||size_t(ipos[1]) >= view.extent(1) - 1
-        ||size_t(ipos[2]) >= view.extent(2) - 1
-        ||fracpos[0] < 0
-        ||fracpos[1] < 0
-        ||fracpos[2] < 0
-    ){
+    if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+        || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+        || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
         return;
     }
     assert(fracpos[0] >= 0.0f);
@@ -1132,11 +949,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    
-    for(unsigned i = 0;i < 8;i++){
-        scalar weight = 1;
+
+    for (unsigned i = 0; i < 8; i++) {
+        scalar weight               = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
-        for(unsigned d = 0;d < 3;d++){
+        for (unsigned d = 0; d < 3; d++) {
             weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
             ipos_l[d] += !!(i & (1 << d));
         }
@@ -1147,23 +964,18 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     }
     assert(abs(accum - 1.0f) < 1e-6f);
 }
-template<typename view_type, typename scalar>
-KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value, int nghost = 1){
-    //std::cout << "Value: " << value << "\n";
+template <typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                   ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                   const ippl::Vector<scalar, 3>& pos,
+                                   const ippl::Vector<scalar, 3>& value, int nghost = 1) {
+    // std::cout << "Value: " << value << "\n";
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
     ipos -= ldom.first();
-    if(
-        ipos[0] < 0
-        ||ipos[1] < 0
-        ||ipos[2] < 0
-        ||size_t(ipos[0]) >= view.extent(0) - 1
-        ||size_t(ipos[1]) >= view.extent(1) - 1
-        ||size_t(ipos[2]) >= view.extent(2) - 1
-        ||fracpos[0] < 0
-        ||fracpos[1] < 0
-        ||fracpos[2] < 0
-    ){
-        //std::cout << "out of bounds\n";
+    if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+        || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+        || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+        // std::cout << "out of bounds\n";
         return;
     }
     assert(fracpos[0] >= 0.0f);
@@ -1180,11 +992,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(one_minus_fracpos[2] >= 0.0f);
     assert(one_minus_fracpos[2] <= 1.0f);
     scalar accum = 0;
-    
-    for(unsigned i = 0;i < 8;i++){
-        scalar weight = 1;
+
+    for (unsigned i = 0; i < 8; i++) {
+        scalar weight               = 1;
         ippl::Vector<int, 3> ipos_l = ipos;
-        for(unsigned d = 0;d < 3;d++){
+        for (unsigned d = 0; d < 3; d++) {
             weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
             ipos_l[d] += !!(i & (1 << d));
         }
@@ -1197,16 +1009,23 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
     assert(abs(accum - 1.0f) < 1e-6f);
 }
 
-template<typename view_type, typename scalar>
-KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor, int nghost = 1){ 
+template <typename view_type, typename scalar>
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
+                                              ippl::Vector<scalar, 3> hr,
+                                              ippl::Vector<scalar, 3> origin,
+                                              const ippl::Vector<scalar, 3>& from,
+                                              const ippl::Vector<scalar, 3>& to,
+                                              const scalar factor, int nghost = 1) {
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
+        gridCoordinatesOf(hr, origin, from);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
+        gridCoordinatesOf(hr, origin, to);
+
+    if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
+        && from_grid.first[2] == to_grid.first[2]) {
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((to - from) * factor));
 
-    
-    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
-    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
-    
-    if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
-        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
-        
         return;
     }
     ippl::Vector<scalar, 3> relay;
@@ -1214,34 +1033,22 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view
     using Kokkos::max;
     using Kokkos::min;
     for (unsigned int i = 0; i < 3; i++) {
-        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
-                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
+        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                           + scalar(1.0) * hr[i] + orig[i],
+                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                               + scalar(0.0) * hr[i] + orig[i],
                            scalar(0.5) * (to[i] + from[i])));
     }
-    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
-    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
+                  ippl::Vector<scalar, 3>((relay - from) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
+                  ippl::Vector<scalar, 3>((to - relay) * factor));
 }
 
-
-
 namespace ippl {
-    
-    template<typename scalar>
-    struct undulator_parameters{
-        scalar lambda_u; //MITHRA: lambda_u
-        scalar K; //Undulator parameter
-        scalar length;
-        scalar B_magnitude;
-        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda_u(lambda_u), K(K_undulator_parameter), length(_length){
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
-        }
-        undulator_parameters(const config& cfg): lambda_u(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u);
-        }
-    };
 
-    template<typename scalar>
-    struct nondispersive{
+    template <typename scalar>
+    struct nondispersive {
         scalar a1;
         scalar a2;
         scalar a4;
@@ -1249,19 +1056,20 @@ namespace ippl {
         scalar a8;
     };
     template <typename _scalar, class PLayout>
-    struct  Bunch_eb : public ippl::ParticleBase<PLayout> {
+    struct Bunch_eb : public ippl::ParticleBase<PLayout> {
         using scalar = _scalar;
 
         // Constructor for the Bunch class, taking a PLayout reference
         Bunch_eb(PLayout& playout)
             : ippl::ParticleBase<PLayout>(playout) {
             // Add attributes to the particle bunch
-            this->addAttribute(Q);          // Charge attribute
-            this->addAttribute(mass);       // Mass attribute
-            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(Q);     // Charge attribute
+            this->addAttribute(mass);  // Mass attribute
+            this->addAttribute(
+                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
             this->addAttribute(R_np1);      // Position attribute for the next time step
             this->addAttribute(R_nm1);      // Position attribute for the next time step
-            this->addAttribute(EB_gather);   // Electric field attribute for particle gathering
+            this->addAttribute(EB_gather);  // Electric field attribute for particle gathering
         }
 
         // Destructor for the Bunch class
@@ -1271,85 +1079,23 @@ namespace ippl {
         using charge_container_type   = ippl::ParticleAttrib<scalar>;
         using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
         using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
 
         // Declare instances of the attribute containers
-        charge_container_type Q;          // Charge container
-        charge_container_type mass;       // Mass container
-        velocity_container_type gamma_beta; // Gamma-beta container
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
-        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>> EB_gather;   // Electric field container for particle gathering
-
-    };
-
-
-    /**
-     * @brief Struct representing an undulator.
-     * 
-     * @tparam scalar Type of the scalar values (e.g., float, double).
-     */
-    template<typename scalar>
-    struct Undulator {
-        undulator_parameters<scalar> uparams; ///< Parameters of the undulator.
-        scalar distance_to_entry;             ///< Distance to the entry of the undulator.
-        scalar k_u;                           ///< Wavenumber of the undulator.
-
-        /**
-         * @brief Constructor to initialize undulator parameters and calculate k_u.
-         * 
-         * @param p Parameters of the undulator.
-         * @param dte Distance to the entry of the undulator.
-         */
-        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte) 
-            : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda_u) {}
-
-        /**
-         * @brief Overloaded operator() to compute magnetic field components.
-         * 
-         * @param position_in_lab_frame Position vector in the lab frame.
-         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> 
-         *         Pair containing magnetic field and its derivative.
-         */
-        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> 
-        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
-            using Kokkos::sin;
-            using Kokkos::sinh;
-            using Kokkos::cos;
-            using Kokkos::cosh;
-            using Kokkos::exp;
-
-            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> ret; // Return pair containing magnetic field and its derivative.
-            ret.first.fill(0);  // Initialize magnetic field vector.
-            ret.second.fill(0); // Initialize derivative vector.
-
-            // If the position is before the undulator entry.
-            if (position_in_lab_frame[2] < distance_to_entry) {
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator < 0); // Ensure we are in the correct region.
-                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); // Gaussian decay factor.
-
-                ret.second[0] = 0; // No x-component.
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; // y-component.
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; // z-component.
-            }
-            // If the position is within the undulator.
-            else if (position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator >= 0); // Ensure we are in the correct region.
-
-                ret.second[0] = 0; // No x-component.
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); // y-component.
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); // z-component.
-            }
-            return ret;
-        }
+        charge_container_type Q;             // Charge container
+        charge_container_type mass;          // Mass container
+        velocity_container_type gamma_beta;  // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_np1;  // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_nm1;  // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>>
+            EB_gather;  // Electric field container for particle gathering
     };
 
-
     /**
      * @brief Struct representing the state of an FEL (Free Electron Laser) simulation.
-     * 
+     *
      * @tparam scalar Type of the scalar values (e.g., float, double).
      */
     template <typename scalar>
@@ -1416,7 +1162,7 @@ namespace ippl {
             : fieldsAndParticles(layout, mesch, nparticles), 
               m_config(cfg), 
               ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), 
-              uparams(cfg), 
+              uparams(cfg.undulator_K, cfg.undulator_period, cfg.undulator_length), 
               undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) {
 
             hr_m = mesch.getMeshSpacing();
@@ -1459,8 +1205,8 @@ namespace ippl {
     // clang-format on
 }  // namespace ippl
 bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
-    const int channels = 3; // RGB
-    const int stride = width * channels;
+    const int channels = 3;  // RGB
+    const int stride   = width * channels;
     std::vector<unsigned char> flippedData(data, data + stride * height);
 
     // Use stb_image_write to write the BMP image to the file descriptor
@@ -1477,169 +1223,188 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
 }
 /**
  * @brief Helper function to gather data from a field using interpolation.
- * 
+ *
  * This function computes the value at a given position by interpolating the values in the field.
- * The position is transformed from physical coordinates to grid coordinates and then used to 
- * gather data from the field. The interpolation weights are computed based on the relative 
+ * The position is transformed from physical coordinates to grid coordinates and then used to
+ * gather data from the field. The interpolation weights are computed based on the relative
  * positions within the grid cells.
- * 
+ *
  * @tparam View Type of the field view.
  * @tparam T Scalar type for coordinates and grid spacing.
  * @tparam Dim Dimensionality of the position vector.
- * 
+ *
  * @param v The field view from which data is to be gathered.
  * @param pos The position vector in physical coordinates where the data is to be gathered.
  * @param origin The origin of the grid in physical coordinates.
  * @param hr The grid spacing vector.
  * @param lDom The local domain indices of the grid.
- * 
+ *
  * @return typename View::value_type The interpolated value at the specified position.
  */
-template<typename View, typename T, unsigned Dim>
-KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector<T, Dim>& pos, const ippl::Vector<T, 3>& origin, const ippl::Vector<T, 3>& hr, const ippl::NDIndex<3>& lDom){
+template <typename View, typename T, unsigned Dim>
+KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v,
+                                                               const ippl::Vector<T, Dim>& pos,
+                                                               const ippl::Vector<T, 3>& origin,
+                                                               const ippl::Vector<T, 3>& hr,
+                                                               const ippl::NDIndex<3>& lDom) {
     using vector_type = ippl::Vector<T, 3>;
 
     vector_type l;
 
-    for(unsigned k = 0;k < 3;k++){
-        l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented in a way such that this 1 is necessary here
-    }                     
+    for (unsigned k = 0; k < 3; k++) {
+        l[k] = (pos[k] - origin[k]) / hr[k]
+               + 1.0;  // gather is implemented in a way such that this 1 is necessary here
+    }
 
     ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
     ippl::Vector<T, 3> whi = l - index;
     ippl::Vector<T, 3> wlo(1.0);
     wlo -= whi;
 
-    //TODO: nghost
+    // TODO: nghost
     ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
-    for(unsigned k = 0;k < 3;k++){
-        if(args[k] >= v.extent(k) || args[k] == 0){
+    for (unsigned k = 0; k < 3; k++) {
+        if (args[k] >= v.extent(k) || args[k] == 0) {
             return typename View::value_type(0);
         }
     }
-    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/;
-
+    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo,
+                                                    whi, args) /*}*/;
 }
-template<typename scalar>
-scalar test_gauss_law(uint32_t n){
-
+template <typename scalar>
+scalar test_gauss_law(uint32_t n) {
     ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
     ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
-    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths, meter_in_unit_lengths,
+                                    meter_in_unit_lengths};
     std::array<bool, 3> isParallel;
     isParallel.fill(false);
     isParallel[2] = true;
-        
-        // all parallel layout, standard domain, normal axis order
+
+    // all parallel layout, standard domain, normal axis order
     ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
-        //[-1, 1] box
+    //[-1, 1] box
     ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
-    using vector_type = ippl::Vector<scalar, 3>;
-    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
+    using vector_type              = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths)};
     ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
 
     uint32_t pcount = 1 << 20;
-    //config cfg{};
-    //cfg.space_charge = true;
-    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh, pcount);
-    field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount;
-    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
-    auto pview = field_state.particles.R.getView();
-    auto p1view = field_state.particles.R_nm1.getView();
+    // config cfg{};
+    // cfg.space_charge = true;
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh,
+                                                                                pcount);
+    field_state.particles.Q    = scalar(coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount;  // Irrelefant
+    auto pview                 = field_state.particles.R.getView();
+    auto p1view                = field_state.particles.R_nm1.getView();
 
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    
-    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
-        auto state = random_pool.get_state();
-        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        p1view(i) = pview(i);
-        random_pool.free_state(state);
-    });
+
+    Kokkos::parallel_for(
+        pcount, KOKKOS_LAMBDA(size_t i) {
+            auto state  = random_pool.get_state();
+            pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            p1view(i)   = pview(i);
+            random_pool.free_state(state);
+        });
     Kokkos::fence();
     field_state.J = scalar(0.0);
     field_state.scatterBunch();
-    for(size_t i = 0;i < 8*n;i++){
+    for (size_t i = 0; i < 8 * n; i++) {
         field_state.field_solver.solve();
     }
 
     Kokkos::fence();
     auto lDom = field_state.E.getLayout().getLocalNDIndex();
-    
+
     std::ofstream line("gauss_line.txt");
-    typename ippl::FELSimulationState<scalar>::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView());
-    for(unsigned i = 1;i < nr[2];i++){
+    typename ippl::FELSimulationState<scalar>::e_view_type::host_mirror_type view =
+        Kokkos::create_mirror_view(field_state.E.getView());
+    for (unsigned i = 1; i < nr[2]; i++) {
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
         pos[2] += hx[2] * scalar(i);
         ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
-        line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " "
+             << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
     }
     return 0.0f;
 }
-template<typename scalar>
-scalar test_amperes_law(uint32_t n){
-
+template <typename scalar>
+scalar test_amperes_law(uint32_t n) {
     ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
     ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
-    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths, (scalar)(4 * meter_in_unit_lengths),
+                                    meter_in_unit_lengths};
     std::array<bool, 3> isParallel;
     isParallel.fill(false);
     isParallel[2] = true;
-        
+
     // all parallel layout, standard domain, normal axis order
     ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
     //[-1, 1] box
     ippl::Vector<scalar, 3> hx;
-    for(unsigned d = 0;d < 3;d++){
+    for (unsigned d = 0; d < 3; d++) {
         hx[d] = extents[d] / (scalar)nr[d];
     }
-    using vector_type = ippl::Vector<scalar, 3>;
-    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)};
+    using vector_type              = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-2.0 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths)};
     ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
 
     uint32_t pcount = 1 << 20;
-    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh, pcount);
-    field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount;
-    field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant
-    auto pview = field_state.particles.R.getView();
-    auto p1view = field_state.particles.R_nm1.getView();
-    constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times;
-    scalar timestep = field_state.field_solver.dt;
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh,
+                                                                                pcount);
+    field_state.particles.Q    = scalar(4.0 * coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount;  // Irrelefant
+    auto pview                 = field_state.particles.R.getView();
+    auto p1view                = field_state.particles.R_nm1.getView();
+    constexpr scalar vy        = meter_in_unit_lengths / second_in_unit_times;
+    scalar timestep            = field_state.field_solver.dt;
     Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
-    
-    Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){
-        //bunch.gammaBeta[i].fill(scalar(0));
-        auto state = random_pool.get_state();
-        pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
-        pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
-        p1view(i) = pview(i);
-        p1view(i)[1] -= vy * timestep;
-        random_pool.free_state(state);
-    });
+
+    Kokkos::parallel_for(
+        pcount, KOKKOS_LAMBDA(size_t i) {
+            // bunch.gammaBeta[i].fill(scalar(0));
+            auto state  = random_pool.get_state();
+            pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
+            p1view(i)   = pview(i);
+            p1view(i)[1] -= vy * timestep;
+            random_pool.free_state(state);
+        });
     Kokkos::fence();
     field_state.J = scalar(0.0);
     field_state.scatterBunch();
-    for(size_t i = 0;i < 8*n;i++){
+    for (size_t i = 0; i < 8 * n; i++) {
         field_state.field_solver.solve();
     }
     field_state.field_solver.evaluate_EB();
     Kokkos::fence();
     auto lDom = field_state.B.getLayout().getLocalNDIndex();
-    
+
     std::ofstream line("ampere_line.txt");
-    
-    typename ippl::FELSimulationState<scalar>::b_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.B.getView());
-    //ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar, 3>{0,0,0}, origin, hx, lDom);
-    for(unsigned i = 1;i < nr[2];i++){
+
+    typename ippl::FELSimulationState<scalar>::b_view_type::host_mirror_type view =
+        Kokkos::create_mirror_view(field_state.B.getView());
+    // ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar,
+    // 3>{0,0,0}, origin, hx, lDom);
+    for (unsigned i = 1; i < nr[2]; i++) {
         vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
         pos[2] += hx[2] * scalar(i);
         ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
-        //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n";
-        line << pos.norm() * unit_length_in_meters << " " << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n";
+        // line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) *
+        // unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " "
+             << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n";
     }
     return 0.0f;
 }
@@ -1647,19 +1412,22 @@ int main(int argc, char* argv[]) {
     using scalar = float;
     ippl::initialize(argc, argv);
     {
-        
-        //test_gauss_law<scalar>(64);
-        //test_amperes_law<scalar>(64);
-        //goto exit;
-        config cfg = read_config("../config.json");
-        const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
+        // test_gauss_law<scalar>(64);
+        // test_amperes_law<scalar>(64);
+        // goto exit;
+        config cfg               = read_config("../config.json");
+        const scalar frame_gamma = std::max(
+            decltype(cfg)::scalar(1),
+            cfg.bunch_gamma
+                / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
         cfg.extents[2] *= frame_gamma;
         cfg.total_time /= frame_gamma;
-        
-        const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
+
+        const scalar frame_beta      = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
         const scalar frame_gammabeta = frame_gamma * frame_beta;
-        UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
-        ippl::undulator_parameters<scalar> uparams(cfg);
+        ippl::UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
+        ippl::undulator_parameters<scalar> uparams(cfg.undulator_K, cfg.undulator_period,
+                                                   cfg.undulator_length);
 
         BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
         ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
@@ -1667,116 +1435,134 @@ int main(int argc, char* argv[]) {
         std::array<bool, 3> isParallel;
         isParallel.fill(false);
         isParallel[2] = true;
-        
+
         // all parallel layout, standard domain, normal axis order
         ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
 
         //[-1, 1] box
-        ippl::Vector<scalar, 3> hx     = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])};
-        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)};
+        ippl::Vector<scalar, 3> hx     = {scalar(cfg.extents[0] / cfg.resolution[0]),
+                                          scalar(cfg.extents[1] / cfg.resolution[1]),
+                                          scalar(cfg.extents[2] / cfg.resolution[2])};
+        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5),
+                                          scalar(-cfg.extents[1] * 0.5),
+                                          scalar(-cfg.extents[2] * 0.5)};
         ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
         std::cout << "hx: " << hx << "\n";
         std::cout << "origin: " << origin << "\n";
         std::cout << "extents: " << cfg.extents << std::endl;
-        if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){
+        if (sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1) {
             std::cerr << "Dispersion relation not satisfiable\n";
             abort();
         }
 
-        ippl::FELSimulationState<scalar> fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
-        
-        if(ippl::Comm->rank() == 0){
+        ippl::FELSimulationState<scalar> fdtd_state(
+            layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
+
+        if (ippl::Comm->rank() == 0) {
             std::cout << "Init particles: " << std::endl;
-            size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, mithra_config, frame_gamma);
-            fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc;
+            size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles,
+                                                       mithra_config, frame_gamma);
+            fdtd_state.fieldsAndParticles.particles.Q    = cfg.charge / actual_pc;
             fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc;
-        }
-        else{
+        } else {
             fdtd_state.fieldsAndParticles.particles.create(0);
         }
         {
-            auto rview = fdtd_state.fieldsAndParticles.particles.R.getView();
+            auto rview   = fdtd_state.fieldsAndParticles.particles.R.getView();
             auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView();
-            ippl::Vector<scalar, 3> meanpos = fdtd_state.fieldsAndParticles.particles.R.sum() * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum());
-    
-            Kokkos::parallel_for(fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                rview(i) -= meanpos;
-                rm1view(i) -= meanpos;
-            });
+            ippl::Vector<scalar, 3> meanpos =
+                fdtd_state.fieldsAndParticles.particles.R.sum()
+                * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum());
+
+            Kokkos::parallel_for(
+                fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                    rview(i) -= meanpos;
+                    rm1view(i) -= meanpos;
+                });
         }
         fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO);
-        
+
         size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt());
-        uint64_t starttime =  nanoTime();
+        uint64_t starttime        = nanoTime();
         std::ofstream rad;
         FILE* ffmpeg_file = nullptr;
-        if(ippl::Comm->rank() == 0){
+        if (ippl::Comm->rank() == 0) {
             rad = std::ofstream("radiation.txt");
-            const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
-            if(cfg.output_rhythm != 0){
+            const char* ffmpegCmd =
+                "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf "
+                "scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p "
+                "-c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
+            if (cfg.output_rhythm != 0) {
                 ffmpeg_file = popen(ffmpegCmd, "w");
             }
         }
 
-
-        for(size_t i = 0;i < timesteps_required;i++){
-            if(ippl::Comm->rank() == 0){
+        for (size_t i = 0; i < timesteps_required; i++) {
+            if (ippl::Comm->rank() == 0) {
                 std::cout << "Doing step: " << i << std::endl;
             }
             fdtd_state.step();
-            auto ldom = layout.getLocalNDIndex();
-            auto nrg = fdtd_state.nr_global;
-            auto eview = fdtd_state.fieldsAndParticles.E.getView();
-            auto bview = fdtd_state.fieldsAndParticles.B.getView();
+            auto ldom        = layout.getLocalNDIndex();
+            auto nrg         = fdtd_state.nr_global;
+            auto eview       = fdtd_state.fieldsAndParticles.E.getView();
+            auto bview       = fdtd_state.fieldsAndParticles.B.getView();
             double radiation = 0.0;
-            Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){
-                Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{eview(i,j,k), bview(i,j,k)};
-                ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
-                ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
-                uint32_t kg = k + ldom.first()[2];
-                if(kg == nrg[2] - 3){
-                    ref += Elab.cross(Blab)[2];
-                }
-
-            }, radiation);
-            double radiation_in_watt_on_this_rank = radiation *
-            double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) *
-            fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
+            Kokkos::parallel_reduce(
+                ippl::getRangePolicy(eview, 1),
+                KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref) {
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{
+                        eview(i, j, k), bview(i, j, k)};
+                    ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
+                    ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
+                    uint32_t kg                  = k + ldom.first()[2];
+                    if (kg == nrg[2] - 3) {
+                        ref += Elab.cross(Blab)[2];
+                    }
+                },
+                radiation);
+            double radiation_in_watt_on_this_rank =
+                radiation
+                * double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters
+                         * unit_length_in_meters)
+                * fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
             double radiation_in_watt_global = 0.0;
-            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
-            if(ippl::Comm->rank() == 0){
-                ippl::Vector<scalar, 3> pos{0,0,(float)cfg.extents[2]};
+            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE,
+                       MPI_SUM, 0, ippl::Comm->getCommunicator());
+            if (ippl::Comm->rank() == 0) {
+                ippl::Vector<scalar, 3> pos{0, 0, (float)cfg.extents[2]};
                 frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i);
                 rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
             }
-            
+
             int rank = ippl::Comm->rank();
             int size = ippl::Comm->size();
-            if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){
-
-                
-                int img_height = 400;
-                int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]);
+            if ((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)) {
+                int img_height   = 400;
+                int img_width    = int(400.0 * cfg.extents[2] / cfg.extents[0]);
                 float* imagedata = new float[img_width * img_height * 3];
                 std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
-                float* idata_recvbuffer = new float[img_width * img_height * 3];
-                int floatcount = img_width * img_height * 3;
+                float* idata_recvbuffer  = new float[img_width * img_height * 3];
+                int floatcount           = img_width * img_height * 3;
                 uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
                 std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
                 auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror();
                 Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView());
-                for(size_t hi = 0;hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();hi++){
+                for (size_t hi = 0; hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();
+                     hi++) {
                     ippl::Vector<scalar, 3> ppos = phmirror(hi);
                     ppos -= mesh.getOrigin();
                     ppos /= cfg.extents.cast<scalar>();
                     int x_imgcoord = ppos[2] * img_width;
                     int y_imgcoord = ppos[0] * img_height;
-                    //printf("%d, %d\n", x_imgcoord, y_imgcoord);
-                    if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){
-                        const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
-                        //std::cout << intensity << "\n";
-                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = 
-                        std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity);
+                    // printf("%d, %d\n", x_imgcoord, y_imgcoord);
+                    if (y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width
+                        && y_imgcoord < img_height) {
+                        const float intensity =
+                            std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
+                        // std::cout << intensity << "\n";
+                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] =
+                            std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1]
+                                                + intensity);
                     }
                 };
 
@@ -1786,36 +1572,63 @@ int main(int argc, char* argv[]) {
                 Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView());
 
                 {
-                    for(int i = 1;i < img_width;i++){
-                        for(int j = 1;j < img_height;j++){
-                            int i_remap = (double(i) / (img_width  - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
-                            int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
-                            if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){
-                                if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){
-                                    ippl::Vector<scalar, 3> E = eh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
-                                    ippl::Vector<scalar, 3> B = bh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]);
-                                    
+                    for (int i = 1; i < img_width; i++) {
+                        for (int j = 1; j < img_height; j++) {
+                            int i_remap =
+                                (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
+                            int j_remap =
+                                (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
+                            if (i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]) {
+                                if (j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]) {
+                                    ippl::Vector<scalar, 3> E = eh(j_remap + 1 - ldom.first()[0],
+                                                                   fdtd_state.nr_global[1] / 2,
+                                                                   i_remap + 1 - ldom.first()[2]);
+                                    ippl::Vector<scalar, 3> B = bh(j_remap + 1 - ldom.first()[0],
+                                                                   fdtd_state.nr_global[1] / 2,
+                                                                   i_remap + 1 - ldom.first()[2]);
+
                                     ippl::Vector<scalar, 3> poynting = E.cross(B);
-                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> eblab =  frame_boost.inverse_transform_EB(
-                                        Kokkos::make_pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>(E, B)
-                                    );
-                                    ippl::Vector<scalar, 3> poyntinglab = eblab.first.cross(eblab.second);
+                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+                                        eblab = frame_boost.inverse_transform_EB(
+                                            Kokkos::make_pair<ippl::Vector<scalar, 3>,
+                                                              ippl::Vector<scalar, 3>>(E, B));
+                                    ippl::Vector<scalar, 3> poyntinglab =
+                                        eblab.first.cross(eblab.second);
                                     poynting = poyntinglab;
-                                    if(poynting.norm() > 0){
-                                        //std::cout << poynting.norm() << "\n";
+                                    if (poynting.norm() > 0) {
+                                        // std::cout << poynting.norm() << "\n";
                                     }
-                                    float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f;
-                                    int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
-                                    imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
-                                    imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
-                                    imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    float normalized_colorscale_value =
+                                        std::sqrt(poynting.norm()) * 0.00001f;
+                                    int index = (int)std::max(
+                                        0.0f,
+                                        std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                                    imagedata[(j * img_width + i) * 3 + 0] +=
+                                        turbo_cm[index][0]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 1] +=
+                                        turbo_cm[index][1]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 2] +=
+                                        turbo_cm[index][2]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
                                 }
                             }
                         }
                     }
                 }
                 /*
-                 * This code should be replaced by the Image Gathering anyway, but that's a different PR 
+                 * This code should be replaced by the Image Gathering anyway, but that's a
+                 * different PR
                  */
                 int mask = 1;
                 while (mask < size) {
@@ -1823,36 +1636,41 @@ int main(int argc, char* argv[]) {
                     {
                         if ((rank & mask) == 0) {
                             // Send data to partner
-                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
+                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0,
+                                     ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
                             // Apply image summation
-                            for(int f = 0;f < floatcount;f++){
+                            for (int f = 0; f < floatcount; f++) {
                                 imagedata[f] += idata_recvbuffer[f];
                             }
                         } else {
-                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator());
+                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0,
+                                     ippl::Comm->getCommunicator());
                             // Receive data from partner and apply reduction
-                            
                         }
                     }
                     mask <<= 1;  // Move to next bit position for pairing
                 }
-                if(rank == 0){
+                if (rank == 0) {
                     char output[1024] = {0};
-                    
+
                     snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
-                    std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
-                    if(ffmpeg_file != nullptr)
+                    std::transform(imagedata, imagedata + img_height * img_width * 3,
+                                   imagedata_final, [](float x) {
+                                       return (unsigned char)std::min(255.0f, std::max(0.0f, x));
+                                   });
+                    if (ffmpeg_file != nullptr)
                         writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
                 }
                 delete[] imagedata;
                 delete[] idata_recvbuffer;
                 delete[] imagedata_final;
-            }    
+            }
         }
         uint64_t endtime = nanoTime();
-        if(ippl::Comm->rank() == 0)
-            std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl;
+        if (ippl::Comm->rank() == 0)
+            std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9
+                      << std::endl;
     }
-    //exit:
+    // exit:
     ippl::finalize();
 }
\ No newline at end of file
diff --git a/alpine/Undulator.h b/alpine/Undulator.h
new file mode 100644
index 000000000..981786b8e
--- /dev/null
+++ b/alpine/Undulator.h
@@ -0,0 +1,238 @@
+#ifndef UNDULATOR_H
+#define UNDULATOR_H
+#include <Kokkos_Core.hpp>
+
+#include <cmath>
+
+#include "Types/Vector.h"
+
+#include "units.h"
+namespace ippl {
+    /**
+     * @brief A template struct representing a uniaxial Lorentz frame.
+     *
+     * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis
+     * (default is the z-axis, axis 2). It includes methods for transforming vectors and
+     * electromagnetic fields between frames, as well as for computing gamma factors and beta
+     * velocities. The struct uses Kokkos for portability and performance.
+     *
+     * @tparam T The scalar type used for computations (e.g., float, double).
+     * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the
+     * z-axis).
+     */
+    template <typename T, unsigned axis = 2>
+    struct UniaxialLorentzframe {
+        /// Speed of light, set to 1 for natural units.
+        constexpr static T c = 1.0;
+        /// Alias for the scalar type used in the struct.
+        using scalar = T;
+        /// Alias for a 3-dimensional vector of the scalar type.
+        using Vector3 = ippl::Vector<T, 3>;
+
+        /// Beta velocity component along the specified axis.
+        scalar beta_m;
+        /// Product of gamma factor and beta velocity.
+        scalar gammaBeta_m;
+        /// Gamma factor for the Lorentz transformation.
+        scalar gamma_m;
+
+        /**
+         * @brief Create a UniaxialLorentzframe from a given gamma factor.
+         *
+         * This static member function constructs a UniaxialLorentzframe object using a given
+         * gamma factor. It computes the corresponding beta velocity and gamma*beta product.
+         *
+         * @param gamma The gamma factor for the Lorentz transformation.
+         * @return A UniaxialLorentzframe object with computed beta and gamma*beta.
+         */
+        KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) {
+            UniaxialLorentzframe ret;
+            ret.gamma_m      = gamma;
+            scalar beta      = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
+            scalar gammabeta = gamma * beta;
+            ret.beta_m       = beta;
+            ret.gammaBeta_m  = gammabeta;
+            return ret;
+        }
+
+        /**
+         * @brief Get a UniaxialLorentzframe with negative beta velocity.
+         *
+         * This member function returns a new UniaxialLorentzframe object with the same gamma
+         * factor but with a negative beta velocity, effectively representing the inverse
+         * Lorentz transformation.
+         *
+         * @return A UniaxialLorentzframe object with negative beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative() const noexcept {
+            UniaxialLorentzframe ret;
+            ret.beta_m      = -beta_m;
+            ret.gammaBeta_m = -gammaBeta_m;
+            ret.gamma_m     = gamma_m;
+            return ret;
+        }
+
+        /// Default constructor.
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
+
+        /**
+         * @brief Construct a UniaxialLorentzframe from a gamma*beta value.
+         *
+         * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta
+         * value. It computes the corresponding beta velocity and gamma factor.
+         *
+         * @param gammaBeta The product of the gamma factor and beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) {
+            using Kokkos::sqrt;
+            gammaBeta_m = gammaBeta;
+            beta_m      = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
+            gamma_m     = sqrt(1 + gammaBeta * gammaBeta);
+        }
+
+        /**
+         * @brief Transform a spatial vector from the primed frame to the unprimed frame.
+         *
+         * This member function transforms a given spatial vector from the primed frame to the
+         * unprimed frame using the Lorentz transformation along the specified axis and the given
+         * time.
+         *
+         * @param arg The spatial vector to be transformed.
+         * @param time The time component for the transformation.
+         */
+        KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept {
+            arg[axis] = gamma_m * (arg[axis] + beta_m * time);
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the unprimed to the primed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * unprimed frame to the primed frame using the Lorentz transformation along the specified
+         * axis.
+         *
+         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the
+         * unprimed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the primed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
+            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+            ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
+            ippl::Vector<scalar, 3> vnorm{axis == 0, axis == 1, axis == 2};
+            ret.first =
+                ippl::Vector<T, 3>(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+            ret.second =
+                ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+            ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
+            ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
+            return ret;
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the primed to the unprimed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * primed frame to the unprimed frame using the inverse Lorentz transformation along the
+         * specified axis.
+         *
+         * @param primedEB A pair of vectors representing the electric and magnetic fields in the
+         * primed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the unprimed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
+        inverse_transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
+            return negative().transform_EB(primedEB);
+        }
+    };
+    template <typename scalar>
+    struct undulator_parameters {
+        scalar lambda_u;  // MITHRA: lambda_u
+        scalar K;         // Undulator parameter
+        scalar length;
+        scalar B_magnitude;
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length)
+            : lambda_u(lambda_u)
+            , K(K_undulator_parameter)
+            , length(_length) {
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K)
+                          / (electron_charge_in_unit_charges * lambda_u);
+        }
+    };
+    /**
+     * @brief Struct representing an undulator.
+     *
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
+    template <typename scalar>
+    struct Undulator {
+        undulator_parameters<scalar> uparams;  ///< Parameters of the undulator.
+        scalar distance_to_entry;              ///< Distance to the entry of the undulator.
+        scalar k_u;                            ///< Wavenumber of the undulator.
+
+        /**
+         * @brief Constructor to initialize undulator parameters and calculate k_u.
+         *
+         * @param p Parameters of the undulator.
+         * @param dte Distance to the entry of the undulator.
+         */
+        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte)
+            : uparams(p)
+            , distance_to_entry(dte)
+            , k_u(2 * M_PI / p.lambda_u) {}
+
+        /**
+         * @brief Overloaded operator() to compute magnetic field components.
+         *
+         * @param position_in_lab_frame Position vector in the lab frame.
+         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+         *         Pair containing magnetic field and its derivative.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
+            using Kokkos::cos;
+            using Kokkos::cosh;
+            using Kokkos::exp;
+            using Kokkos::sin;
+            using Kokkos::sinh;
+
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+                ret;             // Return pair containing magnetic field and its derivative.
+            ret.first.fill(0);   // Initialize magnetic field vector.
+            ret.second.fill(0);  // Initialize derivative vector.
+
+            // If the position is before the undulator entry.
+            if (position_in_lab_frame[2] < distance_to_entry) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);  // Ensure we are in the correct region.
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator)
+                                    * 0.5));  // Gaussian decay factor.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * z_in_undulator * k_u * scal;  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * scal;  // z-component.
+            }
+            // If the position is within the undulator.
+            else if (position_in_lab_frame[2] > distance_to_entry
+                     && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);  // Ensure we are in the correct region.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * sin(k_u * z_in_undulator);  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * cos(k_u * z_in_undulator);  // z-component.
+            }
+            return ret;
+        }
+    };
+}  // namespace ippl
+#endif  // UNDULATOR_H
\ No newline at end of file
diff --git a/alpine/units.h b/alpine/units.h
new file mode 100644
index 000000000..318892db8
--- /dev/null
+++ b/alpine/units.h
@@ -0,0 +1,37 @@
+#ifndef UNITS_HPP
+#define UNITS_HPP
+constexpr double sqrt_4pi = 3.54490770181103205459;
+constexpr double alpha_scaling_factor = 1e30;
+constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
+constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi;
+constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor;
+constexpr double electron_mass_in_kg = 9.1093837015e-31;
+constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor;
+constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg;
+constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters;
+constexpr double electron_charge_in_coulombs = 1.602176634e-19;
+constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs;
+
+constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges;
+constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds;
+constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses;
+constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+
+constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges;
+constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges;
+constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges;
+constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds;
+constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes;
+constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters;
+constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters);
+constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters);
+constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters;
+constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor);
+constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts;
+constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters);
+constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes);
+constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds);
+constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G;
+constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons;
+#endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index 52ec14529..f9fb5fa5d 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -4,73 +4,38 @@
 using std::size_t;
 #include "Types/Vector.h"
 
-
-
 #include "FieldLayout/FieldLayout.h"
-#include "Meshes/UniformCartesian.h"
+#include "MaxwellSolvers/AbsorbingBC.h"
 #include "MaxwellSolvers/Maxwell.h"
+#include "Meshes/UniformCartesian.h"
 #include "Particle/ParticleBase.h"
-#include "MaxwellSolvers/AbsorbingBC.h"
-constexpr double sqrt_4pi = 3.54490770181103205459;
-constexpr double alpha_scaling_factor = 1e30;
-constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
-constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi;
-constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor;
-constexpr double electron_mass_in_kg = 9.1093837015e-31;
-constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor;
-constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
-constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg;
-constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters;
-constexpr double electron_charge_in_coulombs = 1.602176634e-19;
-constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs;
-
-constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges;
-constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds;
-constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses;
-constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
-
-constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges;
-constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges;
-constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges;
-constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds;
-constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes;
-constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters;
-constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters);
-constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters);
-constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters;
-constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor);
-constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts;
-constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters);
-constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes);
-constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds);
-constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G;
-constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons;
-
-constexpr double sq(double x){
+
+constexpr double sq(double x) {
     return x * x;
 }
-constexpr float sq(float x){
+constexpr float sq(float x) {
     return x * x;
 }
 #define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X))
 namespace ippl {
-    enum fdtd_bc{
-        periodic, absorbing
+    enum fdtd_bc {
+        periodic,
+        absorbing
     };
-    
+
     template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
     class StandardFDTDSolver : Maxwell<EMField, FourField> {
-        public:
+    public:
         constexpr static unsigned Dim = EMField::dim;
-        using scalar = typename EMField::value_type::value_type;
-        using Vector_t = Vector<typename EMField::value_type::value_type, Dim>;
-        using FourVector_t = typename FourField::value_type;
+        using scalar                  = typename EMField::value_type::value_type;
+        using Vector_t                = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t            = typename FourField::value_type;
         StandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
             Maxwell<EMField, FourField>::setSource(source);
             Maxwell<EMField, FourField>::setEMFields(E, B);
             initialize();
         }
-        virtual void solve()override{
+        virtual void solve() override {
             step();
             timeShift();
             evaluate_EB();
@@ -87,128 +52,134 @@ namespace ippl {
                 (void)x;
             };
             bcsetter(std::make_index_sequence<Dim * 2>{});
-            A_n  .setFieldBC(vector_bcs);
+            A_n.setFieldBC(vector_bcs);
             A_np1.setFieldBC(vector_bcs);
             A_nm1.setFieldBC(vector_bcs);
         }
 
-
         FourField A_n;
         FourField A_np1;
         FourField A_nm1;
         scalar dt;
 
-        private:
-        void timeShift(){
-
-            //Look into this, maybe cyclic swap is better
+    private:
+        void timeShift() {
+            // Look into this, maybe cyclic swap is better
             Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
             Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
         }
-        void applyBCs(){
-            if constexpr(boundary_conditions == periodic){
+        void applyBCs() {
+            if constexpr (boundary_conditions == periodic) {
                 A_n.getFieldBC().apply(A_n);
                 A_nm1.getFieldBC().apply(A_nm1);
                 A_np1.getFieldBC().apply(A_np1);
-            }
-            else{
+            } else {
                 Vector<uint32_t, Dim> true_nr = nr_m;
                 true_nr += (A_n.getNghost() * 2);
                 second_order_mur_boundary_conditions bcs{};
                 bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
             }
         }
-        public:
-        void step(){
-            const auto& ldom    = layout_mp->getLocalNDIndex();
-            const int nghost    = A_n.getNghost();
-            const auto aview    = A_n  .getView();
-            const auto anp1view = A_np1.getView();
-            const auto anm1view = A_nm1.getView();
+
+    public:
+        void step() {
+            const auto& ldom       = layout_mp->getLocalNDIndex();
+            const int nghost       = A_n.getNghost();
+            const auto aview       = A_n.getView();
+            const auto anp1view    = A_np1.getView();
+            const auto anm1view    = A_nm1.getView();
             const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
 
-            const scalar a1 = scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2]));
-            const scalar a2 = sq(dt / hr_m[0]);
-            const scalar a4 = sq(dt / hr_m[1]);
-            const scalar a6 = sq(dt / hr_m[2]);
-            const scalar a8 = sq(dt);
+            const scalar a1 =
+                scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2]));
+            const scalar a2               = sq(dt / hr_m[0]);
+            const scalar a4               = sq(dt / hr_m[1]);
+            const scalar a6               = sq(dt / hr_m[2]);
+            const scalar a8               = sq(dt);
             Vector<uint32_t, Dim> true_nr = nr_m;
             true_nr += (nghost * 2);
-            constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0;
+            constexpr uint32_t one_if_absorbing_otherwise_0 =
+                boundary_conditions == absorbing ? 1 : 0;
             Kokkos::parallel_for(
-            "Four potential update", ippl::getRangePolicy(aview, nghost),
-            KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
-                // global indices
-                const uint32_t ig = i + ldom.first()[0];
-                const uint32_t jg = j + ldom.first()[1];
-                const uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
-                             + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
-                
-                if(val == 0){
-                    FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k)
-                                      + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k))
-                                      + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k))
-                                      + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1))
-                                      + a8 * (-source_view(i, j, k));
-                    anp1view(i, j, k) = interior;
-                }
-                else{
-                    //std::cout << i << ", " << j << ", " << k << "\n";
-                }
-            });
+                "Four potential update", ippl::getRangePolicy(aview, nghost),
+                KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
+                    // global indices
+                    const uint32_t ig = i + ldom.first()[0];
+                    const uint32_t jg = j + ldom.first()[1];
+                    const uint32_t kg = k + ldom.first()[2];
+                    uint32_t val =
+                        uint32_t(ig == one_if_absorbing_otherwise_0)
+                        + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1)
+                        + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                        + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3)
+                        + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4)
+                        + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
+
+                    if (val == 0) {
+                        FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k)
+                                                + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k))
+                                                + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k))
+                                                + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1))
+                                                + a8 * (-source_view(i, j, k));
+                        anp1view(i, j, k) = interior;
+                    } else {
+                        // std::cout << i << ", " << j << ", " << k << "\n";
+                    }
+                });
             Kokkos::fence();
             applyBCs();
             A_np1.fillHalo();
         }
-        void evaluate_EB(){
+        void evaluate_EB() {
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
-            const scalar idt = scalar(1.0) / dt;
-            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView();
+            const scalar idt                          = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(),
+                 A_nm1  = this->A_nm1.getView();
             auto source = Maxwell<EMField, FourField>::source_mp->getView();
             auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
             auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
 
-            Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
-                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
-                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
-                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
-
-                ippl::Vector<scalar, 3> grad_phi{
-                    dAdx[0], dAdy[0], dAdz[0]
-                };
-                ippl::Vector<scalar, 3> curlA{
-                    dAdy[3] - dAdz[2],
-                    dAdz[1] - dAdx[3],
-                    dAdx[2] - dAdy[1],
-                };
-                Eview(i,j,k) = -dAdt - grad_phi;
-                Bview(i,j,k) = curlA;
-            });
+            Kokkos::parallel_for(
+                this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    ippl::Vector<scalar, 3> dAdt =
+                        (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                    ippl::Vector<scalar, 4> dAdx =
+                        (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                    ippl::Vector<scalar, 4> dAdy =
+                        (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                    ippl::Vector<scalar, 4> dAdz =
+                        (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                    ippl::Vector<scalar, 3> grad_phi{dAdx[0], dAdy[0], dAdz[0]};
+                    ippl::Vector<scalar, 3> curlA{
+                        dAdy[3] - dAdz[2],
+                        dAdz[1] - dAdx[3],
+                        dAdx[2] - dAdy[1],
+                    };
+                    Eview(i, j, k) = -dAdt - grad_phi;
+                    Bview(i, j, k) = curlA;
+                });
             Kokkos::fence();
         }
 
-
         bool periodic_bc;
-        
+
         typename FourField::Mesh_t* mesh_mp;
         FieldLayout<Dim>* layout_mp;
         NDIndex<Dim> domain_m;
         Vector_t hr_m;
-        
-        
+
         Vector<int, Dim> nr_m;
-        
+
         void initialize() {
-        // get layout and mesh
+            // get layout and mesh
             layout_mp = &(this->source_mp->getLayout());
             mesh_mp   = &(this->source_mp->get_mesh());
-            
+
             // get mesh spacing, domain, and mesh size
-            hr_m     = mesh_mp->getMeshSpacing();
-            dt = hr_m[0] / 2;
-            for (unsigned int i = 0; i < Dim; ++i){
+            hr_m = mesh_mp->getMeshSpacing();
+            dt   = hr_m[0] / 2;
+            for (unsigned int i = 0; i < Dim; ++i) {
                 dt = std::min(dt, hr_m[i] / 2);
             }
             domain_m = layout_mp->getDomain();
@@ -227,20 +198,20 @@ namespace ippl {
     };
     /**
      * @brief Nonstandard Finite-Difference Time-Domain
-     * 
-     * @tparam EMField 
-     * @tparam FourField 
+     *
+     * @tparam EMField
+     * @tparam FourField
      */
     template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
     class NonStandardFDTDSolver : Maxwell<EMField, FourField> {
-        public:
+    public:
         constexpr static unsigned Dim = EMField::dim;
-        using scalar = typename EMField::value_type::value_type;
-        using Vector_t = Vector<typename EMField::value_type::value_type, Dim>;
-        using FourVector_t = typename FourField::value_type;
+        using scalar                  = typename EMField::value_type::value_type;
+        using Vector_t                = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t            = typename FourField::value_type;
         NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
             auto hx = source.get_mesh().getMeshSpacing();
-            if((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1){
+            if ((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1) {
                 std::cerr << "Dispersion-free CFL condition not satisfiable\n";
                 std::abort();
             }
@@ -248,7 +219,7 @@ namespace ippl {
             Maxwell<EMField, FourField>::setEMFields(E, B);
             initialize();
         }
-        virtual void solve()override{
+        virtual void solve() override {
             step();
             timeShift();
             evaluate_EB();
@@ -268,116 +239,135 @@ namespace ippl {
                 (void)x;
             };
             bcsetter(std::make_index_sequence<Dim * 2>{});
-            A_n  .setFieldBC(vector_bcs);
+            A_n.setFieldBC(vector_bcs);
             A_np1.setFieldBC(vector_bcs);
             A_nm1.setFieldBC(vector_bcs);
         }
 
-
-        private:
-        void timeShift(){
-
-            //Look into this, maybe cyclic swap is better
+    private:
+        void timeShift() {
+            // Look into this, maybe cyclic swap is better
             Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
             Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
         }
 
-        void applyBCs(){
-            if constexpr(boundary_conditions == periodic){
+        void applyBCs() {
+            if constexpr (boundary_conditions == periodic) {
                 A_n.getFieldBC().apply(A_n);
                 A_nm1.getFieldBC().apply(A_nm1);
                 A_np1.getFieldBC().apply(A_np1);
-            }
-            else{
+            } else {
                 Vector<uint32_t, Dim> true_nr = nr_m;
                 true_nr += (A_n.getNghost() * 2);
                 second_order_mur_boundary_conditions bcs{};
                 bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
             }
         }
-        
-        public:
-        template<typename scalar>
-        struct nondispersive{
+
+    public:
+        template <typename scalar>
+        struct nondispersive {
             scalar a1;
             scalar a2;
             scalar a4;
             scalar a6;
             scalar a8;
         };
-        void step(){
-            const auto& ldom    = layout_mp->getLocalNDIndex();
-            const int nghost    = A_n.getNghost();
-            const auto aview    = A_n  .getView();
-            const auto anp1view = A_np1.getView();
-            const auto anm1view = A_nm1.getView();
+        void step() {
+            const auto& ldom       = layout_mp->getLocalNDIndex();
+            const int nghost       = A_n.getNghost();
+            const auto aview       = A_n.getView();
+            const auto anp1view    = A_np1.getView();
+            const auto anm1view    = A_nm1.getView();
             const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
 
             const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
             nondispersive<scalar> ndisp{
-                .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])),
+                .a1 = 2
+                      * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2 * calA) * sq(dt / hr_m[1])
+                         - sq(dt / hr_m[2])),
                 .a2 = sq(dt / hr_m[0]),
                 .a4 = sq(dt / hr_m[1]),
                 .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]),
-                .a8 = sq(dt)
-            };
+                .a8 = sq(dt)};
             Vector<uint32_t, Dim> true_nr = nr_m;
             true_nr += (nghost * 2);
-            constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0;
-            Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                uint32_t ig = i + ldom.first()[0];
-                uint32_t jg = j + ldom.first()[1];
-                uint32_t kg = k + ldom.first()[2];
-                uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
-                             + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
-                
-                if(!val){
-                    anp1view(i, j, k) = -anm1view(i,j,k)
-                            + ndisp.a1 * aview(i,j,k)
-                            + ndisp.a2 * (calA * aview(i + 1, j, k - 1) + (1 - 2 * calA) * aview(i + 1, j, k) + calA * aview(i + 1, j, k + 1))
-                            + ndisp.a2 * (calA * aview(i - 1, j, k - 1) + (1 - 2 * calA) * aview(i - 1, j, k) + calA * aview(i - 1, j, k + 1))
-                            + ndisp.a4 * (calA * aview(i, j + 1, k - 1) + (1 - 2 * calA) * aview(i, j + 1, k) + calA * aview(i, j + 1, k + 1))
-                            + ndisp.a4 * (calA * aview(i, j - 1, k - 1) + (1 - 2 * calA) * aview(i, j - 1, k) + calA * aview(i, j - 1, k + 1))
-                            + ndisp.a6 * aview(i, j, k + 1) + ndisp.a6 * aview(i, j, k - 1) + ndisp.a8 * source_view(i, j, k);
-                }
-            });
+            constexpr uint32_t one_if_absorbing_otherwise_0 =
+                boundary_conditions == absorbing ? 1 : 0;
+            Kokkos::parallel_for(
+                ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    uint32_t ig = i + ldom.first()[0];
+                    uint32_t jg = j + ldom.first()[1];
+                    uint32_t kg = k + ldom.first()[2];
+                    uint32_t val =
+                        uint32_t(ig == one_if_absorbing_otherwise_0)
+                        + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1)
+                        + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                        + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3)
+                        + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4)
+                        + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
+
+                    if (!val) {
+                        anp1view(i, j, k) = -anm1view(i, j, k) + ndisp.a1 * aview(i, j, k)
+                                            + ndisp.a2
+                                                  * (calA * aview(i + 1, j, k - 1)
+                                                     + (1 - 2 * calA) * aview(i + 1, j, k)
+                                                     + calA * aview(i + 1, j, k + 1))
+                                            + ndisp.a2
+                                                  * (calA * aview(i - 1, j, k - 1)
+                                                     + (1 - 2 * calA) * aview(i - 1, j, k)
+                                                     + calA * aview(i - 1, j, k + 1))
+                                            + ndisp.a4
+                                                  * (calA * aview(i, j + 1, k - 1)
+                                                     + (1 - 2 * calA) * aview(i, j + 1, k)
+                                                     + calA * aview(i, j + 1, k + 1))
+                                            + ndisp.a4
+                                                  * (calA * aview(i, j - 1, k - 1)
+                                                     + (1 - 2 * calA) * aview(i, j - 1, k)
+                                                     + calA * aview(i, j - 1, k + 1))
+                                            + ndisp.a6 * aview(i, j, k + 1)
+                                            + ndisp.a6 * aview(i, j, k - 1)
+                                            + ndisp.a8 * source_view(i, j, k);
+                    }
+                });
             Kokkos::fence();
             A_np1.fillHalo();
             applyBCs();
         }
-        void evaluate_EB(){
-            *(Maxwell<EMField, FourField>::En_mp) = typename EMField::value_type(0);
-            *(Maxwell<EMField, FourField>::Bn_mp) = typename EMField::value_type(0);
+        void evaluate_EB() {
+            *(Maxwell<EMField, FourField>::En_mp)     = typename EMField::value_type(0);
+            *(Maxwell<EMField, FourField>::Bn_mp)     = typename EMField::value_type(0);
             ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
-            const scalar idt = scalar(1.0) / dt;
-            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView();
+            const scalar idt                          = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(),
+                 A_nm1  = this->A_nm1.getView();
             auto source = Maxwell<EMField, FourField>::source_mp->getView();
             auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
             auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
 
-            Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
-                ippl::Vector<scalar, 3> dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
-                ippl::Vector<scalar, 4> dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
-                ippl::Vector<scalar, 4> dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
-                ippl::Vector<scalar, 4> dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
-
-                ippl::Vector<scalar, 3> grad_phi{
-                    dAdx[0], dAdy[0], dAdz[0]
-                };
-                ippl::Vector<scalar, 3> curlA{
-                    dAdy[3] - dAdz[2],
-                    dAdz[1] - dAdx[3],
-                    dAdx[2] - dAdy[1],
-                };
-                Eview(i,j,k) = -dAdt - grad_phi;
-                Bview(i,j,k) = curlA;
-            });
+            Kokkos::parallel_for(
+                this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    ippl::Vector<scalar, 3> dAdt =
+                        (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                    ippl::Vector<scalar, 4> dAdx =
+                        (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                    ippl::Vector<scalar, 4> dAdy =
+                        (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                    ippl::Vector<scalar, 4> dAdz =
+                        (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                    ippl::Vector<scalar, 3> grad_phi{dAdx[0], dAdy[0], dAdz[0]};
+                    ippl::Vector<scalar, 3> curlA{
+                        dAdy[3] - dAdz[2],
+                        dAdz[1] - dAdx[3],
+                        dAdx[2] - dAdy[1],
+                    };
+                    Eview(i, j, k) = -dAdt - grad_phi;
+                    Bview(i, j, k) = curlA;
+                });
             Kokkos::fence();
         }
 
-
-
-        
         typename FourField::Mesh_t* mesh_mp;
         FieldLayout<Dim>* layout_mp;
         NDIndex<Dim> domain_m;
@@ -385,15 +375,15 @@ namespace ippl {
         scalar dt;
         Vector<int, Dim> nr_m;
         bool periodic_bc;
-        
+
         void initialize() {
-        // get layout and mesh
+            // get layout and mesh
             layout_mp = &(this->source_mp->getLayout());
             mesh_mp   = &(this->source_mp->get_mesh());
-            
+
             // get mesh spacing, domain, and mesh size
             hr_m     = mesh_mp->getMeshSpacing();
-            dt = hr_m[2];
+            dt       = hr_m[2];
             domain_m = layout_mp->getDomain();
             for (unsigned int i = 0; i < Dim; ++i)
                 nr_m[i] = domain_m[i].length();
@@ -408,22 +398,23 @@ namespace ippl {
             A_np1 = 0.0;
         };
     };
-    
+
     template <typename _scalar, class PLayout>
-    struct  Bunch : public ippl::ParticleBase<PLayout> {
+    struct Bunch : public ippl::ParticleBase<PLayout> {
         using scalar = _scalar;
 
         // Constructor for the Bunch class, taking a PLayout reference
         Bunch(PLayout& playout)
             : ippl::ParticleBase<PLayout>(playout) {
             // Add attributes to the particle bunch
-            this->addAttribute(Q);          // Charge attribute
-            this->addAttribute(mass);       // Mass attribute
-            this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta)
-            this->addAttribute(R_np1);      // Position attribute for the next time step
-            this->addAttribute(R_nm1);      // Position attribute for the next time step
-            this->addAttribute(E_gather);   // Electric field attribute for particle gathering
-            this->addAttribute(B_gather);   // Magnedit field attribute for particle gathering
+            this->addAttribute(Q);     // Charge attribute
+            this->addAttribute(mass);  // Mass attribute
+            this->addAttribute(
+                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);     // Position attribute for the next time step
+            this->addAttribute(R_nm1);     // Position attribute for the next time step
+            this->addAttribute(E_gather);  // Electric field attribute for particle gathering
+            this->addAttribute(B_gather);  // Magnedit field attribute for particle gathering
         }
 
         // Destructor for the Bunch class
@@ -433,54 +424,53 @@ namespace ippl {
         using charge_container_type   = ippl::ParticleAttrib<scalar>;
         using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
         using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector4_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
 
         // Declare instances of the attribute containers
-        charge_container_type Q;          // Charge container
-        charge_container_type mass;       // Mass container
-        velocity_container_type gamma_beta; // Gamma-beta container
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_np1; // Position container for the next time step
-        typename ippl::ParticleBase<PLayout>::particle_position_type R_nm1; // Position container for the previous time step
-        ippl::ParticleAttrib<ippl::Vector<scalar, 3>> E_gather;   // Electric field container for particle gathering
-        ippl::ParticleAttrib<ippl::Vector<scalar, 3>> B_gather;   // Magnetio field container for particle gathering
-
+        charge_container_type Q;             // Charge container
+        charge_container_type mass;          // Mass container
+        velocity_container_type gamma_beta;  // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_np1;  // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_nm1;  // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            E_gather;  // Electric field container for particle gathering
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            B_gather;  // Magnetio field container for particle gathering
     };
-    template<typename T>
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos){
-        //return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5}, ippl::Vector<T, 3>{0,0,0}};
-        //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+    template <typename T>
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
+        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos) {
+        // return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5},
+        // ippl::Vector<T, 3>{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing *
+        // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
         Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-        //pos -= spacing * T(0.5);
-        ippl::Vector<T, 3> relpos = pos - origin;
+        // pos -= spacing * T(0.5);
+        ippl::Vector<T, 3> relpos  = pos - origin;
         ippl::Vector<T, 3> gridpos = relpos / hr;
         ippl::Vector<int, 3> ipos;
         ipos = gridpos.template cast<int>();
         ippl::Vector<T, 3> fracpos;
-        for(unsigned k = 0;k < 3;k++){
+        for (unsigned k = 0; k < 3; k++) {
             fracpos[k] = gridpos[k] - (int)ipos[k];
         }
-        //TODO: NGHOST!!!!!!!
+        // TODO: NGHOST!!!!!!!
         ipos += ippl::Vector<int, 3>(1);
-        ret.first = ipos;
+        ret.first  = ipos;
         ret.second = fracpos;
         return ret;
     }
-    template<typename view_type, typename scalar>
-    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const scalar value){
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos, const scalar value) {
         auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
         ipos -= ldom.first();
-        //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
-        if(
-            ipos[0] < 0
-            ||ipos[1] < 0
-            ||ipos[2] < 0
-            ||size_t(ipos[0]) >= view.extent(0) - 1
-            ||size_t(ipos[1]) >= view.extent(1) - 1
-            ||size_t(ipos[2]) >= view.extent(2) - 1
-            ||fracpos[0] < 0
-            ||fracpos[1] < 0
-            ||fracpos[2] < 0
-        ){
+        // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
             return;
         }
         assert(fracpos[0] >= 0.0f);
@@ -498,10 +488,10 @@ namespace ippl {
         assert(one_minus_fracpos[2] <= 1.0f);
         scalar accum = 0;
 
-        for(unsigned i = 0;i < 8;i++){
-            scalar weight = 1;
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
             ippl::Vector<int, 3> ipos_l = ipos;
-            for(unsigned d = 0;d < 3;d++){
+            for (unsigned d = 0; d < 3; d++) {
                 weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
                 ipos_l[d] += !!(i & (1 << d));
             }
@@ -512,23 +502,18 @@ namespace ippl {
         }
         assert(abs(accum - 1.0f) < 1e-6f);
     }
-    template<typename view_type, typename scalar>
-    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig, const ippl::Vector<scalar, 3>& pos, const ippl::Vector<scalar, 3>& value){
-        //std::cout << "Value: " << value << "\n";
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos,
+                                       const ippl::Vector<scalar, 3>& value) {
+        // std::cout << "Value: " << value << "\n";
         auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
         ipos -= ldom.first();
-        if(
-            ipos[0] < 0
-            ||ipos[1] < 0
-            ||ipos[2] < 0
-            ||size_t(ipos[0]) >= view.extent(0) - 1
-            ||size_t(ipos[1]) >= view.extent(1) - 1
-            ||size_t(ipos[2]) >= view.extent(2) - 1
-            ||fracpos[0] < 0
-            ||fracpos[1] < 0
-            ||fracpos[2] < 0
-        ){
-            //std::cout << "out of bounds\n";
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+            // std::cout << "out of bounds\n";
             return;
         }
         assert(fracpos[0] >= 0.0f);
@@ -546,10 +531,10 @@ namespace ippl {
         assert(one_minus_fracpos[2] <= 1.0f);
         scalar accum = 0;
 
-        for(unsigned i = 0;i < 8;i++){
-            scalar weight = 1;
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
             ippl::Vector<int, 3> ipos_l = ipos;
-            for(unsigned d = 0;d < 3;d++){
+            for (unsigned d = 0; d < 3; d++) {
                 weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
                 ipos_l[d] += !!(i & (1 << d));
             }
@@ -561,52 +546,71 @@ namespace ippl {
         }
         assert(abs(accum - 1.0f) < 1e-6f);
     }
-    template<typename view_type, typename scalar>
-    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> origin, const ippl::Vector<scalar, 3>& from, const ippl::Vector<scalar, 3>& to, const scalar factor){ 
-
-
-        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid = gridCoordinatesOf(hr, origin, from);
-        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid   = gridCoordinatesOf(hr, origin, to  );
-        //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]);
-        for(int d = 0;d < 3;d++){
-            //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
-            //    std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " << from_grid.first << " " << to_grid.first << std::endl;
-            //}
-            //assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+    template <typename view_type, typename scalar>
+    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
+                                                  ippl::Vector<scalar, 3> hr,
+                                                  ippl::Vector<scalar, 3> origin,
+                                                  const ippl::Vector<scalar, 3>& from,
+                                                  const ippl::Vector<scalar, 3>& to,
+                                                  const scalar factor) {
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
+            gridCoordinatesOf(hr, origin, from);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
+            gridCoordinatesOf(hr, origin, to);
+        // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1],
+        // from_grid.second[2]);
+        for (int d = 0; d < 3; d++) {
+            // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
+            //     std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " <<
+            //     from_grid.first << " " << to_grid.first << std::endl;
+            // }
+            // assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
         }
-        //const uint32_t nghost = g.nghost();
-        //from_ipos += ippl::Vector<int, 3>(nghost);
-        //to_ipos += ippl::Vector<int, 3>(nghost);
+        // const uint32_t nghost = g.nghost();
+        // from_ipos += ippl::Vector<int, 3>(nghost);
+        // to_ipos += ippl::Vector<int, 3>(nghost);
 
-        if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){
-            scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)), ippl::Vector<scalar, 3>((to - from) * factor));
+        if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
+            && from_grid.first[2] == to_grid.first[2]) {
+            scatterToGrid(ldom, Jview, hr, origin,
+                          ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                          ippl::Vector<scalar, 3>((to - from) * factor));
 
             return;
         }
         ippl::Vector<scalar, 3> relay;
-        const int nghost = 1;
+        const int nghost                   = 1;
         const ippl::Vector<scalar, 3> orig = origin;
         using Kokkos::max;
         using Kokkos::min;
         for (unsigned int i = 0; i < 3; i++) {
-            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i],
-                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i],
+            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                               + scalar(1.0) * hr[i] + orig[i],
+                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                                   + scalar(0.0) * hr[i] + orig[i],
                                scalar(0.5) * (to[i] + from[i])));
         }
-        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)), ippl::Vector<scalar, 3>((relay - from) * factor));
-        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5))  , ippl::Vector<scalar, 3>((to - relay) * factor));
+        scatterToGrid(ldom, Jview, hr, origin,
+                      ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((relay - from) * factor));
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((to - relay) * factor));
     }
-    template<typename scalar, fdtd_bc boundary_conditions>
-    class NSFDSolverWithParticles{
-        public:
+    template <typename scalar, fdtd_bc boundary_conditions>
+    class NSFDSolverWithParticles {
+    public:
         constexpr static unsigned dim = 3;
-        using vector_type = ippl::Vector<scalar, 3>;
-        using vector4_type = ippl::Vector<scalar, 4>;
-        using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using vector_type             = ippl::Vector<scalar, 3>;
+        using vector4_type            = ippl::Vector<scalar, 4>;
+        using FourField =
+            ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField =
+            ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         using playout_type = ParticleSpatialLayout<scalar, 3>;
-        using bunch_type = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
-        using Mesh_t = ippl::UniformCartesian<scalar, dim>;
+        using bunch_type   = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        using Mesh_t       = ippl::UniformCartesian<scalar, dim>;
         FieldLayout<dim>* layout_mp;
         Mesh_t* mesh_mp;
         playout_type playout;
@@ -619,138 +623,158 @@ namespace ippl {
         ippl::Vector<uint32_t, 3> nr_global;
         ippl::Vector<scalar, 3> hr_m;
         size_t steps_taken;
-        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles) : layout_mp(&layout), mesh_mp(&mesch), playout(layout, mesch), particles(playout), J(mesch, layout), E(mesch, layout), B(mesch, layout), field_solver(J, E, B){
+        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles)
+            : layout_mp(&layout)
+            , mesh_mp(&mesch)
+            , playout(layout, mesch)
+            , particles(playout)
+            , J(mesch, layout)
+            , E(mesch, layout)
+            , B(mesch, layout)
+            , field_solver(J, E, B) {
             particles.create(nparticles);
             nr_global = ippl::Vector<uint32_t, 3>{
                 uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
                 uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
-                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
-            };
-            hr_m = mesh_mp->getMeshSpacing();
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)};
+            hr_m        = mesh_mp->getMeshSpacing();
             steps_taken = 0;
-            //field_solver.setEMFields(E, B);
+            // field_solver.setEMFields(E, B);
         }
-        template<bool space_charge = false>
-        void scatterBunch(){
-            //ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
-            auto hr_m = mesh_mp->getMeshSpacing();
+        template <bool space_charge = false>
+        void scatterBunch() {
+            // ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
+            auto hr_m           = mesh_mp->getMeshSpacing();
             const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
             assert_isreal(volume);
             assert_isreal((scalar(1) / volume));
-            J = typename decltype(J)::value_type(0);
-            auto Jview = J.getView();
-            auto qview = particles.Q.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto orig = mesh_mp->getOrigin();
-            auto hr = mesh_mp->getMeshSpacing();
-            auto dt = field_solver.dt;
-            bool sc = space_charge;
+            J                       = typename decltype(J)::value_type(0);
+            auto Jview              = J.getView();
+            auto qview              = particles.Q.getView();
+            auto rview              = particles.R.getView();
+            auto rm1view            = particles.R_nm1.getView();
+            auto orig               = mesh_mp->getOrigin();
+            auto hr                 = mesh_mp->getMeshSpacing();
+            auto dt                 = field_solver.dt;
+            bool sc                 = space_charge;
             ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                vector_type pos = rview(i);
-                vector_type to = rview(i);
-                vector_type from = rm1view(i);
-                if(sc){
-                    scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
-                }
-                scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt));
-            });
+            Kokkos::parallel_for(
+                particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                    vector_type pos  = rview(i);
+                    vector_type to   = rview(i);
+                    vector_type from = rm1view(i);
+                    if (sc) {
+                        scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
+                    }
+                    scatterLineToGrid(lDom, Jview, hr, orig, from, to,
+                                      scalar(qview(i)) / (volume * dt));
+                });
             Kokkos::fence();
             J.accumulateHalo();
         }
-        template<typename callable>
-        void updateBunch(scalar time, callable external_field){
-            
+        template <typename callable>
+        void updateBunch(scalar time, callable external_field) {
             Kokkos::fence();
-            auto gbview = particles.gamma_beta.getView();
-            auto eview = particles.E_gather.getView();
-            auto bview = particles.B_gather.getView();
-            auto qview = particles.Q.getView();
-            auto mview = particles.mass.getView();
-            auto rview = particles.R.getView();
-            auto rm1view = particles.R_nm1.getView();
-            auto rp1view = particles.R_np1.getView();
+            auto gbview     = particles.gamma_beta.getView();
+            auto eview      = particles.E_gather.getView();
+            auto bview      = particles.B_gather.getView();
+            auto qview      = particles.Q.getView();
+            auto mview      = particles.mass.getView();
+            auto rview      = particles.R.getView();
+            auto rm1view    = particles.R_nm1.getView();
+            auto rp1view    = particles.R_np1.getView();
             scalar bunch_dt = field_solver.dt / 3;
             Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
             E.fillHalo();
             B.fillHalo();
             Kokkos::fence();
-            for(int bts = 0;bts < 3;bts++){
-                
-                particles.E_gather.gather(E, particles.R, /*offset = */0.0);
-                particles.B_gather.gather(B, particles.R, /*offset = */0.0);
+            for (int bts = 0; bts < 3; bts++) {
+                particles.E_gather.gather(E, particles.R, /*offset = */ 0.0);
+                particles.B_gather.gather(B, particles.R, /*offset = */ 0.0);
                 Kokkos::fence();
-                Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
-                    const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
-                    ippl::Vector<scalar, 3> E_grid = eview(i);
-                    ippl::Vector<scalar, 3> B_grid = bview(i);
-                    //std::cout << "E_grid: " << E_grid << "\n";
-                    //std::cout << "B_grid: " << B_grid << "\n";
-                    ippl::Vector<scalar, 3> bunchpos = rview(i);
-                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb = external_field(bunchpos, time + bunch_dt * bts);
-                    
-                    ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
-                        ippl::Vector<scalar, 3>(E_grid + external_eb.first), 
-                        ippl::Vector<scalar, 3>(B_grid + external_eb.second)
-                    };
-
-                    const scalar charge = qview(i);
-                    const scalar mass = mview(i);
-                    const ippl::Vector<scalar, 3> t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-                    const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
-                    const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
-                    const ippl::Vector<scalar, 3> t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0  + alpha * alpha * (EB[1].dot(EB[1])))));
-                    const ippl::Vector<scalar, 3> ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-
-                    rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
-                    gbview(i) = ngammabeta;
-                });
+                Kokkos::parallel_for(
+                    particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                        const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                        ippl::Vector<scalar, 3> E_grid           = eview(i);
+                        ippl::Vector<scalar, 3> B_grid           = bview(i);
+                        // std::cout << "E_grid: " << E_grid << "\n";
+                        // std::cout << "B_grid: " << B_grid << "\n";
+                        ippl::Vector<scalar, 3> bunchpos = rview(i);
+                        Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb =
+                            external_field(bunchpos, time + bunch_dt * bts);
+
+                        ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
+                            ippl::Vector<scalar, 3>(E_grid + external_eb.first),
+                            ippl::Vector<scalar, 3>(B_grid + external_eb.second)};
+
+                        const scalar charge = qview(i);
+                        const scalar mass   = mview(i);
+                        const ippl::Vector<scalar, 3> t1 =
+                            pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                        const scalar alpha =
+                            charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                        const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                        const ippl::Vector<scalar, 3> t3 =
+                            t1
+                            + t2.cross(scalar(2) * alpha
+                                       * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1])))));
+                        const ippl::Vector<scalar, 3> ngammabeta =
+                            t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                        rview(i) =
+                            rview(i)
+                            + bunch_dt * ngammabeta
+                                  / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                        gbview(i) = ngammabeta;
+                    });
                 Kokkos::fence();
             }
             Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
             size_t invalid_count = 0;
-            auto origo = mesh_mp->getOrigin();
-            ippl::Vector<scalar, 3> extenz;//
+            auto origo           = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;  //
             extenz[0] = nr_global[0] * hr_m[0];
             extenz[1] = nr_global[1] * hr_m[1];
             extenz[2] = nr_global[2] * hr_m[2];
             Kokkos::parallel_reduce(
-                Kokkos::RangePolicy<typename playout_type::RegionLayout_t::view_type::execution_space>(0, particles.getLocalNum()),
-                KOKKOS_LAMBDA(size_t i, size_t& ref){
-                    bool out_of_bounds = false;
+                Kokkos::RangePolicy<
+                    typename playout_type::RegionLayout_t::view_type::execution_space>(
+                    0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t & ref) {
+                    bool out_of_bounds             = false;
                     ippl::Vector<scalar, dim> ppos = rview(i);
-                    for(size_t d = 0;d < dim;d++){
+                    for (size_t d = 0; d < dim; d++) {
                         out_of_bounds |= (ppos[d] <= origo[d]);
-                        out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain
+                        out_of_bounds |=
+                            (ppos[d] >= origo[d] + extenz[d]);  // Check against simulation domain
                     }
                     invalid(i) = out_of_bounds;
                     ref += out_of_bounds;
-                }, 
+                },
                 invalid_count);
             particles.destroy(invalid, invalid_count);
             Kokkos::fence();
             playout.update(particles);
-            
         }
-        void solve(){
+        void solve() {
             scatterBunch();
             field_solver.solve();
-            updateBunch(field_solver.dt * steps_taken, /*no external field*/[]KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/){return Kokkos::pair<vector_type, vector_type>{
-                vector_type(0),
-                vector_type(0)
-            };});
+            updateBunch(
+                field_solver.dt * steps_taken,
+                /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) {
+                    return Kokkos::pair<vector_type, vector_type>{vector_type(0), vector_type(0)};
+                });
             ++steps_taken;
         }
-        template<typename callable>
-        void solve(callable external_field){
+        template <typename callable>
+        void solve(callable external_field) {
             scatterBunch();
             field_solver.solve();
-            //std::cout << field_solver.dt * steps_taken << "\n";
+            // std::cout << field_solver.dt * steps_taken << "\n";
             updateBunch(field_solver.dt * steps_taken, external_field);
             ++steps_taken;
         }
     };
-}
+}  // namespace ippl
 
 #endif
\ No newline at end of file
diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp
index a48a829e1..d5d44a174 100644
--- a/test/solver/TestFDTDSolverWithParticles.cpp
+++ b/test/solver/TestFDTDSolverWithParticles.cpp
@@ -7,7 +7,7 @@ using std::size_t;
 #include "MaxwellSolvers/FDTD.h"
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.h>
-
+#include "../alpine/units.h"
 #include <Kokkos_Random.hpp>
 template<typename scalar1, typename... scalar>
     requires((std::is_floating_point_v<scalar1>))

From 7a803acfaec8ecb57a2afa065079dcad25b58a7a Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Fri, 24 May 2024 17:49:18 +0200
Subject: [PATCH 30/32] Move FEL stuff to separate directory, add matrix type
 and lorentz transforms

---
 CMakeLists.txt                        |   1 +
 alpine/CMakeLists.txt                 |   3 -
 alpine/Undulator.h                    | 238 -------------------
 fel/CMakeLists.txt                    |  13 ++
 {alpine => fel}/FreeElectronLaser.cpp |   0
 fel/LorentzTransform.h                | 318 ++++++++++++++++++++++++++
 fel/Undulator.h                       |  97 ++++++++
 {alpine => fel}/units.h               |   0
 src/Types/Matrix.h                    | 186 +++++++++++++++
 9 files changed, 615 insertions(+), 241 deletions(-)
 delete mode 100644 alpine/Undulator.h
 create mode 100644 fel/CMakeLists.txt
 rename {alpine => fel}/FreeElectronLaser.cpp (100%)
 create mode 100644 fel/LorentzTransform.h
 create mode 100644 fel/Undulator.h
 rename {alpine => fel}/units.h (100%)
 create mode 100644 src/Types/Matrix.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ee3cc6c4..63f4f5060 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -234,6 +234,7 @@ if (ENABLE_ALPINE)
     endif ()
     message (STATUS "Enable Alpine")
     add_subdirectory (alpine)
+    add_subdirectory(fel)
 endif ()
 
 option (ENABLE_UNIT_TESTS "Enable unit tests" OFF)
diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt
index e2711a472..c045305a8 100644
--- a/alpine/CMakeLists.txt
+++ b/alpine/CMakeLists.txt
@@ -21,9 +21,6 @@ target_link_libraries (PenningTrap ${IPPL_LIBS})
 add_executable (LandauDamping LandauDamping.cpp)
 target_link_libraries (LandauDamping ${IPPL_LIBS})
 
-add_executable (FreeElectronLaser FreeElectronLaser.cpp)
-target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
-
 add_executable (BumponTailInstability BumponTailInstability.cpp)
 target_link_libraries (BumponTailInstability ${IPPL_LIBS})
 
diff --git a/alpine/Undulator.h b/alpine/Undulator.h
deleted file mode 100644
index 981786b8e..000000000
--- a/alpine/Undulator.h
+++ /dev/null
@@ -1,238 +0,0 @@
-#ifndef UNDULATOR_H
-#define UNDULATOR_H
-#include <Kokkos_Core.hpp>
-
-#include <cmath>
-
-#include "Types/Vector.h"
-
-#include "units.h"
-namespace ippl {
-    /**
-     * @brief A template struct representing a uniaxial Lorentz frame.
-     *
-     * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis
-     * (default is the z-axis, axis 2). It includes methods for transforming vectors and
-     * electromagnetic fields between frames, as well as for computing gamma factors and beta
-     * velocities. The struct uses Kokkos for portability and performance.
-     *
-     * @tparam T The scalar type used for computations (e.g., float, double).
-     * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the
-     * z-axis).
-     */
-    template <typename T, unsigned axis = 2>
-    struct UniaxialLorentzframe {
-        /// Speed of light, set to 1 for natural units.
-        constexpr static T c = 1.0;
-        /// Alias for the scalar type used in the struct.
-        using scalar = T;
-        /// Alias for a 3-dimensional vector of the scalar type.
-        using Vector3 = ippl::Vector<T, 3>;
-
-        /// Beta velocity component along the specified axis.
-        scalar beta_m;
-        /// Product of gamma factor and beta velocity.
-        scalar gammaBeta_m;
-        /// Gamma factor for the Lorentz transformation.
-        scalar gamma_m;
-
-        /**
-         * @brief Create a UniaxialLorentzframe from a given gamma factor.
-         *
-         * This static member function constructs a UniaxialLorentzframe object using a given
-         * gamma factor. It computes the corresponding beta velocity and gamma*beta product.
-         *
-         * @param gamma The gamma factor for the Lorentz transformation.
-         * @return A UniaxialLorentzframe object with computed beta and gamma*beta.
-         */
-        KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) {
-            UniaxialLorentzframe ret;
-            ret.gamma_m      = gamma;
-            scalar beta      = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
-            scalar gammabeta = gamma * beta;
-            ret.beta_m       = beta;
-            ret.gammaBeta_m  = gammabeta;
-            return ret;
-        }
-
-        /**
-         * @brief Get a UniaxialLorentzframe with negative beta velocity.
-         *
-         * This member function returns a new UniaxialLorentzframe object with the same gamma
-         * factor but with a negative beta velocity, effectively representing the inverse
-         * Lorentz transformation.
-         *
-         * @return A UniaxialLorentzframe object with negative beta velocity.
-         */
-        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative() const noexcept {
-            UniaxialLorentzframe ret;
-            ret.beta_m      = -beta_m;
-            ret.gammaBeta_m = -gammaBeta_m;
-            ret.gamma_m     = gamma_m;
-            return ret;
-        }
-
-        /// Default constructor.
-        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
-
-        /**
-         * @brief Construct a UniaxialLorentzframe from a gamma*beta value.
-         *
-         * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta
-         * value. It computes the corresponding beta velocity and gamma factor.
-         *
-         * @param gammaBeta The product of the gamma factor and beta velocity.
-         */
-        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) {
-            using Kokkos::sqrt;
-            gammaBeta_m = gammaBeta;
-            beta_m      = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
-            gamma_m     = sqrt(1 + gammaBeta * gammaBeta);
-        }
-
-        /**
-         * @brief Transform a spatial vector from the primed frame to the unprimed frame.
-         *
-         * This member function transforms a given spatial vector from the primed frame to the
-         * unprimed frame using the Lorentz transformation along the specified axis and the given
-         * time.
-         *
-         * @param arg The spatial vector to be transformed.
-         * @param time The time component for the transformation.
-         */
-        KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept {
-            arg[axis] = gamma_m * (arg[axis] + beta_m * time);
-        }
-
-        /**
-         * @brief Transform electric and magnetic fields from the unprimed to the primed frame.
-         *
-         * This member function transforms a pair of electric and magnetic field vectors from the
-         * unprimed frame to the primed frame using the Lorentz transformation along the specified
-         * axis.
-         *
-         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the
-         * unprimed frame.
-         * @return A pair of vectors representing the electric and magnetic fields in the primed
-         * frame.
-         */
-        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
-            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
-            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
-            ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
-            ippl::Vector<scalar, 3> vnorm{axis == 0, axis == 1, axis == 2};
-            ret.first =
-                ippl::Vector<T, 3>(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m
-                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
-            ret.second =
-                ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m
-                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
-            ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
-            ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
-            return ret;
-        }
-
-        /**
-         * @brief Transform electric and magnetic fields from the primed to the unprimed frame.
-         *
-         * This member function transforms a pair of electric and magnetic field vectors from the
-         * primed frame to the unprimed frame using the inverse Lorentz transformation along the
-         * specified axis.
-         *
-         * @param primedEB A pair of vectors representing the electric and magnetic fields in the
-         * primed frame.
-         * @return A pair of vectors representing the electric and magnetic fields in the unprimed
-         * frame.
-         */
-        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
-        inverse_transform_EB(
-            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
-            return negative().transform_EB(primedEB);
-        }
-    };
-    template <typename scalar>
-    struct undulator_parameters {
-        scalar lambda_u;  // MITHRA: lambda_u
-        scalar K;         // Undulator parameter
-        scalar length;
-        scalar B_magnitude;
-        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length)
-            : lambda_u(lambda_u)
-            , K(K_undulator_parameter)
-            , length(_length) {
-            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K)
-                          / (electron_charge_in_unit_charges * lambda_u);
-        }
-    };
-    /**
-     * @brief Struct representing an undulator.
-     *
-     * @tparam scalar Type of the scalar values (e.g., float, double).
-     */
-    template <typename scalar>
-    struct Undulator {
-        undulator_parameters<scalar> uparams;  ///< Parameters of the undulator.
-        scalar distance_to_entry;              ///< Distance to the entry of the undulator.
-        scalar k_u;                            ///< Wavenumber of the undulator.
-
-        /**
-         * @brief Constructor to initialize undulator parameters and calculate k_u.
-         *
-         * @param p Parameters of the undulator.
-         * @param dte Distance to the entry of the undulator.
-         */
-        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte)
-            : uparams(p)
-            , distance_to_entry(dte)
-            , k_u(2 * M_PI / p.lambda_u) {}
-
-        /**
-         * @brief Overloaded operator() to compute magnetic field components.
-         *
-         * @param position_in_lab_frame Position vector in the lab frame.
-         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
-         *         Pair containing magnetic field and its derivative.
-         */
-        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
-        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
-            using Kokkos::cos;
-            using Kokkos::cosh;
-            using Kokkos::exp;
-            using Kokkos::sin;
-            using Kokkos::sinh;
-
-            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
-                ret;             // Return pair containing magnetic field and its derivative.
-            ret.first.fill(0);   // Initialize magnetic field vector.
-            ret.second.fill(0);  // Initialize derivative vector.
-
-            // If the position is before the undulator entry.
-            if (position_in_lab_frame[2] < distance_to_entry) {
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator < 0);  // Ensure we are in the correct region.
-                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator)
-                                    * 0.5));  // Gaussian decay factor.
-
-                ret.second[0] = 0;  // No x-component.
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
-                                * z_in_undulator * k_u * scal;  // y-component.
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
-                                * scal;  // z-component.
-            }
-            // If the position is within the undulator.
-            else if (position_in_lab_frame[2] > distance_to_entry
-                     && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
-                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
-                assert(z_in_undulator >= 0);  // Ensure we are in the correct region.
-
-                ret.second[0] = 0;  // No x-component.
-                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
-                                * sin(k_u * z_in_undulator);  // y-component.
-                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
-                                * cos(k_u * z_in_undulator);  // z-component.
-            }
-            return ret;
-        }
-    };
-}  // namespace ippl
-#endif  // UNDULATOR_H
\ No newline at end of file
diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt
new file mode 100644
index 000000000..dd8898dd7
--- /dev/null
+++ b/fel/CMakeLists.txt
@@ -0,0 +1,13 @@
+include_directories (
+    ${CMAKE_SOURCE_DIR}/src
+)
+
+link_directories (
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${Kokkos_DIR}/..
+)
+set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES})
+set (COMPILE_FLAGS ${OPAL_CXX_FLAGS})
+
+add_executable (FreeElectronLaser FreeElectronLaser.cpp)
+target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
\ No newline at end of file
diff --git a/alpine/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp
similarity index 100%
rename from alpine/FreeElectronLaser.cpp
rename to fel/FreeElectronLaser.cpp
diff --git a/fel/LorentzTransform.h b/fel/LorentzTransform.h
new file mode 100644
index 000000000..876bb22a6
--- /dev/null
+++ b/fel/LorentzTransform.h
@@ -0,0 +1,318 @@
+#ifndef LORENTZ_TRANSFORM_H
+#define LORENTZ_TRANSFORM_H
+#include <Kokkos_Core.hpp>
+
+#include "Types/Matrix.h"
+#include "Types/Vector.h"
+namespace ippl {
+    /**
+     * @brief A template struct representing a uniaxial Lorentz frame.
+     *
+     * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis
+     * (default is the z-axis, axis 2). It includes methods for transforming vectors and
+     * electromagnetic fields between frames, as well as for computing gamma factors and beta
+     * velocities. The struct uses Kokkos for portability and performance.
+     *
+     * @tparam T The scalar type used for computations (e.g., float, double).
+     * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the
+     * z-axis).
+     */
+    template <typename T, unsigned axis = 2>
+    struct UniaxialLorentzframe {
+        /// Speed of light, set to 1 for natural units.
+        constexpr static T c = 1.0;
+        /// Alias for the scalar type used in the struct.
+        using scalar = T;
+        /// Alias for a 3-dimensional vector of the scalar type.
+        using Vector3 = ippl::Vector<T, 3>;
+
+        /// Beta velocity component along the specified axis.
+        scalar beta_m;
+        /// Product of gamma factor and beta velocity.
+        scalar gammaBeta_m;
+        /// Gamma factor for the Lorentz transformation.
+        scalar gamma_m;
+
+        /**
+         * @brief Create a UniaxialLorentzframe from a given gamma factor.
+         *
+         * This static member function constructs a UniaxialLorentzframe object using a given
+         * gamma factor. It computes the corresponding beta velocity and gamma*beta product.
+         *
+         * @param gamma The gamma factor for the Lorentz transformation.
+         * @return A UniaxialLorentzframe object with computed beta and gamma*beta.
+         */
+        KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) {
+            UniaxialLorentzframe ret;
+            ret.gamma_m      = gamma;
+            scalar beta      = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
+            scalar gammabeta = gamma * beta;
+            ret.beta_m       = beta;
+            ret.gammaBeta_m  = gammabeta;
+            return ret;
+        }
+
+        /**
+         * @brief Get a UniaxialLorentzframe with negative beta velocity.
+         *
+         * This member function returns a new UniaxialLorentzframe object with the same gamma
+         * factor but with a negative beta velocity, effectively representing the inverse
+         * Lorentz transformation.
+         *
+         * @return A UniaxialLorentzframe object with negative beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative() const noexcept {
+            UniaxialLorentzframe ret;
+            ret.beta_m      = -beta_m;
+            ret.gammaBeta_m = -gammaBeta_m;
+            ret.gamma_m     = gamma_m;
+            return ret;
+        }
+
+        /// Default constructor.
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
+
+        /**
+         * @brief Construct a UniaxialLorentzframe from a gamma*beta value.
+         *
+         * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta
+         * value. It computes the corresponding beta velocity and gamma factor.
+         *
+         * @param gammaBeta The product of the gamma factor and beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) {
+            using Kokkos::sqrt;
+            gammaBeta_m = gammaBeta;
+            beta_m      = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
+            gamma_m     = sqrt(1 + gammaBeta * gammaBeta);
+        }
+
+        /**
+         * @brief Transform a spatial vector from the primed frame to the unprimed frame.
+         *
+         * This member function transforms a given spatial vector from the primed frame to the
+         * unprimed frame using the Lorentz transformation along the specified axis and the given
+         * time.
+         *
+         * @param arg The spatial vector to be transformed.
+         * @param time The time component for the transformation.
+         */
+        KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept {
+            arg[axis] = gamma_m * (arg[axis] + beta_m * time);
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the unprimed to the primed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * unprimed frame to the primed frame using the Lorentz transformation along the specified
+         * axis.
+         *
+         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the
+         * unprimed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the primed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
+            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+            ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
+            ippl::Vector<scalar, 3> vnorm{axis == 0, axis == 1, axis == 2};
+            ret.first =
+                ippl::Vector<T, 3>(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+            ret.second =
+                ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+            ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
+            ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
+            return ret;
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the primed to the unprimed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * primed frame to the unprimed frame using the inverse Lorentz transformation along the
+         * specified axis.
+         *
+         * @param primedEB A pair of vectors representing the electric and magnetic fields in the
+         * primed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the unprimed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
+        inverse_transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
+            return negative().transform_EB(primedEB);
+        }
+    };
+    /**
+     * @brief Represents a Lorentz frame with associated transformations and operations.
+     * 
+     * This template struct models a Lorentz frame used in special relativity to handle
+     * transformations between reference frames moving at a constant velocity relative to each other.
+     * The LorentzFrame supports various operations such as converting between frames and transforming
+     * vectors and electromagnetic fields.
+     * 
+     * @tparam T The type for scalar values, typically a floating-point type.
+     */
+    template <typename T>
+    struct LorentzFrame {
+        /// Speed of light in natural units (c = 1)
+        constexpr static T c = 1.0;
+    
+        /// Alias for the scalar type.
+        using scalar = T;
+    
+        /// Alias for a 3D vector of type T.
+        using Vector3 = ippl::Vector<T, 3>;
+    
+        /// Velocity vector (beta) in the frame, normalized by the speed of light.
+        ippl::Vector<T, 3> beta_m;
+    
+        /// Product of gamma and beta vectors.
+        ippl::Vector<T, 3> gammaBeta_m;
+    
+        /// Lorentz factor gamma.
+        T gamma_m;
+    
+        /**
+         * @brief Constructs a LorentzFrame from a given gammaBeta vector.
+         * 
+         * This constructor initializes the Lorentz frame using a vector that represents
+         * the product of the Lorentz factor gamma and the velocity vector beta.
+         * The velocity vector beta is then computed by normalizing this gammaBeta vector,
+         * and the Lorentz factor gamma is computed from the magnitude of gammaBeta.
+         * 
+         * @param gammaBeta The gammaBeta vector.
+         */
+        KOKKOS_INLINE_FUNCTION LorentzFrame(const ippl::Vector<T, 3>& gammaBeta) {
+            beta_m      = gammaBeta / sqrt(1 + gammaBeta.dot(gammaBeta));
+            gamma_m     = sqrt(1 + gammaBeta.dot(gammaBeta));
+            gammaBeta_m = gammaBeta;
+        }
+        /**
+         * @brief Constructs a uniaxial Lorentz frame given a gamma value and an axis.
+         * 
+         * This static member function creates a Lorentz frame that has motion along one of the principal
+         * axes (x, y, or z) based on the provided gamma value. The beta vector is calculated accordingly
+         * and aligned with the specified axis.
+         * 
+         * @tparam axis The axis along which the frame is moving ('x', 'y', or 'z').
+         * @param gamma The Lorentz factor gamma, which must be greater than or equal to 1.
+         * @return LorentzFrame A Lorentz frame with motion along the specified axis.
+         */
+        template <char axis>
+        static LorentzFrame uniaxialGamma(T gamma) {
+            static_assert(axis == 'x' || axis == 'y' || axis == 'z', "Only xyz axis suproted");
+            assert(gamma >= 1.0 && "Gamma must be >= 1");
+            using Kokkos::sqrt;
+
+            T beta = gamma == 1 ? T(0) : sqrt(gamma * gamma - 1) / gamma;
+            Vector3 arg{0, 0, 0};
+            arg[axis - 'x'] = gamma * beta;
+            return LorentzFrame<T>(arg);
+        }
+
+        /**
+         * @brief Constructs the Lorentz transformation matrix for converting from unprimed to primed frame.
+         * 
+         * This function computes the Lorentz transformation matrix that converts vectors
+         * from the unprimed reference frame to the primed reference frame. The transformation
+         * accounts for the relative velocity between the frames using the stored beta and gamma values.
+         * 
+         * If the magnitude of the beta vector is very small (less than 1e-10), the function returns
+         * an identity matrix, indicating no significant transformation.
+         * 
+         * @return matrix<T, 4, 4> The transformation matrix.
+         */
+        KOKKOS_INLINE_FUNCTION matrix<T, 4, 4> unprimedToPrimed() const noexcept {
+            T betaMagsq = beta_m.dot(beta_m);
+            using Kokkos::abs;
+            if (abs(betaMagsq) < 1e-10) {
+                return matrix<T, 4, 4>(T(1));
+            }
+            ippl::Vector<T, 3> betaSquared = beta_m * beta_m;
+
+            matrix<T, 4, 4> ret;
+
+            ret.data[0] = ippl::Vector<T, 4>{gamma_m, -gammaBeta_m[0], -gammaBeta_m[1], -gammaBeta_m[2]};
+            ret.data[1] = ippl::Vector<T, 4>{-gammaBeta_m[0], 1 + (gamma_m - 1) * betaSquared[0] / betaMagsq,
+                                   (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq,
+                                   (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq};
+            ret.data[2] = ippl::Vector<T, 4>{-gammaBeta_m[1],
+                                             (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq,
+                                             1 + (gamma_m - 1) * betaSquared[1] / betaMagsq,
+                                             (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq};
+            ret.data[3] = ippl::Vector<T, 4>{-gammaBeta_m[2],
+                                             (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq,
+                                             (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq,
+                                             1 + (gamma_m - 1) * betaSquared[2] / betaMagsq};
+            return ret;
+        }
+        /**
+         * @brief Constructs the Lorentz transformation matrix for converting from primed to unprimed frame.
+         * 
+         * This function computes the inverse of the Lorentz transformation matrix obtained
+         * from unprimedToPrimed(), allowing conversion of vectors from the primed reference frame
+         * back to the unprimed reference frame. It utilizes the matrix inversion function to achieve this.
+         * 
+         * @return matrix<T, 4, 4> The inverse transformation matrix. This could also be done by taking the negative velocity
+         */
+        KOKKOS_INLINE_FUNCTION matrix<T, 4, 4> primedToUnprimed() const noexcept {
+            return unprimedToPrimed().inverse();
+        }
+
+        KOKKOS_INLINE_FUNCTION Vector3 transformV(const Vector3& unprimedV) const noexcept {
+            T factor    = T(1.0) / (1.0 - unprimedV.dot(beta_m));
+            Vector3 ret = unprimedV * scalar(1.0 / gamma_m);
+            ret -= beta_m;
+            ret += beta_m * (unprimedV.dot(beta_m) * (gamma_m / (gamma_m + 1)));
+            return ret * factor;
+        }
+
+        KOKKOS_INLINE_FUNCTION Vector3 transformGammabeta(const Vector3& gammabeta) const noexcept {
+            using Kokkos::sqrt;
+            T gamma      = sqrt(T(1) + gammabeta.dot(gammabeta));
+            Vector3 beta = gammabeta;
+            beta /= gamma;
+            Vector3 betatrf = transformV(beta);
+            betatrf *= sqrt(1 - betatrf.dot(betatrf));
+            return betatrf;
+        }
+        /**
+         * @brief Transforms electric and magnetic fields from the unprimed to the primed frame.
+         * 
+         * This function applies the Lorentz transformation to a pair of 3D vectors representing
+         * the electric field (E) and magnetic field (B) in the unprimed reference frame,
+         * converting them to the corresponding fields in the primed reference frame. The transformation
+         * accounts for relativistic effects on the fields due to the relative motion of the frames.
+         * 
+         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the unprimed frame.
+         * @return Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> A pair of transformed vectors representing the electric and magnetic fields in the primed frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
+            using Kokkos::sqrt;
+            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+            Vector3 vnorm = beta_m * (T(1.0) / sqrt(beta_m.dot(beta_m)));
+
+            ret.first =
+                (ippl::Vector<T, 3>(unprimedEB.first + beta_m.cross(unprimedEB.second)) * gamma_m)
+                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+            ret.second =
+                (ippl::Vector<T, 3>(unprimedEB.second - beta_m.cross(unprimedEB.first)) * gamma_m)
+                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
+        inverse_transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
+            ippl::Vector<T, 3> mgb(gammaBeta_m * scalar(-1.0));
+            return LorentzFrame<T>(mgb).transform_EB(primedEB);
+        }
+    };
+
+}  // namespace ippl
+#endif
diff --git a/fel/Undulator.h b/fel/Undulator.h
new file mode 100644
index 000000000..57cad337c
--- /dev/null
+++ b/fel/Undulator.h
@@ -0,0 +1,97 @@
+#ifndef UNDULATOR_H
+#define UNDULATOR_H
+#include <Kokkos_Core.hpp>
+
+#include <cmath>
+
+#include "Types/Vector.h"
+
+#include "units.h"
+#include "LorentzTransform.h"
+namespace ippl {
+    template <typename scalar>
+    struct undulator_parameters {
+        scalar lambda_u;  // MITHRA: lambda_u
+        scalar K;         // Undulator parameter
+        scalar length;
+        scalar B_magnitude;
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length)
+            : lambda_u(lambda_u)
+            , K(K_undulator_parameter)
+            , length(_length) {
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K)
+                          / (electron_charge_in_unit_charges * lambda_u);
+        }
+    };
+    /**
+     * @brief Struct representing an undulator.
+     *
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
+    template <typename scalar>
+    struct Undulator {
+        undulator_parameters<scalar> uparams;  ///< Parameters of the undulator.
+        scalar distance_to_entry;              ///< Distance to the entry of the undulator.
+        scalar k_u;                            ///< Wavenumber of the undulator.
+
+        /**
+         * @brief Constructor to initialize undulator parameters and calculate k_u.
+         *
+         * @param p Parameters of the undulator.
+         * @param dte Distance to the entry of the undulator.
+         */
+        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte)
+            : uparams(p)
+            , distance_to_entry(dte)
+            , k_u(2 * M_PI / p.lambda_u) {}
+
+        /**
+         * @brief Overloaded operator() to compute magnetic field components.
+         *
+         * @param position_in_lab_frame Position vector in the lab frame.
+         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+         *         Pair containing magnetic field and its derivative.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
+            using Kokkos::cos;
+            using Kokkos::cosh;
+            using Kokkos::exp;
+            using Kokkos::sin;
+            using Kokkos::sinh;
+
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+                ret;             // Return pair containing magnetic field and its derivative.
+            ret.first.fill(0);   // Initialize magnetic field vector.
+            ret.second.fill(0);  // Initialize derivative vector.
+
+            // If the position is before the undulator entry.
+            if (position_in_lab_frame[2] < distance_to_entry) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);  // Ensure we are in the correct region.
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator)
+                                    * 0.5));  // Gaussian decay factor.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * z_in_undulator * k_u * scal;  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * scal;  // z-component.
+            }
+            // If the position is within the undulator.
+            else if (position_in_lab_frame[2] > distance_to_entry
+                     && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);  // Ensure we are in the correct region.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * sin(k_u * z_in_undulator);  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * cos(k_u * z_in_undulator);  // z-component.
+            }
+            return ret;
+        }
+    };
+}  // namespace ippl
+#endif  // UNDULATOR_H
\ No newline at end of file
diff --git a/alpine/units.h b/fel/units.h
similarity index 100%
rename from alpine/units.h
rename to fel/units.h
diff --git a/src/Types/Matrix.h b/src/Types/Matrix.h
new file mode 100644
index 000000000..147286f99
--- /dev/null
+++ b/src/Types/Matrix.h
@@ -0,0 +1,186 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+#include <Kokkos_Core.hpp>
+
+#include "Vector.h"
+namespace ippl {
+    /**
+     * @brief Column major
+     * 
+     * @tparam T Scalar type
+     * @tparam m Number of rows
+     * @tparam n Number of columns
+     */
+    template <typename T, int m, int n>
+    struct matrix {
+        
+        ippl::Vector<ippl::Vector<T, m>, n> data;
+        constexpr static bool squareMatrix = (m == n && m > 0);
+
+        constexpr KOKKOS_INLINE_FUNCTION matrix(T diag)
+            requires(m == n)
+        {
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    data[i][j] = diag * T(i == j);
+                }
+            }
+        }
+        constexpr matrix() = default;
+        KOKKOS_INLINE_FUNCTION constexpr static matrix zero() {
+            matrix<T, m, n> ret;
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    ret.data[i][j] = 0;
+                }
+            }
+            return ret;
+        };
+        KOKKOS_INLINE_FUNCTION void setZero() {
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    data[i][j] = 0;
+                }
+            }
+        }
+
+        KOKKOS_INLINE_FUNCTION T operator()(int i, int j) const noexcept { return data[j][i]; }
+        KOKKOS_INLINE_FUNCTION T& operator()(int i, int j) noexcept { return data[j][i]; }
+        template <typename O>
+        KOKKOS_INLINE_FUNCTION matrix<O, m, n> cast() const noexcept {
+            matrix<O, m, n> ret;
+            for (unsigned i = 0; i < n; i++) {
+                ret.data[i] = data[i].template cast<O>();
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator+(const matrix<T, m, n>& other) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] + other.data[i];
+            }
+            return result;
+        }
+
+        // Implement matrix subtraction
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator-(const matrix<T, m, n>& other) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] - other.data[i];
+            }
+            return result;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator*(const T& factor) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] * factor;
+            }
+            return result;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator/(const T& divisor) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] / divisor;
+            }
+            return result;
+        }
+
+        // Implement matrix-vector multiplication
+        template <unsigned int other_m>
+        KOKKOS_INLINE_FUNCTION ippl::Vector<T, m> operator*(
+            const ippl::Vector<T, other_m>& vec) const {
+            static_assert((int)other_m == n);
+            ippl::Vector<T, m> result(0);
+            for (int i = 0; i < n; ++i) {
+                for(int j = 0;j < m;i++){
+                    //This could be a vector operation, but you never know with ippl::Vector
+                    result[j] += vec[i] * data[i][j];
+                }
+            }
+            return result;
+        }
+        template <int otherm, int othern>
+            requires(n == otherm)
+        KOKKOS_INLINE_FUNCTION matrix<T, m, othern> operator*(
+            const matrix<T, otherm, othern>& otherMat) const noexcept {
+            matrix<T, m, othern> ret(0);
+            for (int i = 0; i < m; i++) {
+                for (int j = 0; j < othern; j++) {
+                    for (int k = 0; k < n; k++) {
+                        ret(i, j) += (*this)(i, k) * otherMat(k, j);
+                    }
+                }
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION void addCol(int i, int j, T alpha = 1.0) {
+            data[j] += data[i] * alpha;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> inverse() const noexcept
+            requires(squareMatrix)
+        {
+            constexpr int N = m;
+
+            matrix<T, m, n> ret(1.0);
+            matrix<T, m, n> dis(*this);
+
+            for (int i = 0; i < N; i++) {
+                for (int j = i + 1; j < N; j++) {
+                    T alpha = -dis(i, j) / dis(i, i);
+                    dis.addCol(i, j, alpha);
+                    dis(i, j) = 0;
+                    ret.addCol(i, j, alpha);
+                }
+            }
+            for (int i = N - 1; i >= 0; i--) {
+                for (int j = i - 1; j >= 0; j--) {
+                    T alpha = -dis(i, j) / dis(i, i);
+                    dis.addCol(i, j, alpha);
+                    dis(i, j) = 0;
+                    ret.addCol(i, j, alpha);
+                }
+            }
+            for (int i = 0; i < N; i++) {
+                T d     = dis(i, i);
+                T oneod = T(1) / d;
+                dis.data[i] *= oneod;
+                ret.data[i] *= oneod;
+            }
+
+            return ret;
+        }
+
+        template <typename stream_t>
+        friend stream_t& operator<<(stream_t& str, const matrix<T, m, n>& mat) {
+            for (int i = 0; i < m; i++) {
+                for (int j = 0; j < n; j++) {
+                    str << mat.data[j][i] << " ";
+                }
+                str << "\n";
+            }
+            return str;
+        }
+    };
+    /**
+     * @brief Computes the outer product l r^T of two vectors
+     * 
+     * @tparam T Scalar type
+     * @tparam N Output rows
+     * @tparam M Output cols
+     * @param l left operand
+     * @param r right operand 
+     * @return KOKKOS_INLINE_FUNCTION the matrix l r^T 
+     */
+    template <typename T, unsigned N, unsigned M>
+    KOKKOS_INLINE_FUNCTION matrix<T, N, N> outer_product(const ippl::Vector<T, N>& l,
+                                                         const ippl::Vector<T, M>& r) {
+        matrix<T, N, M> ret;
+        for (unsigned i = 0; i < N; i++) {
+            for (unsigned j = 0; j < M; j++) {
+                ret.data[j][i] = l[i] * r[j];
+            }
+        }
+        return ret;
+    }
+}  // namespace ippl
+#endif
\ No newline at end of file

From b0ba9953fe462e06d1f05738f47ce1c6d295693d Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Wed, 19 Jun 2024 14:03:15 +0200
Subject: [PATCH 31/32] Move Particle stuff into fel/

---
 fel/FreeElectronLaser.cpp     |   2 +-
 fel/NSFDSolverWithParticles.h | 382 ++++++++++++++++++++++++++++++++++
 src/MaxwellSolvers/FDTD.h     | 377 ---------------------------------
 3 files changed, 383 insertions(+), 378 deletions(-)
 create mode 100644 fel/NSFDSolverWithParticles.h

diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp
index e2a0c8551..a33159c51 100644
--- a/fel/FreeElectronLaser.cpp
+++ b/fel/FreeElectronLaser.cpp
@@ -7,7 +7,7 @@
 
 #include "Field/Field.h"
 
-#include "MaxwellSolvers/FDTD.h"
+#include "NSFDSolverWithParticles.h"
 #define JSON_HAS_RANGES 0  // Merlin compilation
 #include <Kokkos_Random.hpp>
 #include <fstream>
diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h
new file mode 100644
index 000000000..06aa2f748
--- /dev/null
+++ b/fel/NSFDSolverWithParticles.h
@@ -0,0 +1,382 @@
+#ifndef NSFDSOLVERWITHPARTICLES_H
+#define NSFDSOLVERWITHPARTICLES_H
+#include "MaxwellSolvers/FDTD.h"
+namespace ippl{
+    template <typename _scalar, class PLayout>
+    struct Bunch : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);     // Charge attribute
+            this->addAttribute(mass);  // Mass attribute
+            this->addAttribute(
+                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);     // Position attribute for the next time step
+            this->addAttribute(R_nm1);     // Position attribute for the next time step
+            this->addAttribute(E_gather);  // Electric field attribute for particle gathering
+            this->addAttribute(B_gather);  // Magnedit field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;             // Charge container
+        charge_container_type mass;          // Mass container
+        velocity_container_type gamma_beta;  // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_np1;  // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_nm1;  // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            E_gather;  // Electric field container for particle gathering
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            B_gather;  // Magnetic field container for particle gathering
+    };
+    template <typename T>
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
+        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos) {
+        // return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5},
+        // ippl::Vector<T, 3>{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing *
+        // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+        // pos -= spacing * T(0.5);
+        ippl::Vector<T, 3> relpos  = pos - origin;
+        ippl::Vector<T, 3> gridpos = relpos / hr;
+        ippl::Vector<int, 3> ipos;
+        ipos = gridpos.template cast<int>();
+        ippl::Vector<T, 3> fracpos;
+        for (unsigned k = 0; k < 3; k++) {
+            fracpos[k] = gridpos[k] - (int)ipos[k];
+        }
+        // TODO: NGHOST!!!!!!!
+        ipos += ippl::Vector<int, 3>(1);
+        ret.first  = ipos;
+        ret.second = fracpos;
+        return ret;
+    }
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos, const scalar value) {
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for (unsigned d = 0; d < 3; d++) {
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(value);
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos,
+                                       const ippl::Vector<scalar, 3>& value) {
+        // std::cout << "Value: " << value << "\n";
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+            // std::cout << "out of bounds\n";
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for (unsigned d = 0; d < 3; d++) {
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template <typename view_type, typename scalar>
+    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
+                                                  ippl::Vector<scalar, 3> hr,
+                                                  ippl::Vector<scalar, 3> origin,
+                                                  const ippl::Vector<scalar, 3>& from,
+                                                  const ippl::Vector<scalar, 3>& to,
+                                                  const scalar factor) {
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
+            gridCoordinatesOf(hr, origin, from);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
+            gridCoordinatesOf(hr, origin, to);
+        // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1],
+        // from_grid.second[2]);
+        for (int d = 0; d < 3; d++) {
+            // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
+            //     std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " <<
+            //     from_grid.first << " " << to_grid.first << std::endl;
+            // }
+            // assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
+        }
+        // const uint32_t nghost = g.nghost();
+        // from_ipos += ippl::Vector<int, 3>(nghost);
+        // to_ipos += ippl::Vector<int, 3>(nghost);
+
+        if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
+            && from_grid.first[2] == to_grid.first[2]) {
+            scatterToGrid(ldom, Jview, hr, origin,
+                          ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                          ippl::Vector<scalar, 3>((to - from) * factor));
+
+            return;
+        }
+        ippl::Vector<scalar, 3> relay;
+        const int nghost                   = 1;
+        const ippl::Vector<scalar, 3> orig = origin;
+        using Kokkos::max;
+        using Kokkos::min;
+        for (unsigned int i = 0; i < 3; i++) {
+            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                               + scalar(1.0) * hr[i] + orig[i],
+                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                                   + scalar(0.0) * hr[i] + orig[i],
+                               scalar(0.5) * (to[i] + from[i])));
+        }
+        scatterToGrid(ldom, Jview, hr, origin,
+                      ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((relay - from) * factor));
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((to - relay) * factor));
+    }
+    template <typename scalar, fdtd_bc boundary_conditions>
+    class NSFDSolverWithParticles {
+    public:
+        constexpr static unsigned dim = 3;
+        using vector_type             = ippl::Vector<scalar, 3>;
+        using vector4_type            = ippl::Vector<scalar, 4>;
+        using FourField =
+            ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField =
+            ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using playout_type = ParticleSpatialLayout<scalar, 3>;
+        using bunch_type   = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        using Mesh_t       = ippl::UniformCartesian<scalar, dim>;
+        FieldLayout<dim>* layout_mp;
+        Mesh_t* mesh_mp;
+        playout_type playout;
+        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        FourField J;
+        ThreeField E;
+        ThreeField B;
+        NonStandardFDTDSolver<ThreeField, FourField, absorbing> field_solver;
+
+        ippl::Vector<uint32_t, 3> nr_global;
+        ippl::Vector<scalar, 3> hr_m;
+        size_t steps_taken;
+        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles)
+            : layout_mp(&layout)
+            , mesh_mp(&mesch)
+            , playout(layout, mesch)
+            , particles(playout)
+            , J(mesch, layout)
+            , E(mesch, layout)
+            , B(mesch, layout)
+            , field_solver(J, E, B) {
+            particles.create(nparticles);
+            nr_global = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)};
+            hr_m        = mesh_mp->getMeshSpacing();
+            steps_taken = 0;
+            // field_solver.setEMFields(E, B);
+        }
+        template <bool space_charge = false>
+        void scatterBunch() {
+            // ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
+            auto hr_m           = mesh_mp->getMeshSpacing();
+            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
+            assert_isreal(volume);
+            assert_isreal((scalar(1) / volume));
+            J                       = typename decltype(J)::value_type(0);
+            auto Jview              = J.getView();
+            auto qview              = particles.Q.getView();
+            auto rview              = particles.R.getView();
+            auto rm1view            = particles.R_nm1.getView();
+            auto orig               = mesh_mp->getOrigin();
+            auto hr                 = mesh_mp->getMeshSpacing();
+            auto dt                 = field_solver.dt;
+            bool sc                 = space_charge;
+            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
+            Kokkos::parallel_for(
+                particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                    vector_type pos  = rview(i);
+                    vector_type to   = rview(i);
+                    vector_type from = rm1view(i);
+                    if (sc) {
+                        scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
+                    }
+                    scatterLineToGrid(lDom, Jview, hr, orig, from, to,
+                                      scalar(qview(i)) / (volume * dt));
+                });
+            Kokkos::fence();
+            J.accumulateHalo();
+        }
+        template <typename callable>
+        void updateBunch(scalar time, callable external_field) {
+            Kokkos::fence();
+            auto gbview     = particles.gamma_beta.getView();
+            auto eview      = particles.E_gather.getView();
+            auto bview      = particles.B_gather.getView();
+            auto qview      = particles.Q.getView();
+            auto mview      = particles.mass.getView();
+            auto rview      = particles.R.getView();
+            auto rm1view    = particles.R_nm1.getView();
+            auto rp1view    = particles.R_np1.getView();
+            scalar bunch_dt = field_solver.dt / 3;
+            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            E.fillHalo();
+            B.fillHalo();
+            Kokkos::fence();
+            for (int bts = 0; bts < 3; bts++) {
+                particles.E_gather.gather(E, particles.R, /*offset = */ 0.0);
+                particles.B_gather.gather(B, particles.R, /*offset = */ 0.0);
+                Kokkos::fence();
+                Kokkos::parallel_for(
+                    particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                        const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                        ippl::Vector<scalar, 3> E_grid           = eview(i);
+                        ippl::Vector<scalar, 3> B_grid           = bview(i);
+                        // std::cout << "E_grid: " << E_grid << "\n";
+                        // std::cout << "B_grid: " << B_grid << "\n";
+                        ippl::Vector<scalar, 3> bunchpos = rview(i);
+                        Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb =
+                            external_field(bunchpos, time + bunch_dt * bts);
+
+                        ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
+                            ippl::Vector<scalar, 3>(E_grid + external_eb.first),
+                            ippl::Vector<scalar, 3>(B_grid + external_eb.second)};
+
+                        const scalar charge = qview(i);
+                        const scalar mass   = mview(i);
+                        const ippl::Vector<scalar, 3> t1 =
+                            pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                        const scalar alpha =
+                            charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                        const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                        const ippl::Vector<scalar, 3> t3 =
+                            t1
+                            + t2.cross(scalar(2) * alpha
+                                       * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1])))));
+                        const ippl::Vector<scalar, 3> ngammabeta =
+                            t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                        rview(i) =
+                            rview(i)
+                            + bunch_dt * ngammabeta
+                                  / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                        gbview(i) = ngammabeta;
+                    });
+                Kokkos::fence();
+            }
+            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
+            size_t invalid_count = 0;
+            auto origo           = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;  //
+            extenz[0] = nr_global[0] * hr_m[0];
+            extenz[1] = nr_global[1] * hr_m[1];
+            extenz[2] = nr_global[2] * hr_m[2];
+            Kokkos::parallel_reduce(
+                Kokkos::RangePolicy<
+                    typename playout_type::RegionLayout_t::view_type::execution_space>(
+                    0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t & ref) {
+                    bool out_of_bounds             = false;
+                    ippl::Vector<scalar, dim> ppos = rview(i);
+                    for (size_t d = 0; d < dim; d++) {
+                        out_of_bounds |= (ppos[d] <= origo[d]);
+                        out_of_bounds |=
+                            (ppos[d] >= origo[d] + extenz[d]);  // Check against simulation domain
+                    }
+                    invalid(i) = out_of_bounds;
+                    ref += out_of_bounds;
+                },
+                invalid_count);
+            particles.destroy(invalid, invalid_count);
+            Kokkos::fence();
+            playout.update(particles);
+        }
+        void solve() {
+            scatterBunch();
+            field_solver.solve();
+            updateBunch(
+                field_solver.dt * steps_taken,
+                /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) {
+                    return Kokkos::pair<vector_type, vector_type>{vector_type(0), vector_type(0)};
+                });
+            ++steps_taken;
+        }
+        template <typename callable>
+        void solve(callable external_field) {
+            scatterBunch();
+            field_solver.solve();
+            // std::cout << field_solver.dt * steps_taken << "\n";
+            updateBunch(field_solver.dt * steps_taken, external_field);
+            ++steps_taken;
+        }
+    };
+}
+#endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
index f9fb5fa5d..018758db4 100644
--- a/src/MaxwellSolvers/FDTD.h
+++ b/src/MaxwellSolvers/FDTD.h
@@ -398,383 +398,6 @@ namespace ippl {
             A_np1 = 0.0;
         };
     };
-
-    template <typename _scalar, class PLayout>
-    struct Bunch : public ippl::ParticleBase<PLayout> {
-        using scalar = _scalar;
-
-        // Constructor for the Bunch class, taking a PLayout reference
-        Bunch(PLayout& playout)
-            : ippl::ParticleBase<PLayout>(playout) {
-            // Add attributes to the particle bunch
-            this->addAttribute(Q);     // Charge attribute
-            this->addAttribute(mass);  // Mass attribute
-            this->addAttribute(
-                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
-            this->addAttribute(R_np1);     // Position attribute for the next time step
-            this->addAttribute(R_nm1);     // Position attribute for the next time step
-            this->addAttribute(E_gather);  // Electric field attribute for particle gathering
-            this->addAttribute(B_gather);  // Magnedit field attribute for particle gathering
-        }
-
-        // Destructor for the Bunch class
-        ~Bunch() {}
-
-        // Define container types for various attributes
-        using charge_container_type   = ippl::ParticleAttrib<scalar>;
-        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
-        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
-
-        // Declare instances of the attribute containers
-        charge_container_type Q;             // Charge container
-        charge_container_type mass;          // Mass container
-        velocity_container_type gamma_beta;  // Gamma-beta container
-        typename ippl::ParticleBase<PLayout>::particle_position_type
-            R_np1;  // Position container for the next time step
-        typename ippl::ParticleBase<PLayout>::particle_position_type
-            R_nm1;  // Position container for the previous time step
-        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
-            E_gather;  // Electric field container for particle gathering
-        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
-            B_gather;  // Magnetio field container for particle gathering
-    };
-    template <typename T>
-    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
-        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos) {
-        // return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5},
-        // ippl::Vector<T, 3>{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing *
-        // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
-        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-        // pos -= spacing * T(0.5);
-        ippl::Vector<T, 3> relpos  = pos - origin;
-        ippl::Vector<T, 3> gridpos = relpos / hr;
-        ippl::Vector<int, 3> ipos;
-        ipos = gridpos.template cast<int>();
-        ippl::Vector<T, 3> fracpos;
-        for (unsigned k = 0; k < 3; k++) {
-            fracpos[k] = gridpos[k] - (int)ipos[k];
-        }
-        // TODO: NGHOST!!!!!!!
-        ipos += ippl::Vector<int, 3>(1);
-        ret.first  = ipos;
-        ret.second = fracpos;
-        return ret;
-    }
-    template <typename view_type, typename scalar>
-    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
-                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
-                                       const ippl::Vector<scalar, 3>& pos, const scalar value) {
-        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
-        ipos -= ldom.first();
-        // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
-        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
-            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
-            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
-            return;
-        }
-        assert(fracpos[0] >= 0.0f);
-        assert(fracpos[0] <= 1.0f);
-        assert(fracpos[1] >= 0.0f);
-        assert(fracpos[1] <= 1.0f);
-        assert(fracpos[2] >= 0.0f);
-        assert(fracpos[2] <= 1.0f);
-        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
-        assert(one_minus_fracpos[0] >= 0.0f);
-        assert(one_minus_fracpos[0] <= 1.0f);
-        assert(one_minus_fracpos[1] >= 0.0f);
-        assert(one_minus_fracpos[1] <= 1.0f);
-        assert(one_minus_fracpos[2] >= 0.0f);
-        assert(one_minus_fracpos[2] <= 1.0f);
-        scalar accum = 0;
-
-        for (unsigned i = 0; i < 8; i++) {
-            scalar weight               = 1;
-            ippl::Vector<int, 3> ipos_l = ipos;
-            for (unsigned d = 0; d < 3; d++) {
-                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
-                ipos_l[d] += !!(i & (1 << d));
-            }
-            assert_isreal(value);
-            assert_isreal(weight);
-            accum += weight;
-            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
-        }
-        assert(abs(accum - 1.0f) < 1e-6f);
-    }
-    template <typename view_type, typename scalar>
-    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
-                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
-                                       const ippl::Vector<scalar, 3>& pos,
-                                       const ippl::Vector<scalar, 3>& value) {
-        // std::cout << "Value: " << value << "\n";
-        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
-        ipos -= ldom.first();
-        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
-            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
-            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
-            // std::cout << "out of bounds\n";
-            return;
-        }
-        assert(fracpos[0] >= 0.0f);
-        assert(fracpos[0] <= 1.0f);
-        assert(fracpos[1] >= 0.0f);
-        assert(fracpos[1] <= 1.0f);
-        assert(fracpos[2] >= 0.0f);
-        assert(fracpos[2] <= 1.0f);
-        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
-        assert(one_minus_fracpos[0] >= 0.0f);
-        assert(one_minus_fracpos[0] <= 1.0f);
-        assert(one_minus_fracpos[1] >= 0.0f);
-        assert(one_minus_fracpos[1] <= 1.0f);
-        assert(one_minus_fracpos[2] >= 0.0f);
-        assert(one_minus_fracpos[2] <= 1.0f);
-        scalar accum = 0;
-
-        for (unsigned i = 0; i < 8; i++) {
-            scalar weight               = 1;
-            ippl::Vector<int, 3> ipos_l = ipos;
-            for (unsigned d = 0; d < 3; d++) {
-                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
-                ipos_l[d] += !!(i & (1 << d));
-            }
-            assert_isreal(weight);
-            accum += weight;
-            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
-            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
-            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
-        }
-        assert(abs(accum - 1.0f) < 1e-6f);
-    }
-    template <typename view_type, typename scalar>
-    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
-                                                  ippl::Vector<scalar, 3> hr,
-                                                  ippl::Vector<scalar, 3> origin,
-                                                  const ippl::Vector<scalar, 3>& from,
-                                                  const ippl::Vector<scalar, 3>& to,
-                                                  const scalar factor) {
-        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
-            gridCoordinatesOf(hr, origin, from);
-        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
-            gridCoordinatesOf(hr, origin, to);
-        // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1],
-        // from_grid.second[2]);
-        for (int d = 0; d < 3; d++) {
-            // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
-            //     std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " <<
-            //     from_grid.first << " " << to_grid.first << std::endl;
-            // }
-            // assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
-        }
-        // const uint32_t nghost = g.nghost();
-        // from_ipos += ippl::Vector<int, 3>(nghost);
-        // to_ipos += ippl::Vector<int, 3>(nghost);
-
-        if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
-            && from_grid.first[2] == to_grid.first[2]) {
-            scatterToGrid(ldom, Jview, hr, origin,
-                          ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
-                          ippl::Vector<scalar, 3>((to - from) * factor));
-
-            return;
-        }
-        ippl::Vector<scalar, 3> relay;
-        const int nghost                   = 1;
-        const ippl::Vector<scalar, 3> orig = origin;
-        using Kokkos::max;
-        using Kokkos::min;
-        for (unsigned int i = 0; i < 3; i++) {
-            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
-                               + scalar(1.0) * hr[i] + orig[i],
-                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
-                                   + scalar(0.0) * hr[i] + orig[i],
-                               scalar(0.5) * (to[i] + from[i])));
-        }
-        scatterToGrid(ldom, Jview, hr, origin,
-                      ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
-                      ippl::Vector<scalar, 3>((relay - from) * factor));
-        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
-                      ippl::Vector<scalar, 3>((to - relay) * factor));
-    }
-    template <typename scalar, fdtd_bc boundary_conditions>
-    class NSFDSolverWithParticles {
-    public:
-        constexpr static unsigned dim = 3;
-        using vector_type             = ippl::Vector<scalar, 3>;
-        using vector4_type            = ippl::Vector<scalar, 4>;
-        using FourField =
-            ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>,
-                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using ThreeField =
-            ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>,
-                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        using playout_type = ParticleSpatialLayout<scalar, 3>;
-        using bunch_type   = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
-        using Mesh_t       = ippl::UniformCartesian<scalar, dim>;
-        FieldLayout<dim>* layout_mp;
-        Mesh_t* mesh_mp;
-        playout_type playout;
-        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
-        FourField J;
-        ThreeField E;
-        ThreeField B;
-        NonStandardFDTDSolver<ThreeField, FourField, absorbing> field_solver;
-
-        ippl::Vector<uint32_t, 3> nr_global;
-        ippl::Vector<scalar, 3> hr_m;
-        size_t steps_taken;
-        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles)
-            : layout_mp(&layout)
-            , mesh_mp(&mesch)
-            , playout(layout, mesch)
-            , particles(playout)
-            , J(mesch, layout)
-            , E(mesch, layout)
-            , B(mesch, layout)
-            , field_solver(J, E, B) {
-            particles.create(nparticles);
-            nr_global = ippl::Vector<uint32_t, 3>{
-                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
-                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
-                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)};
-            hr_m        = mesh_mp->getMeshSpacing();
-            steps_taken = 0;
-            // field_solver.setEMFields(E, B);
-        }
-        template <bool space_charge = false>
-        void scatterBunch() {
-            // ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
-            auto hr_m           = mesh_mp->getMeshSpacing();
-            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
-            assert_isreal(volume);
-            assert_isreal((scalar(1) / volume));
-            J                       = typename decltype(J)::value_type(0);
-            auto Jview              = J.getView();
-            auto qview              = particles.Q.getView();
-            auto rview              = particles.R.getView();
-            auto rm1view            = particles.R_nm1.getView();
-            auto orig               = mesh_mp->getOrigin();
-            auto hr                 = mesh_mp->getMeshSpacing();
-            auto dt                 = field_solver.dt;
-            bool sc                 = space_charge;
-            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
-            Kokkos::parallel_for(
-                particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
-                    vector_type pos  = rview(i);
-                    vector_type to   = rview(i);
-                    vector_type from = rm1view(i);
-                    if (sc) {
-                        scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
-                    }
-                    scatterLineToGrid(lDom, Jview, hr, orig, from, to,
-                                      scalar(qview(i)) / (volume * dt));
-                });
-            Kokkos::fence();
-            J.accumulateHalo();
-        }
-        template <typename callable>
-        void updateBunch(scalar time, callable external_field) {
-            Kokkos::fence();
-            auto gbview     = particles.gamma_beta.getView();
-            auto eview      = particles.E_gather.getView();
-            auto bview      = particles.B_gather.getView();
-            auto qview      = particles.Q.getView();
-            auto mview      = particles.mass.getView();
-            auto rview      = particles.R.getView();
-            auto rm1view    = particles.R_nm1.getView();
-            auto rp1view    = particles.R_np1.getView();
-            scalar bunch_dt = field_solver.dt / 3;
-            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
-            E.fillHalo();
-            B.fillHalo();
-            Kokkos::fence();
-            for (int bts = 0; bts < 3; bts++) {
-                particles.E_gather.gather(E, particles.R, /*offset = */ 0.0);
-                particles.B_gather.gather(B, particles.R, /*offset = */ 0.0);
-                Kokkos::fence();
-                Kokkos::parallel_for(
-                    particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
-                        const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
-                        ippl::Vector<scalar, 3> E_grid           = eview(i);
-                        ippl::Vector<scalar, 3> B_grid           = bview(i);
-                        // std::cout << "E_grid: " << E_grid << "\n";
-                        // std::cout << "B_grid: " << B_grid << "\n";
-                        ippl::Vector<scalar, 3> bunchpos = rview(i);
-                        Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb =
-                            external_field(bunchpos, time + bunch_dt * bts);
-
-                        ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
-                            ippl::Vector<scalar, 3>(E_grid + external_eb.first),
-                            ippl::Vector<scalar, 3>(B_grid + external_eb.second)};
-
-                        const scalar charge = qview(i);
-                        const scalar mass   = mview(i);
-                        const ippl::Vector<scalar, 3> t1 =
-                            pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-                        const scalar alpha =
-                            charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
-                        const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
-                        const ippl::Vector<scalar, 3> t3 =
-                            t1
-                            + t2.cross(scalar(2) * alpha
-                                       * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1])))));
-                        const ippl::Vector<scalar, 3> ngammabeta =
-                            t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
-
-                        rview(i) =
-                            rview(i)
-                            + bunch_dt * ngammabeta
-                                  / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
-                        gbview(i) = ngammabeta;
-                    });
-                Kokkos::fence();
-            }
-            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
-            size_t invalid_count = 0;
-            auto origo           = mesh_mp->getOrigin();
-            ippl::Vector<scalar, 3> extenz;  //
-            extenz[0] = nr_global[0] * hr_m[0];
-            extenz[1] = nr_global[1] * hr_m[1];
-            extenz[2] = nr_global[2] * hr_m[2];
-            Kokkos::parallel_reduce(
-                Kokkos::RangePolicy<
-                    typename playout_type::RegionLayout_t::view_type::execution_space>(
-                    0, particles.getLocalNum()),
-                KOKKOS_LAMBDA(size_t i, size_t & ref) {
-                    bool out_of_bounds             = false;
-                    ippl::Vector<scalar, dim> ppos = rview(i);
-                    for (size_t d = 0; d < dim; d++) {
-                        out_of_bounds |= (ppos[d] <= origo[d]);
-                        out_of_bounds |=
-                            (ppos[d] >= origo[d] + extenz[d]);  // Check against simulation domain
-                    }
-                    invalid(i) = out_of_bounds;
-                    ref += out_of_bounds;
-                },
-                invalid_count);
-            particles.destroy(invalid, invalid_count);
-            Kokkos::fence();
-            playout.update(particles);
-        }
-        void solve() {
-            scatterBunch();
-            field_solver.solve();
-            updateBunch(
-                field_solver.dt * steps_taken,
-                /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) {
-                    return Kokkos::pair<vector_type, vector_type>{vector_type(0), vector_type(0)};
-                });
-            ++steps_taken;
-        }
-        template <typename callable>
-        void solve(callable external_field) {
-            scatterBunch();
-            field_solver.solve();
-            // std::cout << field_solver.dt * steps_taken << "\n";
-            updateBunch(field_solver.dt * steps_taken, external_field);
-            ++steps_taken;
-        }
-    };
 }  // namespace ippl
 
 #endif
\ No newline at end of file

From 3906349a7ac9133be1e74341f9d5e98145f568d5 Mon Sep 17 00:00:00 2001
From: Manuel <manuel.winkler@gmx.ch>
Date: Fri, 28 Jun 2024 13:25:45 +0200
Subject: [PATCH 32/32] Remove some dead code, move everything particle related
 into fel

---
 fel/CMakeLists.txt                            |  5 +-
 fel/FreeElectronLaser.cpp                     | 50 +++++++--------
 fel/NSFDSolverWithParticles.h                 | 63 ++++++++++---------
 .../TestFDTDSolverWithParticles.cpp           |  9 +--
 test/solver/CMakeLists.txt                    |  6 --
 5 files changed, 63 insertions(+), 70 deletions(-)
 rename {test/solver => fel}/TestFDTDSolverWithParticles.cpp (87%)

diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt
index dd8898dd7..e986ce8b9 100644
--- a/fel/CMakeLists.txt
+++ b/fel/CMakeLists.txt
@@ -10,4 +10,7 @@ set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES})
 set (COMPILE_FLAGS ${OPAL_CXX_FLAGS})
 
 add_executable (FreeElectronLaser FreeElectronLaser.cpp)
-target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
\ No newline at end of file
+target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
+
+add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp)
+target_link_libraries (TestFDTDSolverWithParticles ${IPPL_LIBS})
\ No newline at end of file
diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp
index a33159c51..ce05c8c3e 100644
--- a/fel/FreeElectronLaser.cpp
+++ b/fel/FreeElectronLaser.cpp
@@ -813,7 +813,7 @@ void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma) {
         if (std::isnan(iterQ->rnp[2])) {
             std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_
                       << ", " << frame_beta << "\n";
-            std::cerr << __FILE__ << ": " << __LINE__ << "   OOOOOF\n";
+            std::cerr << __FILE__ << ": " << __LINE__ << " Particle has NaN velocity or position\n";
             abort();
         }
     }
@@ -822,25 +822,25 @@ void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma) {
 template <typename bunch_type, typename scalar>
 size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize<scalar>& bunchInit,
                                scalar frame_gamma) {
-    ChargeVector<scalar> oof;
-    initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0);
-    for (auto& c : oof) {
+    ChargeVector<scalar> temporary_charge_list;
+    initializeBunchEllipsoid(bunchInit, temporary_charge_list, 0, 1, 0);
+    for (auto& c : temporary_charge_list) {
         if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
             std::cout << "Pos before boost: " << c.rnp << "\n";
         if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
             std::cout << "Pos before boost: " << c.rnp << "\n";
     }
-    boost_bunch(oof, frame_gamma);
-    for (auto& c : oof) {
+    boost_bunch(temporary_charge_list, frame_gamma);
+    for (auto& c : temporary_charge_list) {
         if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) {
             std::cout << "Pos after boost: " << c.rnp << "\n";
             break;
         }
     }
-    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", oof.size());
-    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", oof.size());
-    auto iterQ = oof.begin();
-    for (size_t i = 0; i < oof.size(); i++) {
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", temporary_charge_list.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", temporary_charge_list.size());
+    auto iterQ = temporary_charge_list.begin();
+    for (size_t i = 0; i < temporary_charge_list.size(); i++) {
         assert_isreal(iterQ->gb[0]);
         assert_isreal(iterQ->gb[1]);
         assert_isreal(iterQ->gb[2]);
@@ -854,11 +854,11 @@ size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize<scalar>&
         gammabetas(i) = iterQ->gb;
         ++iterQ;
     }
-    if (oof.size() > bunch.getLocalNum()) {
-        bunch.create(oof.size() - bunch.getLocalNum());
+    if (temporary_charge_list.size() > bunch.getLocalNum()) {
+        bunch.create(temporary_charge_list.size() - bunch.getLocalNum());
     }
-    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions("", oof.size());
-    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", oof.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions("", temporary_charge_list.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", temporary_charge_list.size());
 
     Kokkos::deep_copy(dpositions, positions);
     Kokkos::deep_copy(dgammabetas, gammabetas);
@@ -868,14 +868,14 @@ size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize<scalar>&
          gbview = bunch.gamma_beta.getView();
     ;
     Kokkos::parallel_for(
-        oof.size(), KOKKOS_LAMBDA(size_t i) {
+        temporary_charge_list.size(), KOKKOS_LAMBDA(size_t i) {
             rview(i)   = dpositions(i);
             rm1view(i) = dpositions(i);
             gbview(i)  = dgammabetas(i);
         });
     Kokkos::fence();
 
-    return oof.size();
+    return temporary_charge_list.size();
 }
 
 // END LORENTZ FRAME AND UNDULATOR AND BUNCH
@@ -969,14 +969,13 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view
                                    ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
                                    const ippl::Vector<scalar, 3>& pos,
                                    const ippl::Vector<scalar, 3>& value, int nghost = 1) {
-    // std::cout << "Value: " << value << "\n";
     auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
     ipos -= ldom.first();
     if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
         || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
         || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
-        // std::cout << "out of bounds\n";
-        return;
+        
+        return; // Return in case of out of bounds
     }
     assert(fracpos[0] >= 0.0f);
     assert(fracpos[0] <= 1.0f);
@@ -1246,14 +1245,16 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v,
                                                                const ippl::Vector<T, Dim>& pos,
                                                                const ippl::Vector<T, 3>& origin,
                                                                const ippl::Vector<T, 3>& hr,
-                                                               const ippl::NDIndex<3>& lDom) {
+                                                               const ippl::NDIndex<3>& lDom,
+                                                               int nghost = 1) {
     using vector_type = ippl::Vector<T, 3>;
 
     vector_type l;
 
     for (unsigned k = 0; k < 3; k++) {
         l[k] = (pos[k] - origin[k]) / hr[k]
-               + 1.0;  // gather is implemented in a way such that this 1 is necessary here
+               + 1.0;  // gather is implemented in a way such that this 1 is necessary here. 
+                       // This is NOT the nghost, this needs to be a hardcoded 1.0
     }
 
     ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
@@ -1261,15 +1262,14 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v,
     ippl::Vector<T, 3> wlo(1.0);
     wlo -= whi;
 
-    // TODO: nghost
-    ippl::Vector<size_t, 3> args = index - lDom.first() + 1;
+    ippl::Vector<size_t, 3> args = index - lDom.first() + nghost;
     for (unsigned k = 0; k < 3; k++) {
         if (args[k] >= v.extent(k) || args[k] == 0) {
             return typename View::value_type(0);
         }
     }
-    return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo,
-                                                    whi, args) /*}*/;
+    return ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo,
+                                                    whi, args);
 }
 template <typename scalar>
 scalar test_gauss_law(uint32_t n) {
diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h
index 06aa2f748..28f9668bc 100644
--- a/fel/NSFDSolverWithParticles.h
+++ b/fel/NSFDSolverWithParticles.h
@@ -44,33 +44,51 @@ namespace ippl{
     };
     template <typename T>
     KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
-        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos) {
-        // return pear<ippl::Vector<int, 3>, ippl::Vector<T, 3>>{ippl::Vector<int, 3>{5,5,5},
-        // ippl::Vector<T, 3>{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing *
-        // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]);
+        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos, int nghost = 1) {
+
+        // Declare a pair to hold the resulting grid coordinates (integer part) and fractional part
         Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
-        // pos -= spacing * T(0.5);
-        ippl::Vector<T, 3> relpos  = pos - origin;
+
+        // Calculate the relative position of pos with respect to the origin
+        ippl::Vector<T, 3> relpos = pos - origin;
+
+        // Convert the relative position to grid coordinates by dividing by the grid spacing (hr)
         ippl::Vector<T, 3> gridpos = relpos / hr;
+
+        // Declare an integer vector to hold the integer part of the grid coordinates
         ippl::Vector<int, 3> ipos;
+
+        // Cast the grid position to an integer vector, which gives us the integer part of the coordinates
         ipos = gridpos.template cast<int>();
+
+        // Declare a vector to hold the fractional part of the grid coordinates
         ippl::Vector<T, 3> fracpos;
+
+        // Calculate the fractional part of the grid coordinates
         for (unsigned k = 0; k < 3; k++) {
-            fracpos[k] = gridpos[k] - (int)ipos[k];
+            fracpos[k] = gridpos[k] - static_cast<int>(ipos[k]);
         }
-        // TODO: NGHOST!!!!!!!
-        ipos += ippl::Vector<int, 3>(1);
-        ret.first  = ipos;
+
+        // Add the number of ghost cells to the integer part of the coordinates
+        ipos += ippl::Vector<int, 3>(nghost);
+
+        // Set the integer part of the coordinates in the return pair
+        ret.first = ipos;
+
+        // Set the fractional part of the coordinates in the return pair
         ret.second = fracpos;
+
+        // Return the pair containing both the integer and fractional parts of the grid coordinates
         return ret;
     }
+
     template <typename view_type, typename scalar>
     KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
                                        ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
                                        const ippl::Vector<scalar, 3>& pos, const scalar value) {
         auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
         ipos -= ldom.first();
-        // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n";
+        // std::cout << pos << " 's scatter args (will have 1, or nghost in general added): " << ipos << "\n";
         if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
             || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
             || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
@@ -110,13 +128,12 @@ namespace ippl{
                                        ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
                                        const ippl::Vector<scalar, 3>& pos,
                                        const ippl::Vector<scalar, 3>& value) {
-        // std::cout << "Value: " << value << "\n";
         auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
         ipos -= ldom.first();
         if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
             || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
             || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
-            // std::cout << "out of bounds\n";
+            // Out of bounds case (you'll do nothing)
             return;
         }
         assert(fracpos[0] >= 0.0f);
@@ -160,24 +177,12 @@ namespace ippl{
             gridCoordinatesOf(hr, origin, from);
         Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
             gridCoordinatesOf(hr, origin, to);
-        // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1],
-        // from_grid.second[2]);
-        for (int d = 0; d < 3; d++) {
-            // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){
-            //     std::cout <<abs(from_grid.first[d] - to_grid.first[d]) << " violation " <<
-            //     from_grid.first << " " << to_grid.first << std::endl;
-            // }
-            // assert(abs(from_grid.first[d] - to_grid.first[d]) <= 1);
-        }
-        // const uint32_t nghost = g.nghost();
-        // from_ipos += ippl::Vector<int, 3>(nghost);
-        // to_ipos += ippl::Vector<int, 3>(nghost);
 
         if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
             && from_grid.first[2] == to_grid.first[2]) {
             scatterToGrid(ldom, Jview, hr, origin,
-                          ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
-                          ippl::Vector<scalar, 3>((to - from) * factor));
+                          /*Scatter point, geometric average */         ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                          /*Scatter value, factor=charge / timestep */ ippl::Vector<scalar, 3>((to - from) * factor));
 
             return;
         }
@@ -242,11 +247,9 @@ namespace ippl{
                 uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)};
             hr_m        = mesh_mp->getMeshSpacing();
             steps_taken = 0;
-            // field_solver.setEMFields(E, B);
         }
         template <bool space_charge = false>
         void scatterBunch() {
-            // ippl::Vector<scalar, 3>* gammaBeta = this->gammaBeta;
             auto hr_m           = mesh_mp->getMeshSpacing();
             const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
             assert_isreal(volume);
@@ -300,8 +303,6 @@ namespace ippl{
                         const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
                         ippl::Vector<scalar, 3> E_grid           = eview(i);
                         ippl::Vector<scalar, 3> B_grid           = bview(i);
-                        // std::cout << "E_grid: " << E_grid << "\n";
-                        // std::cout << "B_grid: " << B_grid << "\n";
                         ippl::Vector<scalar, 3> bunchpos = rview(i);
                         Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb =
                             external_field(bunchpos, time + bunch_dt * bts);
diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/fel/TestFDTDSolverWithParticles.cpp
similarity index 87%
rename from test/solver/TestFDTDSolverWithParticles.cpp
rename to fel/TestFDTDSolverWithParticles.cpp
index d5d44a174..33c516918 100644
--- a/test/solver/TestFDTDSolverWithParticles.cpp
+++ b/fel/TestFDTDSolverWithParticles.cpp
@@ -4,10 +4,10 @@ using std::size_t;
 #include "Ippl.h"
 #include "Types/Vector.h"
 #include "Field/Field.h"
-#include "MaxwellSolvers/FDTD.h"
+#include "NSFDSolverWithParticles.h"
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.h>
-#include "../alpine/units.h"
+#include "units.h"
 #include <Kokkos_Random.hpp>
 template<typename scalar1, typename... scalar>
     requires((std::is_floating_point_v<scalar1>))
@@ -29,11 +29,6 @@ int main(int argc, char* argv[]){
     ippl::initialize(argc, argv);
     {
         using scalar = float;
-        //const unsigned dim = 3;
-        //using vector_type = ippl::Vector<scalar, 3>;
-        //using vector4_type = ippl::Vector<scalar, 4>;
-        //using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
-        //using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
         constexpr size_t n = 100;
         ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
         ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt
index 7bf9f853b..dda292fff 100644
--- a/test/solver/CMakeLists.txt
+++ b/test/solver/CMakeLists.txt
@@ -31,12 +31,6 @@ target_link_libraries (
     ${IPPL_LIBS}
     ${MPI_CXX_LIBRARIES}
 )
-add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp)
-target_link_libraries (
-    TestFDTDSolverWithParticles
-    ${IPPL_LIBS}
-    ${MPI_CXX_LIBRARIES}
-)
 
 if (ENABLE_FFT)
     add_executable (TestGaussian_convergence TestGaussian_convergence.cpp)