From b5c8668de0c94b99cc5cbfad1ad1757274211f1f Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 13 Apr 2024 14:45:52 +0200 Subject: [PATCH 01/32] FEL-IR-a works --- CMakeLists.txt | 114 +- src/Expression/IpplOperations.h | 2 +- src/MaxwellSolvers/CMakeLists.txt | 1 + src/MaxwellSolvers/FDTD.h | 51 + src/Particle/ParticleAttrib.hpp | 24 +- src/Types/Vector.h | 306 +++-- test/solver/CMakeLists.txt | 6 + test/solver/TestFDTDSolver.cpp | 1774 +++++++++++++++++++++++++++++ 8 files changed, 2169 insertions(+), 109 deletions(-) create mode 100644 src/MaxwellSolvers/FDTD.h create mode 100644 test/solver/TestFDTDSolver.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9483cc6cf..46a2ac866 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,33 @@ cmake_minimum_required (VERSION 3.20.3) project (IPPL CXX) set (IPPL_VERSION_MAJOR 3) -set (IPPL_VERSION_MINOR 0.1) - +set (IPPL_VERSION_MINOR 0.2) +include(FetchContent) set (IPPL_VERSION_NAME "V${IPPL_VERSION_MAJOR}.${IPPL_VERSION_MINOR}") - +option(DOWNLOAD_EXTRACT_TIMESTAMP ON FORCE) set (IPPL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +if(NOT WIN32) + string(ASCII 27 Esc) + set(ColourReset "${Esc}[m") + set(Red "${Esc}[31m") + set(Green "${Esc}[32m") + set(Yellow "${Esc}[1;33m") +endif() + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) + set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE - ) -endif () - + ) + message(STATUS "${Yellow}CMAKE_BUILD_TYPE not set, defaulting to ${CMAKE_BUILD_TYPE} ${ColourReset}") +else () + message (STATUS "${Green}Build type is: ${CMAKE_BUILD_TYPE}${ColourReset}") +endif() +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() if (${CMAKE_BUILD_TYPE} STREQUAL "Release") add_definitions (-DNOPAssert) endif () @@ -21,27 +35,85 @@ endif () add_compile_options (-Wall) add_compile_options (-Wunused) add_compile_options (-Wextra) -add_compile_options (-Werror) +#add_compile_options (-Werror) # allow deprecated functions add_compile_options (-Wno-deprecated-declarations) - +file( + DOWNLOAD + https://github.com/nlohmann/json/releases/download/v3.11.3/json.hpp + "${CMAKE_BINARY_DIR}/downloaded_headers/json.hpp" +) +file( + DOWNLOAD + https://raw.githubusercontent.com/nothings/stb/master/stb_image_write.h + "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp" +) +include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/") option (USE_STATIC_LIBRARIES "Link with static libraries if available" ON) -message (STATUS "Build type is: " ${CMAKE_BUILD_TYPE}) +if(NOT IPPL_PLATFORMS) + message(STATUS "${Yellow}IPPL_PLATFORMS flag is not set, defaulting to serial backend${ColourReset}") + set(IPPL_PLATFORMS "SERIAL") +endif() -find_package(Kokkos 4.0.0 REQUIRED) +if(IPPL_PLATFORMS) + #message(STATUS "IPPL_PLATFORMS flag is set to: ${IPPL_PLATFORMS}") + string(TOUPPER "${IPPL_PLATFORMS}" IPPL_PLATFORMS) + # Perform case distinction + if("${IPPL_PLATFORMS}" STREQUAL "CUDA") + message(STATUS "${Green}CUDA backend enabled${ColourReset}") + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable Kokkos CUDA Backend") + set(Heffte_ENABLE_CUDA ON CACHE BOOL "Set Heffte CUDA Backend") + elseif("${IPPL_PLATFORMS}" STREQUAL "OPENMP") + message(STATUS "${Green}OpenMP backend enabled${ColourReset}") + if(NOT Heffte_ENABLE_FFTW) + message(STATUS "${Yellow}Because Heffte_ENABLE_FFTW is not set defaulting to Stock with AVX${ColourReset}") + set(Heffte_ENABLE_AVX2 ON CACHE BOOL "Enable Heffte Stock backend lol") + else() + message(STATUS "${Green}Enabled FFTW${ColourReset}") + endif() + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Enable Kokkos OpenMP Backend") + elseif("${IPPL_PLATFORMS}" STREQUAL "SERIAL") + + message(STATUS "${Green}Serial backend enabled${ColourReset}") + if(NOT Heffte_ENABLE_FFTW) + message(STATUS "${Yellow}Because Heffte_ENABLE_FFTW is not set defaulting to Stock with AVX${ColourReset}") + set(Heffte_ENABLE_AVX2 ON CACHE BOOL "Enable Heffte Stock backend lol") + endif() + + elseif("${IPPL_PLATFORMS}" STREQUAL "CUDA;OPENMP" OR "${IPPL_PLATFORMS}" STREQUAL "OPENMP;CUDA") + message(STATUS "${Green}CUDA and OpenMP backend enabled${ColourReset}") + set(Heffte_ENABLE_CUDA ON CACHE BOOL "Set Heffte CUDA Backend") + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Enable Kokkos OpenMP Backend") + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable Kokkos CUDA Backend") + else() + message(FATAL_ERROR "Unknown or unsupported platform: ${IPPL_PLATFORMS}") + # Handle unknown or unsupported platform + endif() +endif() +find_package(Kokkos 4.0.0 QUIET) +if(NOT Kokkos_FOUND) + FetchContent_Declare( + #DOWNLOAD_EXTRACT_TIMESTAMP + kokko + URL https://github.com/kokkos/kokkos/archive/refs/tags/4.1.00.tar.gz + ) + FetchContent_MakeAvailable(kokko) +endif() # Select flags. set (CMAKE_CXX_STANDARD 20) +set (CMAKE_CUDA_EXTENSIONS OFF) +set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) -set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g ") -set (CMAKE_CXX_FLAGS_RELEASE "-O3") -set (CMAKE_CXX_FLAGS_DEBUG "-O0 -g") +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ") +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") +#set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") # Use sanitizer flags for GCC or Clang only - set (CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address,undefined") + set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address,undefined") endif () if ("${Kokkos_DEVICES}" MATCHES "CUDA") @@ -73,7 +145,16 @@ endif() option (ENABLE_FFT "Enable FFT transform" OFF) if (ENABLE_FFT) add_definitions (-DENABLE_FFT) - find_package (Heffte 2.2.0 REQUIRED) + find_package (Heffte 2.2.0 QUIET) + if(NOT Heffte_FOUND) + message(STATUS "Using FetchContent for Heffte") + FetchContent_Declare( + Heffte + #DOWNLOAD_EXTRACT_TIMESTAMP + GIT_REPOSITORY https://github.com/icl-utk-edu/heffte + ) + FetchContent_MakeAvailable(Heffte) + endif() message (STATUS "Found Heffte_DIR: ${Heffte_DIR}") endif () @@ -100,6 +181,9 @@ endif () option (ENABLE_UNIT_TESTS "Enable unit tests" OFF) if (ENABLE_UNIT_TESTS) + if (NOT ENABLE_FFT) + message(WARNING "Enabling unit tests even though FFT not enabled (-DENABLE_FFT=ON)!") + endif () include(FetchContent) include(GoogleTest) diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h index dad442901..198f20b44 100644 --- a/src/Expression/IpplOperations.h +++ b/src/Expression/IpplOperations.h @@ -282,7 +282,7 @@ namespace ippl { */ template KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { - return dot(u_m(args...), v_m(args...)).apply(); + return dot(u_m(args...), v_m(args...)); } private: diff --git a/src/MaxwellSolvers/CMakeLists.txt b/src/MaxwellSolvers/CMakeLists.txt index 62d93dd36..d961415ea 100644 --- a/src/MaxwellSolvers/CMakeLists.txt +++ b/src/MaxwellSolvers/CMakeLists.txt @@ -3,6 +3,7 @@ set (_SRCS set (_HDRS Maxwell.h + FDTD.h ) include_DIRECTORIES ( diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h new file mode 100644 index 000000000..f4a1d5b9a --- /dev/null +++ b/src/MaxwellSolvers/FDTD.h @@ -0,0 +1,51 @@ +#ifndef IPPL_FDTD_H +#define IPPL_FDTD_H +#include "Types/Vector.h" + +#include "Field/Field.h" + +#include "FieldLayout/FieldLayout.h" +#include "Meshes/UniformCartesian.h" +constexpr double sqrt_4pi = 3.54490770181103205459; +constexpr double alpha_scaling_factor = 1e30; +constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; +constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi; +constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor; +constexpr double electron_mass_in_kg = 9.1093837015e-31; +constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor; +constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); +constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg; +constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters; +constexpr double electron_charge_in_coulombs = 1.602176634e-19; +constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs; + +constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges; +constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds; +constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses; +constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); + +constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges; +constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges; +constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges; +constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds; +constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes; +constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters; +constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters); +constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters); +constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters; +constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor); +constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts; +constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters); +constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes); +constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds); +constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G; +constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons; + +namespace ippl { + //template + //struct FDTDFieldState{ + // + //}; +} + +#endif \ No newline at end of file diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 3b37f920a..9f22b72be 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -121,7 +121,7 @@ namespace ippl { const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = 1.0 / dx; + const vector_type invdx = vector_type(1.0) / dx; const FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); @@ -132,12 +132,12 @@ namespace ippl { "ParticleAttrib::scatter", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; - Vector index = l; - Vector whi = l - index; - Vector wlo = 1.0 - whi; + vector_type l = (pp(idx) - origin) * invdx + vector_type(1.0); // Terribile; + Vector index = l.template cast(); + Vector whi = l - index.template cast(); + Vector wlo = vector_type(1.0) - whi; - Vector args = index - lDom.first() + nghost; + Vector args = (index - lDom.first() + nghost).template cast(); // scatter const value_type& val = dview_m(idx); @@ -175,7 +175,7 @@ namespace ippl { const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = 1.0 / dx; + const vector_type invdx = vector_type(1.0) / dx; const FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); @@ -186,12 +186,12 @@ namespace ippl { "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; - Vector index = l; - Vector whi = l - index; - Vector wlo = 1.0 - whi; + vector_type l = (pp(idx) - origin) * invdx + vector_type(1.0); //terribile + Vector index = l.template cast(); + Vector whi = l - index.template cast(); + Vector wlo = vector_type(1.0) - whi; - Vector args = index - lDom.first() + nghost; + Vector args = (index - lDom.first() + nghost).template cast(); // gather dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, diff --git a/src/Types/Vector.h b/src/Types/Vector.h index 4380fc8cf..56963f864 100644 --- a/src/Types/Vector.h +++ b/src/Types/Vector.h @@ -20,95 +20,239 @@ namespace ippl { * @tparam Dim vector dimension */ template - class Vector : public detail::Expression, sizeof(T) * Dim> { - public: - typedef T value_type; - static constexpr unsigned dim = Dim; + struct Vector { + using value_type = T; - KOKKOS_FUNCTION - Vector() - : Vector(value_type(0)) {} - - template ::type = true> - explicit KOKKOS_FUNCTION Vector(const Args&... args); - - template - KOKKOS_FUNCTION Vector(const detail::Expression& expr); - - KOKKOS_DEFAULTED_FUNCTION - Vector(const Vector& v) = default; - - KOKKOS_FUNCTION - Vector(const T& val); - - - Vector(const std::array& a); - - Vector(const std::array, Dim>& a); - - /*! - * @param list of values - */ - KOKKOS_FUNCTION - Vector(const std::initializer_list& list); - - KOKKOS_FUNCTION - ~Vector() {} - - // Get and Set Operations - KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i); - - KOKKOS_INLINE_FUNCTION value_type operator[](unsigned int i) const; - - KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i); - - KOKKOS_INLINE_FUNCTION value_type operator()(unsigned int i) const; - - // Assignment Operators - template - KOKKOS_INLINE_FUNCTION Vector& operator=(const detail::Expression& expr); - - template - KOKKOS_INLINE_FUNCTION Vector& operator+=(const detail::Expression& expr); - - template - KOKKOS_INLINE_FUNCTION Vector& operator-=(const detail::Expression& expr); - - template - KOKKOS_INLINE_FUNCTION Vector& operator*=(const detail::Expression& expr); - - template - KOKKOS_INLINE_FUNCTION Vector& operator/=(const detail::Expression& expr); - - KOKKOS_INLINE_FUNCTION Vector& operator+=(const T& val); - - KOKKOS_INLINE_FUNCTION Vector& operator-=(const T& val); - - KOKKOS_INLINE_FUNCTION Vector& operator*=(const T& val); - - KOKKOS_INLINE_FUNCTION Vector& operator/=(const T& val); + constexpr static unsigned dim = Dim; + T data[Dim]; + KOKKOS_INLINE_FUNCTION constexpr Vector(const std::initializer_list& list) { + // PAssert(list.size() == Dim); + unsigned int i = 0; + for (auto& l : list) { + data[i] = l; + ++i; + } + } + Vector() = default; + constexpr KOKKOS_INLINE_FUNCTION Vector(T v) { fill(v); } + KOKKOS_INLINE_FUNCTION value_type dot(const Vector& v) const noexcept { + value_type ret = 0; + for (unsigned i = 0; i < dim; i++) { + ret += data[i] * v[i]; + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION Vector tail() const noexcept { + Vector ret; + static_assert(N <= Dim, "N must be smaller than Dim"); + constexpr unsigned diff = Dim - N; + for (unsigned i = 0; i < N; i++) { + ret[i] = (*this)[i + diff]; + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION Vector head() const noexcept { + Vector ret; + static_assert(N <= Dim, "N must be smaller than Dim"); + for (unsigned i = 0; i < N; i++) { + ret[i] = (*this)[i]; + } + return ret; + } + KOKKOS_INLINE_FUNCTION value_type squaredNorm() const noexcept { return dot(*this); } + KOKKOS_INLINE_FUNCTION value_type norm() { +#ifndef __CUDA_ARCH__ + using std::sqrt; +#endif + return sqrt(squaredNorm()); + } + KOKKOS_INLINE_FUNCTION Vector normalized() const noexcept { return *this / norm(); } + KOKKOS_INLINE_FUNCTION value_type sum() const noexcept { + value_type ret = 0; + for (unsigned i = 0; i < dim; i++) { + ret += data[i]; + } + return ret; + } + KOKKOS_INLINE_FUNCTION value_type average() const noexcept { + value_type ret = 0; + for (unsigned i = 0; i < dim; i++) { + ret += data[i]; + } + return ret / dim; + } + KOKKOS_INLINE_FUNCTION Vector cross(const Vector& v) const noexcept + requires(Dim == 3) + { + Vector ret(0); + ret[0] = data[1] * v[2] - data[2] * v[1]; + ret[1] = data[2] * v[0] - data[0] * v[2]; + ret[2] = data[0] * v[1] - data[1] * v[0]; + return ret; + } + KOKKOS_INLINE_FUNCTION bool operator==(const Vector& o) const noexcept { + for (unsigned i = 0; i < dim; i++) { + if (data[i] != o[i]) + return false; + } + return true; + } + KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i) noexcept { + assert(i < dim); + return data[i]; + } + KOKKOS_INLINE_FUNCTION T* begin() noexcept { return data; } + KOKKOS_INLINE_FUNCTION T* end() noexcept { return data + dim; } + KOKKOS_INLINE_FUNCTION const T* begin() const noexcept { return data; } + KOKKOS_INLINE_FUNCTION const T* end() const noexcept { return data + dim; } + KOKKOS_INLINE_FUNCTION constexpr void fill(value_type x) { + for (unsigned i = 0; i < dim; i++) { + data[i] = value_type(x); + } + } + KOKKOS_INLINE_FUNCTION const value_type& operator[](unsigned int i) const noexcept { + assert(i < dim); + return data[i]; + } - using iterator = T*; - using const_iterator = const T*; - KOKKOS_INLINE_FUNCTION constexpr iterator begin(); - KOKKOS_INLINE_FUNCTION constexpr iterator end(); - KOKKOS_INLINE_FUNCTION constexpr const_iterator begin() const; - KOKKOS_INLINE_FUNCTION constexpr const_iterator end() const; + KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i) noexcept { + assert(i < dim); + return data[i]; + } - KOKKOS_INLINE_FUNCTION T dot(const Vector& rhs) const; + KOKKOS_INLINE_FUNCTION const value_type& operator()(unsigned int i) const noexcept { + assert(i < dim); + return data[i]; + } + KOKKOS_INLINE_FUNCTION Vector operator-() const noexcept { + Vector ret; + for (unsigned i = 0; i < dim; i++) { + ret[i] = -(*this)[i]; + } + return ret; + } + KOKKOS_INLINE_FUNCTION Vector decompose(Vector* integral) { +#ifndef __CUDA_ARCH__ + using std::modf; +#endif + Vector ret; + for (unsigned i = 0; i < dim; i++) { + if constexpr (std::is_same_v) { + float tmp; + ret[i] = modff((*this)[i], &tmp); + (*integral)[i] = (int)tmp; + } else if constexpr (std::is_same_v) { + double tmp; + ret[i] = modf((*this)[i], &tmp); + (*integral)[i] = (int)tmp; + } + } + return ret; + } + template + constexpr KOKKOS_INLINE_FUNCTION Vector cast() const noexcept { + Vector ret; + for (unsigned i = 0; i < dim; i++) { + ret.data[i] = (O)(data[i]); + } + return ret; + } + KOKKOS_INLINE_FUNCTION Vector operator*(const value_type& o) const noexcept { + Vector ret; + for (unsigned i = 0; i < dim; i++) { + ret[i] = (*this)[i] * o; + } + return ret; + } +#define defop_kf(OP) \ + constexpr KOKKOS_INLINE_FUNCTION Vector operator OP(const Vector& o) const noexcept { \ + Vector ret; \ + for (unsigned i = 0; i < dim; i++) { \ + ret.data[i] = data[i] OP o.data[i]; \ + } \ + return ret; \ + } + defop_kf(+) + defop_kf(-) + defop_kf(*) + defop_kf(/) - //Needs to be public to be a standard-layout type - //private: - T data_m[Dim]; +#define def_aop_kf(OP) \ + KOKKOS_INLINE_FUNCTION Vector& operator OP(const Vector& o) noexcept { \ + for (unsigned i = 0; i < dim; i++) { \ + (*this)[i] OP o[i]; \ + } \ + return *this; \ + } + def_aop_kf(+=) + def_aop_kf(-=) + def_aop_kf(*=) + def_aop_kf(/=) + template + friend stream_t& operator<<(stream_t& str, const Vector& v) { + // tr << "{"; + for (unsigned i = 0; i < Dim; i++) { + str << v[i]; + if (i < Dim - 1) + str << ", "; + } + // str << "}"; + return str; + } + // defop_kf(*) + // defop_kf(/) }; - + + //template + //KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b); + //template + //KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b); template - KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b); + KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b){ + using Kokkos::min; + Vector ret; + for(unsigned d = 0; d < Dim;d++){ + ret[d] = min(a[d], b[d]); + } + return ret; + } template - KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b); + KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b){ + using Kokkos::max; + Vector ret; + for(unsigned d = 0; d < Dim;d++){ + ret[d] = max(a[d], b[d]); + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION ippl::Vector operator *(const O &o, const ippl::Vector& v) noexcept { + ippl::Vector ret; + for (unsigned i = 0; i < N; i++) { + ret[i] = v[i] * o; + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION Vector cross(const ippl::Vector& a, const ippl::Vector& b) { + ippl::Vector ret{0.0, 0.0, 0.0}; + ret[0] = a[1] * b[2] - a[2] * b[1]; + ret[1] = a[2] * b[0] - a[0] * b[2]; + ret[2] = a[0] * b[1] - a[1] * b[0]; + return ret; + } + template + KOKKOS_INLINE_FUNCTION T dot(const ippl::Vector& a, const ippl::Vector& b) { + T ret = 0; + for(unsigned k = 0;k < dim;k++){ + ret += a[k] * b[k]; + } + return ret; + } } // namespace ippl -#include "Vector.hpp" +//#include "Vector.hpp" #endif diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index 358c475cf..dda292fff 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -25,6 +25,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestFDTDSolver TestFDTDSolver.cpp) +target_link_libraries ( + TestFDTDSolver + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) if (ENABLE_FFT) add_executable (TestGaussian_convergence TestGaussian_convergence.cpp) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp new file mode 100644 index 000000000..65446b03f --- /dev/null +++ b/test/solver/TestFDTDSolver.cpp @@ -0,0 +1,1774 @@ +#include "Ippl.h" + +#include "Types/Vector.h" + +#include "Field/Field.h" + +#include "MaxwellSolvers/FDTD.h" +#include +#include +#include +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include +//CONFIG +KOKKOS_INLINE_FUNCTION bool isNaN(float x){ + #ifdef __CUDA_ARCH__ + return isnan(x); + #else + return std::isnan(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isINF(float x){ + #ifdef __CUDA_ARCH__ + return isinf(x); + #else + return std::isinf(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isNaN(double x){ + #ifdef __CUDA_ARCH__ + return isnan(x); + #else + return std::isnan(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isINF(double x){ + #ifdef __CUDA_ARCH__ + return isinf(x); + #else + return std::isinf(x); + #endif +} +#define assert_isreal(X) assert(!isNaN(X) && !isINF(X)) + + + + + + +struct config { + using scalar = double; + + //using length_unit = funits::length; + //using duration_unit = funits::time; + // GRID PARAMETERS + ippl::Vector resolution; + + ippl::Vector extents; + scalar total_time; + scalar timestep_ratio; + + scalar length_scale_in_jobfile, temporal_scale_in_jobfile; + + // All in unit_charge, or unit_mass + scalar charge, mass; + + uint64_t num_particles; + bool space_charge; + + // BUNCH PARAMETERS + ippl::Vector mean_position; + ippl::Vector sigma_position; + ippl::Vector position_truncations; + ippl::Vector sigma_momentum; + scalar bunch_gamma; + + scalar undulator_K; + scalar undulator_period; + scalar undulator_length; + + uint32_t output_rhythm; + std::unordered_map experiment_options; +}; +template +ippl::Vector getVector(const nlohmann::json& j){ + if(j.is_array()){ + assert(j.size() == Dim); + ippl::Vector ret; + for(unsigned i = 0;i < Dim;i++) + ret[i] = (scalar)j[i]; + return ret; + } + else{ + std::cerr << "Warning: Obtaining Vector from scalar json\n"; + ippl::Vector ret; + ret.fill((scalar)j); + return ret; + } +} +template +struct DefaultedStringLiteral { + constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) { + std::copy_n(str, N, key); + } + + T value; + char key[N]; +}; +template +struct StringLiteral { + constexpr StringLiteral(const char (&str)[N]) { + std::copy_n(str, N, value); + } + + char value[N]; + constexpr DefaultedStringLiteral operator>>(int t)const noexcept{ + return DefaultedStringLiteral(value, t); + } + constexpr size_t size()const noexcept{return N - 1;} +}; +template +constexpr size_t chash(){ + size_t hash = 5381; + int c; + + for(size_t i = 0;i < lit.size();i++){ + c = lit.value[i]; + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const char* val) { + size_t hash = 5381; + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const std::string& _val) { + size_t hash = 5381; + const char* val = _val.c_str(); + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +std::string lowercase_singular(std::string str) { + // Convert string to lowercase + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + + // Check if the string ends with "s" and remove it if it does + if (!str.empty() && str.back() == 's') { + str.pop_back(); + } + + return str; +} +double get_time_multiplier(const nlohmann::json& j){ + std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]); + double time_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-time">(): + case chash<"plancktime">(): + case chash<"pt">(): + case chash<"natural">(): + time_factor = unit_time_in_seconds; + break; + case chash<"picosecond">(): + time_factor = 1e-12; + break; + case chash<"nanosecond">(): + time_factor = 1e-9; + break; + case chash<"microsecond">(): + time_factor = 1e-6; + break; + case chash<"millisecond">(): + time_factor = 1e-3; + break; + case chash<"second">(): + time_factor = 1.0; + break; + default: + std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n"; + break; + } + return time_factor; +} +double get_length_multiplier(const nlohmann::json& options){ + std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]); + double length_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-length">(): + case chash<"plancklength">(): + case chash<"pl">(): + case chash<"natural">(): + length_factor = unit_length_in_meters; + break; + case chash<"picometer">(): + length_factor = 1e-12; + break; + case chash<"nanometer">(): + length_factor = 1e-9; + break; + case chash<"micrometer">(): + length_factor = 1e-6; + break; + case chash<"millimeter">(): + length_factor = 1e-3; + break; + case chash<"meter">(): + length_factor = 1.0; + break; + default: + std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n"; + break; + } + return length_factor; +} +config read_config(const char *filepath){ + std::ifstream cfile(filepath); + nlohmann::json j; + cfile >> j; + config::scalar lmult = get_length_multiplier(j); + config::scalar tmult = get_time_multiplier(j); + config ret; + + ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters; + ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters; + ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters; + ret.resolution = getVector(j["mesh"]["resolution"]); + + //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; + //std::cerr << "Tmult: " << tmult << "\n"; + if(j.contains("timestep-ratio")){ + ret.timestep_ratio = (config::scalar)j["timestep-ratio"]; + } + + else{ + ret.timestep_ratio = 1; + } + ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; + ret.space_charge = (bool)(j["mesh"]["space-charge"]); + ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); + if(ret.bunch_gamma < config::scalar(1)){ + std::cerr << "Gamma must be >= 1\n"; + exit(1); + } + assert(j.contains("undulator")); + assert(j["undulator"].contains("static-undulator")); + + ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; + ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters; + ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters; + assert(!std::isnan(ret.undulator_length)); + assert(!std::isnan(ret.undulator_period)); + assert(!std::isnan(ret.extents[0])); + assert(!std::isnan(ret.extents[1])); + assert(!std::isnan(ret.extents[2])); + assert(!std::isnan(ret.total_time)); + ret.length_scale_in_jobfile = get_length_multiplier(j); + ret.temporal_scale_in_jobfile = get_time_multiplier(j); + ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; + ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; + ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"]; + ret.mean_position = getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; + ret.sigma_position = getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; + ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters; + ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); + ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; + if(j.contains("experimentation")){ + nlohmann::json je = j["experimentation"]; + for(auto it = je.begin(); it!= je.end();it++){ + ret.experiment_options[it.key()] = double(it.value()); + } + } + return ret; +} +template +using FieldVector = ippl::Vector; +template +struct BunchInitialize { + + /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is + * manual the charge at points of the position vector will be produced. */ + // std::string bunchType_; + + /* Type of the distributions (transverse or longitudinal) in the bunch. */ + std::string distribution_; + + /* Type of the generator for creating the bunch distribution. */ + std::string generator_; + + /* Total number of macroparticles in the bunch. */ + unsigned int numberOfParticles_; + + /* Total charge of the bunch in pC. */ + scalar cloudCharge_; + + /* Initial energy of the bunch in MeV. */ + scalar initialGamma_; + + /* Initial normalized speed of the bunch. */ + scalar initialBeta_; + + /* Initial movement direction of the bunch, which is a unit vector. */ + FieldVector initialDirection_; + + /* Position of the center of the bunch in the unit of length scale. */ + // std::vector > position_; + FieldVector position_; + + /* Number of macroparticles in each direction for 3Dcrystal type. */ + FieldVector numbers_; + + /* Lattice constant in x, y, and z directions for 3D crystal type. */ + FieldVector latticeConstants_; + + /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal + * type, it will be the spread in position for each micro-bunch of the crystal. */ + FieldVector sigmaPosition_; + + /* Spread in energy in each direction. */ + FieldVector sigmaGammaBeta_; + + /* Store the truncation transverse distance for the electron generation. */ + scalar tranTrun_; + + /* Store the truncation longitudinal distance for the electron generation. */ + scalar longTrun_; + + /* Name of the file for reading the electrons distribution from. */ + std::string fileName_; + + /* The radiation wavelength corresponding to the bunch length outside the undulator */ + scalar lambda_; + + /* Bunching factor for the initialization of the bunch. */ + scalar bF_; + + /* Phase of the bunching factor for the initialization of the bunch. */ + scalar bFP_; + + /* Boolean flag determining the activation of shot-noise. */ + bool shotNoise_; + + /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. */ + FieldVector betaVector_; + + /* Initialize the parameters for the bunch initialization to some first values. */ + // BunchInitialize (); +}; + + + + + +//END CONFIG + +//LORENTZ FRAME AND UNDULATOR +template +struct UniaxialLorentzframe{ + constexpr static T c = 1.0; + using scalar = T; + using Vector3 = ippl::Vector; + scalar beta_m; + scalar gammaBeta_m; + scalar gamma_m; + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){ + gammaBeta_m = gammaBeta; + beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta)); + gamma_m = sqrt(1 + gammaBeta * (gammaBeta)); + } + KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{ + arg[axis] = gamma_m * (arg[axis] + beta_m * time); + } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB(const Kokkos::pair, ippl::Vector>& unprimedEB)const noexcept{ + + Kokkos::pair, ippl::Vector> ret; + ippl::Vector betavec{0, 0, beta_m}; + ret.first = ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; + ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; + return ret; + } +}; +template +BunchInitialize generate_mithra_config(const config& cfg, const UniaxialLorentzframe& /*frame_boost unused*/) { + using vec3 = ippl::Vector; + scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K); + BunchInitialize init; + init.generator_ = "random"; + init.distribution_ = "uniform"; + init.initialDirection_ = vec3{0, 0, 1}; + init.initialGamma_ = cfg.bunch_gamma; + init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); + init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); + init.sigmaPosition_ = cfg.sigma_position.template cast(); + + // TODO: Initial bunching factor huh + init.bF_ = 0.01; + init.bFP_ = 0; + init.shotNoise_ = false; + init.cloudCharge_ = cfg.charge; + init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); + init.longTrun_ = cfg.position_truncations[2]; + init.tranTrun_ = cfg.position_truncations[0]; + init.position_ = cfg.mean_position.cast(); + init.betaVector_ = ippl::Vector{0,0,init.initialBeta_}; + init.numberOfParticles_ = cfg.num_particles; + + init.numbers_ = 0; // UNUSED + init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED + + return init; +} +template +struct Charge { + + Double q; /* Charge of the point in the unit of electron charge. */ + FieldVector rnp, rnm; /* Position vector of the charge. */ + FieldVector gb; /* Normalized velocity vector of the charge. */ + + /* Double flag determining if the particle is passing the entrance point of the undulator. This flag + * can be used for better boosting the bunch to the moving frame. We need to consider it to be double, + * because this flag needs to be communicated during bunch update. */ + Double e; + + // Charge(); +}; +template +using ChargeVector = std::list>; +template +void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector & chargeVector, int rank, int size, int ia){ + /* Correct the number of particles if it is not a multiple of four. */ + if ( bunchInit.numberOfParticles_ % 4 != 0 ) + { + unsigned int n = bunchInit.numberOfParticles_ % 4; + bunchInit.numberOfParticles_ += 4 - n; + //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") + + // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); + } + + /* Save the initially given number of particles. */ + unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); + + /* Declare the required parameters for the initialization of charge vectors. */ + Charge charge; charge.q = bunchInit.cloudCharge_ / Np; + FieldVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; + FieldVector r (0.0); + FieldVector t (0.0); + Double t0, g; + Double zmin = 1e100; + Double Ne, bF, bFi; + unsigned int bmi; + std::vector randomNumbers; + + /* The initialization in group of four particles should only be done if there exists an undulator in + * the interaction. */ + unsigned int ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4; + + /* Check the bunching factor. */ + if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 ) + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") ); + //exit(1); + } + + /* If the generator is random we should make sure that different processors do not produce the same + * random numbers. */ + if ( bunchInit.generator_ == "random" ) + { + /* Initialize the random number generator. */ + srand ( time(NULL) ); + /* Np / ng * 20 is the maximum number of particles. */ + randomNumbers.resize( Np / ng * 20, 0.0); + for ( unsigned int ri = 0; ri < Np / ng * 20; ri++) + randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX)); + } + + /* Declare the generator function depending on the input. */ + auto generate = [&] (unsigned int n, unsigned int m) { + //if ( bunchInit.generator_ == "random" ) + return ( randomNumbers.at( n * 2 * Np/ng + m ) ); + //else + // return ( randomNumbers[ n * 2 * Np/ng + m ] ); + //TODO: Return halton properly + //return ( halton(n,m) ); + }; + + /* Declare the function for injecting the shot noise. */ + auto insertCharge = [&] (Charge q) { + + for ( unsigned int ii = 0; ii < ng; ii++ ) + { + /* The random modulation is introduced depending on the shot-noise being activated. */ + if ( bunchInit.shotNoise_ ) + { + /* Obtain the number of beamlet. */ + bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ ); + + /* Obtain the phase and amplitude of the modulation. */ + bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) ); + + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) ); + } + else if ( bunchInit.lambda_ != 0.0) + { + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 ); + } + + /* Set this charge into the charge vector. */ + chargeVector.push_back(q); + } + }; + + /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to + * calculate the FEL bucket number. */ + if ( bunchInit.shotNoise_ ) + { + for (i = 0; i < Np / ng; i++) + { + if ( bunchInit.distribution_ == "uniform" ) + zmin = std::min( Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin ); + else if ( bunchInit.distribution_ == "gaussian" ) + zmin = std::min( (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin ); + else + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); + exit(1); + } + } + + if ( bunchInit.distribution_ == "uniform" ) + for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++) + { + t0 = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) ); + t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); + + zmin = std::min( t0 , zmin ); + } + + //zmin = zmin + bunchInit.position_[ia][2]; + zmin = zmin + bunchInit.position_[2]; + + /* Obtain the average number of electrons per FEL beamlet. */ + Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] ); + + /* Set the bunching factor level for the shot noise depending on the given values. */ + bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_; + + //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) ); + } + + /* Determine the properties of each charge point and add them to the charge vector. */ + for (i = rank; i < Np / ng; i += size) + { + /* Determine the transverse coordinate. */ + r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); + r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); + + /* Determine the longitudinal coordinate. */ + if ( bunchInit.distribution_ == "uniform" ) + r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2]; + else if ( bunchInit.distribution_ == "gaussian" ) + r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); + else + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); + exit(1); + } + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); + + if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) + { + /* Shift the generated charge to the center position and momentum space. */ + //charge.rnp = bunchInit.position_[ia]; + charge.rnp = bunchInit.position_; + charge.rnp += r; + + charge.gb = gb; + charge.gb += t; + //std::cout << gb << "\n"; + if(std::isinf(gb[2])){ + std::cerr << "it klonked here\n"; + } + + /* Insert this charge and the mirrored ones into the charge vector. */ + insertCharge(charge); + } + } + + /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the + * CSE from the tail of the bunch. */ + if ( bunchInit.distribution_ == "uniform" ){ + for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size) + { + + r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); + r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); + + /* Determine the longitudinal coordinate. */ + r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); + r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); + //std::cerr << "DOING UNIFORM tapering!!!\n"; + if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) + { + //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n"; + /* Shift the generated charge to the center position and momentum space. */ + charge.rnp = bunchInit.position_[ia]; + charge.rnp += r; + + charge.gb = gb; + + charge.gb += t; + //std::cout << gb[0] << "\n"; + //if(std::isinf(gb.squaredNorm())){ + // std::cerr << "it klonked here\n"; + //} + /* Insert this charge and the mirrored ones into the charge vector. */ + insertCharge(charge); + } + } + } + + /* Reset the value for the number of particle variable according to the installed number of + * macro-particles and perform the corresponding changes. */ + bunchInit.numberOfParticles_ = chargeVector.size(); +} +template +void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma){ + Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); + Double zmaxL = -1.0e100, zmaxG; + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) + { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + if(std::isinf(g)){ + std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n"; + abort(); + } + Double bz = iterQ->gb[2] / g; + iterQ->rnp[2] *= frame_gamma; + + iterQ->gb[2] = frame_gamma * g * ( bz - frame_beta ); + + zmaxL = std::max( zmaxL , iterQ->rnp[2] ); + } + zmaxG = zmaxL; + struct { + Double zu_; + Double beta_; + } bunch_; + bunch_.zu_ = zmaxG; + bunch_.beta_ = frame_beta; + + /****************************************************************************************************/ + + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) + { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + iterQ->rnp[0] += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + iterQ->rnp[1] += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + iterQ->rnp[2] += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + if(std::isnan(iterQ->rnp[2])){ + std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ << ", " << frame_beta << "\n"; + std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; + abort(); + } + } +} + + + + +template +size_t initialize_bunch_mithra( + bunch_type& bunch, + const BunchInitialize& bunchInit, + scalar frame_gamma){ + + ChargeVector oof; + initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0); + for(auto& c : oof){ + if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + } + boost_bunch(oof, frame_gamma); + std::cout << "boost gamma" << frame_gamma << "\n"; + for(auto& c : oof){ + if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){ + std::cout << "Pos after boost: " << c.rnp << "\n"; + break; + } + } + Kokkos::View*, Kokkos::HostSpace> positions("", oof.size()); + Kokkos::View*, Kokkos::HostSpace> gammabetas("", oof.size()); + auto iterQ = oof.begin(); + for (size_t i = 0; i < oof.size(); i++) { + assert_isreal(iterQ->gb[0]); + assert_isreal(iterQ->gb[1]); + assert_isreal(iterQ->gb[2]); + assert(iterQ->gb[2] != 0.0f); + scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + assert_isreal(g); + scalar bz = iterQ->gb[2] / g; + assert_isreal(bz); + (void)bz; + positions(i) = iterQ->rnp; + gammabetas(i) = iterQ->gb; + ++iterQ; + } + if(oof.size() > bunch.getLocalNum()){ + bunch.create(oof.size() - bunch.getLocalNum()); + } + Kokkos::View*> dpositions ("", oof.size()); + Kokkos::View*> dgammabetas("", oof.size()); + + Kokkos::deep_copy(dpositions, positions); + Kokkos::deep_copy(dgammabetas, gammabetas); + Kokkos::deep_copy(bunch.R_nm1.getView(), positions); + Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas); + auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();; + Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ + rview(i) = dpositions(i); + rm1view(i) = dpositions(i); + gbview(i) = dgammabetas(i); + }); + Kokkos::fence(); + ippl::Vector meanpos = bunch.R.sum() * (1.0 / oof.size()); + + Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ + rview(i) -= meanpos; + rm1view(i) -= meanpos; + }); + return oof.size(); +} + + + + + + + +//END LORENTZ FRAME AND UNDULATOR AND BUNCH + + + + + +//PREAMBLE + +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + for(unsigned d = 0;d < dim;d++){ + vec[d] = vec[d] * vec[d]; + } + #ifndef __CUDA_ARCH__ + using std::exp; + #endif + return exp(-(vec.sum()) / (stddev * stddev)); +} + +constexpr double sq(double x){ + return x * x; +} +constexpr float sq(float x){ + return x * x; +} + + +template +constexpr KOKKOS_INLINE_FUNCTION auto first(){ + return a; +} +template +constexpr KOKKOS_INLINE_FUNCTION auto second(){ + return b; +} + +template +KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, + const ippl::Vector& b) { + ippl::Vector ret{0.0, 0.0, 0.0}; + ret[0] = a[1] * b[2] - a[2] * b[1]; + ret[1] = a[2] * b[0] - a[0] * b[2]; + ret[2] = a[0] * b[1] - a[1] * b[0]; + return ret; +} +template +KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ + //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; + //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + Kokkos::pair, ippl::Vector> ret; + //pos -= spacing * T(0.5); + ippl::Vector relpos = pos - origin; + ippl::Vector gridpos = relpos / hr; + ippl::Vector ipos; + ipos = gridpos.template cast(); + ippl::Vector fracpos; + for(unsigned k = 0;k < 3;k++){ + fracpos[k] = gridpos[k] - (int)ipos[k]; + } + //TODO: NGHOST!!!!!!! + ipos += ippl::Vector(1); + ret.first = ipos; + ret.second = fracpos; + return ret; +} +template +KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} +template +KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ + //std::cout << "Value: " << value << "\n"; + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + //std::cout << "out of bounds\n"; + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} + +template +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ + + + Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); + //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); + for(int d = 0;d < 3;d++){ + //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ + // std::cout <(nghost); + //to_ipos += ippl::Vector(nghost); + + if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ + scatterToGrid(Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const int nghost = 1; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); + scatterToGrid(Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); +} + +// END PREAMBLE + + +//BEGIN ABSORBING BOUNDARY CONDITION +template +struct second_order_abc_face{ + using scalar = _scalar; + scalar Cweights[5]; + int sign; + constexpr static unsigned main_axis = _main_axis; + KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ + constexpr scalar c = 1; + constexpr unsigned side_axes[2] = {_side_axes...}; + static_assert( + (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || + (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || + (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) + ); + assert(_main_axis != side_axes[0]); + assert(_main_axis != side_axes[1]); + assert(side_axes[0] != side_axes[1]); + constexpr scalar truncation_order = 2.0; + scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); + scalar q = - 1.0 / ( 1 + 1 ); + + scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); + + Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + assert(abs(Cweights[1] + 1) < 1e-6); //Like literally + Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; + Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; + Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + constexpr unsigned side_axes[2] = {_side_axes...}; + ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; + ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; + ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); + return advanceBoundaryS( + A_nm1(i,j,k), A_n(i,j,k), + apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), + apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), + apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), + apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) + ); + } + template + KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , + const value_type& v3 , const value_type& v4 , const value_type& v5 , + const value_type& v6 , const value_type& v7 , const value_type& v8 , + const value_type& v9 , const value_type& v10, const value_type& v11, + const value_type& v12, const value_type& v13)const noexcept + { + + value_type v0 = + Cweights[0] * (v1 + v5) + + (Cweights[1]) * v3 + + (Cweights[2]) * ( v2 + v4 ) + + (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + + (Cweights[4]) * ( v8 + v9 + v12 + v13 ); + return v0; + } +}; +template +struct second_order_abc_edge{ + using scalar = _scalar; + // + scalar Eweights[5]; + + KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ + static_assert(normal_axis1 != normal_axis2); + static_assert(edge_axis != normal_axis2); + static_assert(edge_axis != normal_axis1); + static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); + constexpr scalar c0_ = scalar(1); + scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); + if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else{ + assert(false); + } + + + + + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; + ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); + ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); + ippl::Vector acc0 = {i, j, k}; + ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); + ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); + ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); + //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); + ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; + //return A_n(i, j, k); + return advanceEdgeS( + A_n(i, j, k), A_nm1(i, j, k), + apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), + apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), + apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), + apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), + apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceEdgeS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19)const noexcept{ + value_type v0 = + Eweights[0] * (v3 + v8) + + Eweights[1] * (v5 + v6) + + Eweights[2] * (v2 + v9) + + Eweights[3] * (v1 + v4 + v7 + v10) + + Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; + return v0; + } +}; +template +struct second_order_abc_corner{ + using scalar = _scalar; + scalar Cweights[17]; + KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ + constexpr scalar c0_ = scalar(1); + Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + //First implementation: 0,0,0 corner + constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); + constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); + constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); + using ippl::apply; + constexpr ippl::Vector offsets[8] = { + ippl::Vector{0,0,0}, + ippl::Vector{xoff,0,0}, + ippl::Vector{0,yoff,0}, + ippl::Vector{0,0,zoff}, + ippl::Vector{xoff,yoff,0}, + ippl::Vector{xoff,0,zoff}, + ippl::Vector{0,yoff,zoff}, + ippl::Vector{xoff,yoff,zoff}, + }; + return advanceCornerS( + apply(A_n, c), apply(A_nm1, c), + apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), + apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), + apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), + apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), + apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), + apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), + apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceCornerS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19, value_type v20, + value_type v21, value_type v22, value_type v23)const noexcept{ + return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + + v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + + v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + + v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + + v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + + v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + + v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + + v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; + } +}; + + + + + + +struct second_order_mur_boundary_conditions{ + template + void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr){ + using scalar = decltype(dt); + //TODO: tbh don't know + //const unsigned nghost = 1; + const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); + //assert_isreal((betaMur[0])); + //assert_isreal((betaMur[1])); + //assert_isreal((betaMur[2])); + auto A_n = FA_n.getView(); + auto A_np1 = FA_np1.getView(); + auto A_nm1 = FA_nm1.getView(); + + Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + if(Kokkos::popcount(val) == 1){ + if(i == 0){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(j == 0){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(k == 0){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(i == true_nr[0] - 1){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(j == true_nr[1] - 1){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(k == true_nr[2] - 1){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + } + }); + Kokkos::fence(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + if(Kokkos::popcount(val) == 2){ //Edge + if(i == 0 && k == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == 0 && j == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(j == 0 && k == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(i == 0 && k == true_nr[2] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == 0 && j == true_nr[1] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(j == 0 && k == true_nr[2] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(i == true_nr[0] - 1 && k == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(j == true_nr[1] - 1 && k == 0){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(i == true_nr[0] - 1 && k == true_nr[2] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == true_nr[1] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(j == true_nr[1] - 1 && k == true_nr[2] - 1){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + + if(Kokkos::popcount(val) == 3){ + //printf("Corner: %d, %d, %d\n", i, j, k); + if(i == 0 && j == 0 && k == 0){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == 0 && k == 0){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == 0 && j == true_nr[1] - 1 && k == 0){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == 0){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == 0 && j == 0 && k == true_nr[2] - 1){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == 0 && k == true_nr[2] - 1){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == 0 && j == true_nr[1] - 1 && k == true_nr[2] - 1){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == true_nr[2] - 1){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + } +}; +//END ABSORBING BOUNDARY CONDITION + + + + + + + + + +namespace ippl { + + template + struct undulator_parameters{ + scalar lambda; //MITHRA: lambda_u + scalar K; //Undulator parameter + scalar length; + scalar B_magnitude; + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){ + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); + //std::cout << "Setting bmag: " << B_magnitude << "\n"; + } + undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda); + } + }; + + template + struct nondispersive{ + scalar a1; + scalar a2; + scalar a4; + scalar a6; + scalar a8; + }; + template + struct Bunch : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(EB_gather); // Electric field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step + ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering + + }; + + + template + // clang-format off + struct FDTDFieldState{ + + //Sorry, can't do more than 3d + + constexpr static unsigned int dim = 3; + using Vector_t = ippl::Vector; + using value_type = ippl::Vector; + using EB_type = ippl::Vector, 2>; + using Mesh_t = ippl::UniformCartesian; + + bool periodic_bc; + + //Fields + using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_np1; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_n; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_nm1; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> J; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> EB; + + //Discretization options + Vector_t hr_m; + ippl::Vector nr_global; + ippl::Vector nr_local; + scalar dt; + Mesh_t* mesh_mp; + ParticleSpatialLayout playout; + Bunch> particles; + using bunch_type = Bunch>; + + /** + * @brief Construct a new FDTDFieldState object + * Mesh and resolution parameter are technically redundant + * + * @param resolution + * @param layout + * @param mesch + */ + FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), playout(layout, mesch), particles(playout){ + FA_np1.initialize(mesch, layout, 1); + FA_n.initialize(mesch, layout, 1); + FA_nm1.initialize(mesch, layout, 1); + J.initialize(mesch, layout, 1); + EB.initialize(mesch, layout, 1); + FA_n = value_type(0); + FA_np1 = value_type(0); + FA_nm1 = value_type(0); + hr_m = mesch.getMeshSpacing(); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) + }; + nr_local = ippl::Vector{ + uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1), + uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), + uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) + }; + std::cout << "NR_M_g: " << nr_global << "\n"; + std::cout << "NR_M_l: " << nr_local << "\n"; + dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2])); + particles.create(nparticles); + setNoBoundaryConditions(); + + } + void setNoBoundaryConditions() { + periodic_bc = false; + typename VField_t::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + FA_n .setFieldBC(vector_bcs); + FA_np1.setFieldBC(vector_bcs); + FA_nm1.setFieldBC(vector_bcs); + } + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename VField_t::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + FA_n .setFieldBC(vector_bcs); + FA_np1.setFieldBC(vector_bcs); + FA_nm1.setFieldBC(vector_bcs); + } + + void scatterBunch(){ + //ippl::Vector* gammaBeta = this->gammaBeta; + const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; + assert_isreal(volume); + assert_isreal((scalar(1) / volume)); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = this->dt; + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + Vector_t pos = rview(i); + Vector_t to = rview(i); + Vector_t from = rm1view(i); + //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume); + scatterLineToGrid(Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); + }); + Kokkos::fence(); + } + + void fieldStep(){ + const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); + nondispersive ndisp{ + .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), + .a2 = sq(dt / hr_m[0]), + .a4 = sq(dt / hr_m[1]), + .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), + .a8 = sq(dt) + }; + if(periodic_bc){ + FA_n.getFieldBC().apply(FA_n); + } + auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); + auto source = this->J.getView(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + A_np1(i, j, k) = -A_nm1(i,j,k) + + ndisp.a1 * A_n (i,j,k) + + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) + + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) + + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) + + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) + + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); + }); + Kokkos::fence(); + + if(!periodic_bc){ + second_order_mur_boundary_conditions bc; + ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; + bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr); + } + Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); + Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); + //std::swap(this->A_n, this->A_nm1); + //std::swap(this->A_np1, this->A_n); + evaluate_EB(); + } + void evaluate_EB(){ + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); + auto source = this->J.getView(); + auto EBv = this->EB.getView(); + Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{ + dAdx[0], dAdy[0], dAdz[0] + }; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + EBv(i,j,k)[0] = -dAdt - grad_phi; + EBv(i,j,k)[1] = curlA; + }); + Kokkos::fence(); + } + template + void updateBunch(scalar time, UniaxialLorentzframe ulb, callable undulator_field){ + + Kokkos::fence(); + auto gbview = particles.gamma_beta.getView(); + auto ebview = particles.EB_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); + scalar bunch_dt = dt / 3; + Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + Kokkos::fence(); + for(int bts = 0;bts < 3;bts++){ + gather(particles.EB_gather, EB, particles.R); + Kokkos::fence(); + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector, 2> EB = ebview(i); + ippl::Vector labpos = rview(i); + + ulb.primedToUnprimed(labpos, time); + + Kokkos::pair, ippl::Vector> EB_undulator_frame = undulator_field(labpos); + Kokkos::pair, ippl::Vector> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame); + assert_isreal((EB_undulator_bunch.first[0])); + assert_isreal((EB_undulator_bunch.first[1])); + assert_isreal((EB_undulator_bunch.first[2])); + assert_isreal((EB_undulator_bunch.second[0])); + assert_isreal((EB_undulator_bunch.second[1])); + assert_isreal((EB_undulator_bunch.second[2])); + EB[0] += EB_undulator_bunch.first; + EB[1] += EB_undulator_bunch.second; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + assert_isreal((ngammabeta[0])); + assert_isreal((ngammabeta[1])); + assert_isreal((ngammabeta[2])); + rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); + Kokkos::fence(); + } + Kokkos::fence(); + + } + }; + // clang-format on +} // namespace ippl + +int main(int argc, char* argv[]) { + using scalar = float; + ippl::initialize(argc, argv); + { + + config cfg = read_config("../config.json"); + const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); + cfg.extents[2] *= frame_gamma; + cfg.total_time /= frame_gamma; + + const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); + const scalar frame_gammabeta = frame_gamma * frame_beta; + UniaxialLorentzframe frame_boost(frame_gammabeta); + ippl::undulator_parameters uparams(cfg); + const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; + const scalar distance_to_entry = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma); + auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector& position_in_lab_frame){ + Kokkos::pair, ippl::Vector> ret; + ret.first.fill(0); + ret.second.fill(0); + + if(position_in_lab_frame[2] < distance_to_entry){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; + } + else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); + } + return ret; + + }; + BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); + ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); + + std::array isParallel; + isParallel.fill(true); + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])}; + ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)}; + ippl::UniformCartesian mesh(owned, hx, origin); + std::cout << "hx: " << hx << "\n"; + std::cout << "origin: " << origin << "\n"; + std::cout << "extents: " << cfg.extents << std::endl; + if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){ + std::cerr << "Dispersion relation not satisfiable\n"; + abort(); + } + + ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/); + fdtd_state.particles.Q = scalar(cfg.charge / cfg.num_particles); + fdtd_state.particles.mass = scalar(cfg.mass / cfg.num_particles); + auto rv = fdtd_state.particles.R.getView(); + auto rm1 = fdtd_state.particles.R_nm1.getView(); + size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); + fdtd_state.particles.Q = cfg.charge / actual_pc; + fdtd_state.particles.mass = cfg.mass / actual_pc; + //fdtd_state.scatterBunch(); + //std::cout << cfg.charge << "\n"; + + size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); + for(int i = 0;i < timesteps_required;i++){ + fdtd_state.J = scalar(0.0); + fdtd_state.scatterBunch(); + //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; + fdtd_state.fieldStep(); + fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); + //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; + //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; + + if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ + int img_height = 500; + int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]); + float* imagedata = new float[img_width * img_height * 3]; + uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; + std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); + auto phmirror = fdtd_state.particles.R.getHostMirror(); + Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView()); + for(size_t hi = 0;hi < phmirror.extent(0);hi++){ + ippl::Vector ppos = phmirror(hi); + ppos -= mesh.getOrigin(); + ppos /= cfg.extents.cast(); + int x_imgcoord = ppos[2] * img_width; + int y_imgcoord = ppos[0] * img_height; + //printf("%d, %d\n", x_imgcoord, y_imgcoord); + if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){ + const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); + //std::cout << intensity << "\n"; + imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = + std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); + } + }; + + char output[1024] = {0}; + snprintf(output, 1023, "../data/outimage%.05d.bmp", i); + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;}); + stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + delete[] imagedata; + delete[] imagedata_final; + } + } + } + ippl::finalize(); +} \ No newline at end of file From c8cb043d3098daded79d2afee8aeb325b47bf1a3 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 13 Apr 2024 18:09:26 +0200 Subject: [PATCH 02/32] Working on MPI --- CMakeLists.txt | 2 +- config.json | 84 +++++++++++++ src/Particle/ParticleAttrib.hpp | 10 ++ test/solver/TestFDTDSolver.cpp | 214 +++++++++++++++++++++----------- 4 files changed, 239 insertions(+), 71 deletions(-) create mode 100644 config.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 46a2ac866..058bfab5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ set (CMAKE_CXX_STANDARD 20) set (CMAKE_CUDA_EXTENSIONS OFF) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) -set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ") +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g") set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") #set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") diff --git a/config.json b/config.json new file mode 100644 index 000000000..5b5c053e4 --- /dev/null +++ b/config.json @@ -0,0 +1,84 @@ +{ + "timestep-ratio" : 1.0, + "mesh": { + "length-scale" : "micrometers", + "time-scale" : "picoseconds", + "extents": [3400.0, 3400.0, 280.0], + "resolution": [64, 64, 2800], + "mesh-center": [0.0, 0.0, 0.0], + "total-time": 30000.0, + "bunch-time-step": 1.6, + "mesh-truncation-order": 2, + "space-charge": false, + "solver": "NSFD" + }, + "bunch": { + "type": "ellipsoid", + "distribution": "uniform", + "_charge": 1, + "_mass": 1, + "charge": 1.846e8, + "mass": 1.846e8, + "number-of-particles": 500000, + "gamma": 100.41, + "_gamma": 1.0, + "direction (ignored)": [0.0, 0.0, 1.0], + "position is 0 because that's the center": "", + "position": [0.0, 0.0, 0.0], + "_sigma-position (those are commented out)": [260.0, 260.0, 50.25], + "_sigma-momentum (those are commented out)": [1.0e-8, 1.0e-8, 100.41e-4], + "sigma-position": [260.0, 260.0, 50.25], + "sigma-momentum": [1.0e-8, 1.0e-8, 100.41e-4], + "particle-gammabetas": [0, 0, 0], + "transverse-truncation (Deprecated! see distribution-truncations)": 1040.0, + "longitudinal-truncation (Deprecated! see distribution-truncations)": 90.0, + "distribution-truncations" : [1040.0, 1040.0, 90.0], + "bunching-factor": 0.01, + "update-rule" : {"type" : "lorentz"} + }, + "field": { + "update-rule" : "do", + "strength" : 100, + "field-sampling": { + "sample": true, + "type": "at-point", + "field": ["Ex", "Ey", "Ez"], + "directory": "./", + "base-name": "field-sampling/field", + "rhythm": 3.2, + "position": [0.0, 0.0, 110.0] + } + }, + "undulator": { + "static-undulator": { + "undulator-parameter": 1.417, + "_undulator-parameter": 0, + "period": 30000.0, + "length": 5e6, + "polarization-angle": 0.0 + } + }, + "output":{ + "rhythm" : 30, + "count" : 20, + "type" : "E", + "track" : { + "radiation" : "boundary_radiation.txt", + "particle-position" : "p0pos.txt" + } + }, + "fel-output": { + "radiation-power": { + "sample": true, + "type": "at-point", + "directory": "./", + "base-name": "power-sampling/power-NSFD", + "plane-position": 110.0, + "normalized-frequency": 1.0 + } + }, + "experimentation" : { + "stretch-factor" : 1.0, + "resort" : 1.0 + } + } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 9f22b72be..e53ae6402 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -194,6 +194,16 @@ namespace ippl { Vector args = (index - lDom.first() + nghost).template cast(); // gather + for(unsigned d = 0;d < Field::dim;d++){ + if(args[d] >= view.extent(d)){ + dview_m(idx) = T(0); + return; + } + else if(args[d] < 1){ + dview_m(idx) = T(0); + return; + } + } dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, view, wlo, whi, args); }); diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 65446b03f..54f099246 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -707,7 +707,6 @@ size_t initialize_bunch_mithra( std::cout << "Pos before boost: " << c.rnp << "\n"; } boost_bunch(oof, frame_gamma); - std::cout << "boost gamma" << frame_gamma << "\n"; for(auto& c : oof){ if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){ std::cout << "Pos after boost: " << c.rnp << "\n"; @@ -748,12 +747,7 @@ size_t initialize_bunch_mithra( gbview(i) = dgammabetas(i); }); Kokkos::fence(); - ippl::Vector meanpos = bunch.R.sum() * (1.0 / oof.size()); - Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ - rview(i) -= meanpos; - rm1view(i) -= meanpos; - }); return oof.size(); } @@ -832,7 +826,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gr return ret; } template -KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); if( ipos[0] < 0 @@ -861,7 +855,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - + ipos -= ldom.first(); for(unsigned i = 0;i < 8;i++){ scalar weight = 1; ippl::Vector ipos_l = ipos; @@ -877,7 +871,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, assert(abs(accum - 1.0f) < 1e-6f); } template -KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ //std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); if( @@ -908,7 +902,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - + ipos -= ldom.first(); for(unsigned i = 0;i < 8;i++){ scalar weight = 1; ippl::Vector ipos_l = ipos; @@ -926,7 +920,7 @@ KOKKOS_FUNCTION void scatterToGrid(view_type& view, ippl::Vector hr, } template -KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); @@ -943,7 +937,7 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector(nghost); if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ - scatterToGrid(Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); return; } @@ -957,8 +951,8 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(view_type& Jview, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); - scatterToGrid(Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); } // END PREAMBLE @@ -1195,7 +1189,7 @@ struct second_order_abc_corner{ struct second_order_mur_boundary_conditions{ template - void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr){ + void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ using scalar = decltype(dt); //TODO: tbh don't know //const unsigned nghost = 1; @@ -1206,32 +1200,45 @@ struct second_order_mur_boundary_conditions{ auto A_n = FA_n.getView(); auto A_np1 = FA_np1.getView(); auto A_nm1 = FA_nm1.getView(); - + ippl::Vector local_nr{ + uint32_t(A_n.extent(0)), + uint32_t(A_n.extent(1)), + uint32_t(A_n.extent(2)) + }; Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) + + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + if(Kokkos::popcount(val) == 1){ - if(i == 0){ + if(ig == 0){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(j == 0){ + if(jg == 0){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(k == 0){ + if(kg == 0){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(i == true_nr[0] - 1){ + if(ig == true_nr[0] - 1){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(j == true_nr[1] - 1){ + if(jg == true_nr[1] - 1){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(k == true_nr[2] - 1){ + if(kg == true_nr[2] - 1){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } @@ -1239,57 +1246,65 @@ struct second_order_mur_boundary_conditions{ }); Kokkos::fence(); Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 2)return; + uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) + + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); if(Kokkos::popcount(val) == 2){ //Edge - if(i == 0 && k == 0){ + if(ig == 0 && kg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && j == 0){ + else if(ig == 0 && jg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(j == 0 && k == 0){ + else if(jg == 0 && kg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && k == true_nr[2] - 1){ + else if(ig == 0 && kg == true_nr[2] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && j == true_nr[1] - 1){ + else if(ig == 0 && jg == true_nr[1] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(j == 0 && k == true_nr[2] - 1){ + else if(jg == 0 && kg == true_nr[2] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && k == 0){ + else if(ig == true_nr[0] - 1 && kg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == 0){ + else if(ig == true_nr[0] - 1 && jg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(j == true_nr[1] - 1 && k == 0){ + else if(jg == true_nr[1] - 1 && kg == 0){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && k == true_nr[2] - 1){ + else if(ig == true_nr[0] - 1 && kg == true_nr[2] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == true_nr[1] - 1){ + else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(j == true_nr[1] - 1 && k == true_nr[2] - 1){ + else if(jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } @@ -1300,40 +1315,48 @@ struct second_order_mur_boundary_conditions{ }); Kokkos::fence(); Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t val = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == true_nr[0] - 1) << 3) + (uint32_t(j == true_nr[1] - 1) << 4) + (uint32_t(k == true_nr[2] - 1) << 5); + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + //if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) + + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); if(Kokkos::popcount(val) == 3){ //printf("Corner: %d, %d, %d\n", i, j, k); - if(i == 0 && j == 0 && k == 0){ + if(ig == 0 && jg == 0 && kg == 0){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == 0 && k == 0){ + else if(ig == true_nr[0] - 1 && jg == 0 && kg == 0){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && j == true_nr[1] - 1 && k == 0){ + else if(ig == 0 && jg == true_nr[1] - 1 && kg == 0){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == 0){ + else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == 0){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && j == 0 && k == true_nr[2] - 1){ + else if(ig == 0 && jg == 0 && kg == true_nr[2] - 1){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == 0 && k == true_nr[2] - 1){ + else if(ig == true_nr[0] - 1 && jg == 0 && kg == true_nr[2] - 1){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == 0 && j == true_nr[1] - 1 && k == true_nr[2] - 1){ + else if(ig == 0 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(i == true_nr[0] - 1 && j == true_nr[1] - 1 && k == true_nr[2] - 1){ + else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } @@ -1444,6 +1467,7 @@ namespace ippl { ippl::Vector nr_local; scalar dt; Mesh_t* mesh_mp; + FieldLayout* layout_mp; ParticleSpatialLayout playout; Bunch> particles; using bunch_type = Bunch>; @@ -1456,7 +1480,7 @@ namespace ippl { * @param layout * @param mesch */ - FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), playout(layout, mesch), particles(playout){ + FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout){ FA_np1.initialize(mesch, layout, 1); FA_n.initialize(mesch, layout, 1); FA_nm1.initialize(mesch, layout, 1); @@ -1528,14 +1552,16 @@ namespace ippl { auto orig = mesh_mp->getOrigin(); auto hr = mesh_mp->getMeshSpacing(); auto dt = this->dt; + ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ Vector_t pos = rview(i); Vector_t to = rview(i); Vector_t from = rm1view(i); //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume); - scatterLineToGrid(Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); + scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); }); Kokkos::fence(); + J.accumulateHalo(); } void fieldStep(){ @@ -1547,11 +1573,13 @@ namespace ippl { .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), .a8 = sq(dt) }; - if(periodic_bc){ - FA_n.getFieldBC().apply(FA_n); - } + //if(periodic_bc){ + // FA_n.getFieldBC().apply(FA_n); + //} auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); auto source = this->J.getView(); + FA_nm1.fillHalo(); + FA_n.fillHalo(); Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ A_np1(i, j, k) = -A_nm1(i,j,k) + ndisp.a1 * A_n (i,j,k) @@ -1565,8 +1593,9 @@ namespace ippl { if(!periodic_bc){ second_order_mur_boundary_conditions bc; + const auto& ldom = layout_mp->getLocalNDIndex(); ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; - bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr); + bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom); } Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); @@ -1700,7 +1729,8 @@ int main(int argc, char* argv[]) { ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); std::array isParallel; - isParallel.fill(true); + isParallel.fill(false); + isParallel[2] = true; // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); @@ -1718,35 +1748,57 @@ int main(int argc, char* argv[]) { } ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/); - fdtd_state.particles.Q = scalar(cfg.charge / cfg.num_particles); - fdtd_state.particles.mass = scalar(cfg.mass / cfg.num_particles); - auto rv = fdtd_state.particles.R.getView(); - auto rm1 = fdtd_state.particles.R_nm1.getView(); - size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); - fdtd_state.particles.Q = cfg.charge / actual_pc; - fdtd_state.particles.mass = cfg.mass / actual_pc; + + if(ippl::Comm->rank() == 0){ + std::cout << "Init particles: " << std::endl; + size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); + fdtd_state.particles.Q = cfg.charge / actual_pc; + fdtd_state.particles.mass = cfg.mass / actual_pc; + } + else{ + fdtd_state.particles.create(0); + } + { + auto rview = fdtd_state.particles.R.getView(); + auto rm1view = fdtd_state.particles.R_nm1.getView(); + ippl::Vector meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum()); + + Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + rview(i) -= meanpos; + rm1view(i) -= meanpos; + }); + } //fdtd_state.scatterBunch(); //std::cout << cfg.charge << "\n"; size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); - for(int i = 0;i < timesteps_required;i++){ + for(size_t i = 0;i < timesteps_required;i++){ + if(ippl::Comm->rank() == 0){ + std::cout << "Step: " << i << std::endl; + } fdtd_state.J = scalar(0.0); + fdtd_state.playout.update(fdtd_state.particles); fdtd_state.scatterBunch(); //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; fdtd_state.fieldStep(); fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; - + int rank = ippl::Comm->rank(); + int size = ippl::Comm->size(); if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ + + auto ldom = layout.getLocalNDIndex(); int img_height = 500; int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]); float* imagedata = new float[img_width * img_height * 3]; + float* idata_recvbuffer = new float[img_width * img_height * 3]; + int floatcount = img_width * img_height * 3; uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); auto phmirror = fdtd_state.particles.R.getHostMirror(); Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView()); - for(size_t hi = 0;hi < phmirror.extent(0);hi++){ + for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){ ippl::Vector ppos = phmirror(hi); ppos -= mesh.getOrigin(); ppos /= cfg.extents.cast(); @@ -1760,12 +1812,34 @@ int main(int argc, char* argv[]) { std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); } }; - - char output[1024] = {0}; - snprintf(output, 1023, "../data/outimage%.05d.bmp", i); - std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;}); - stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + int mask = 1; + while (mask < size) { + int partner = rank ^ mask; + //if((rank & (mask - 1)) == 0) + { + if ((rank & mask) == 0) { + // Send data to partner + MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); + // Apply image summation + for(int f = 0;f < floatcount;f++){ + imagedata[f] += idata_recvbuffer[f]; + } + } else { + MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator()); + // Receive data from partner and apply reduction + + } + } + mask <<= 1; // Move to next bit position for pairing + } + if(rank == 0){ + char output[1024] = {0}; + snprintf(output, 1023, "../data/outimage%.05d.bmp", i); + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;}); + stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + } delete[] imagedata; + delete[] idata_recvbuffer; delete[] imagedata_final; } } From 2b25e729b7334fc7deea7ec819b55d5a0235f9a4 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 00:44:31 +0200 Subject: [PATCH 03/32] ABC working with mpi --- src/Types/Vector.h | 2 +- test/solver/TestFDTDSolver.cpp | 180 +++++++++++++++++++++++---------- 2 files changed, 129 insertions(+), 53 deletions(-) diff --git a/src/Types/Vector.h b/src/Types/Vector.h index 56963f864..0f1d00b96 100644 --- a/src/Types/Vector.h +++ b/src/Types/Vector.h @@ -66,7 +66,7 @@ namespace ippl { #ifndef __CUDA_ARCH__ using std::sqrt; #endif - return sqrt(squaredNorm()); + return Kokkos::sqrt(squaredNorm()); } KOKKOS_INLINE_FUNCTION Vector normalized() const noexcept { return *this / norm(); } KOKKOS_INLINE_FUNCTION value_type sum() const noexcept { diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 54f099246..6ef9c0210 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -5,6 +5,7 @@ #include "Field/Field.h" #include "MaxwellSolvers/FDTD.h" +#define JSON_HAS_RANGES 0 //Merlin compilation #include #include #include @@ -40,7 +41,17 @@ KOKKOS_INLINE_FUNCTION bool isINF(double x){ #endif } #define assert_isreal(X) assert(!isNaN(X) && !isINF(X)) - +template +KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, ippl::Vector to, Ts... args){ + if constexpr(sizeof...(Ts) == Dim){ + c(args...); + } + else{ + for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){ + serial_for(c, from, to, args..., i); + } + } +} @@ -647,6 +658,7 @@ void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma){ Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); @@ -828,6 +840,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gr template KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); if( ipos[0] < 0 ||ipos[1] < 0 @@ -855,7 +868,7 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - ipos -= ldom.first(); + for(unsigned i = 0;i < 8;i++){ scalar weight = 1; ippl::Vector ipos_l = ipos; @@ -874,6 +887,7 @@ template KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ //std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); if( ipos[0] < 0 ||ipos[1] < 0 @@ -902,7 +916,7 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - ipos -= ldom.first(); + for(unsigned i = 0;i < 8;i++){ scalar weight = 1; ippl::Vector ipos_l = ipos; @@ -930,7 +944,7 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ // std::cout <(nghost); @@ -1205,7 +1219,9 @@ struct second_order_mur_boundary_conditions{ uint32_t(A_n.extent(1)), uint32_t(A_n.extent(2)) }; - Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + constexpr uint32_t min_abc_boundary = 1; + constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ uint32_t ig = i + lDom.first()[0]; uint32_t jg = j + lDom.first()[1]; uint32_t kg = k + lDom.first()[2]; @@ -1214,38 +1230,39 @@ struct second_order_mur_boundary_conditions{ + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) - + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); if(Kokkos::popcount(val) == 1){ - if(ig == 0){ + if(ig == min_abc_boundary){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(jg == 0){ + if(jg == min_abc_boundary){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(kg == 0){ + if(kg == min_abc_boundary){ second_order_abc_face soa(hr, dt, 1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(ig == true_nr[0] - 1){ + if(ig == true_nr[0] - max_abc_boundary_sub){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(jg == true_nr[1] - 1){ + if(jg == true_nr[1] - max_abc_boundary_sub){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - if(kg == true_nr[2] - 1){ + if(kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_face soa(hr, dt, -1); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } } }); Kokkos::fence(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ uint32_t ig = i + lDom.first()[0]; uint32_t jg = j + lDom.first()[1]; uint32_t kg = k + lDom.first()[2]; @@ -1254,57 +1271,57 @@ struct second_order_mur_boundary_conditions{ + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); if(Kokkos::popcount(lval) > 2)return; - uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) - + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); if(Kokkos::popcount(val) == 2){ //Edge - if(ig == 0 && kg == 0){ + if(ig == min_abc_boundary && kg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && jg == 0){ + else if(ig == min_abc_boundary && jg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(jg == 0 && kg == 0){ + else if(jg == min_abc_boundary && kg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && kg == true_nr[2] - 1){ + else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && jg == true_nr[1] - 1){ + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(jg == 0 && kg == true_nr[2] - 1){ + else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && kg == 0){ + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == 0){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(jg == true_nr[1] - 1 && kg == 0){ + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && kg == true_nr[2] - 1){ + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_edge soa(hr, dt); A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } @@ -1314,7 +1331,8 @@ struct second_order_mur_boundary_conditions{ } }); Kokkos::fence(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ uint32_t ig = i + lDom.first()[0]; uint32_t jg = j + lDom.first()[1]; uint32_t kg = k + lDom.first()[2]; @@ -1323,40 +1341,40 @@ struct second_order_mur_boundary_conditions{ // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); //if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) - + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); if(Kokkos::popcount(val) == 3){ //printf("Corner: %d, %d, %d\n", i, j, k); - if(ig == 0 && jg == 0 && kg == 0){ + if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == 0 && kg == 0){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && jg == true_nr[1] - 1 && kg == 0){ + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == 0){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && jg == 0 && kg == true_nr[2] - 1){ + else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == 0 && kg == true_nr[2] - 1){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == 0 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } - else if(ig == true_nr[0] - 1 && jg == true_nr[1] - 1 && kg == true_nr[2] - 1){ + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ second_order_abc_corner coa(hr, dt); A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); } @@ -1468,7 +1486,8 @@ namespace ippl { scalar dt; Mesh_t* mesh_mp; FieldLayout* layout_mp; - ParticleSpatialLayout playout; + using playout_type = ParticleSpatialLayout; + playout_type playout; Bunch> particles; using bunch_type = Bunch>; @@ -1580,30 +1599,42 @@ namespace ippl { auto source = this->J.getView(); FA_nm1.fillHalo(); FA_n.fillHalo(); + ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; + const auto& ldom = layout_mp->getLocalNDIndex(); Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - A_np1(i, j, k) = -A_nm1(i,j,k) - + ndisp.a1 * A_n (i,j,k) - + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) - + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) - + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) - + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) - + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2) + + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5); + if(!val){ + A_np1(i, j, k) = -A_nm1(i,j,k) + + ndisp.a1 * A_n (i,j,k) + + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) + + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) + + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) + + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) + + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); + } }); Kokkos::fence(); if(!periodic_bc){ + FA_np1.fillHalo(); second_order_mur_boundary_conditions bc; - const auto& ldom = layout_mp->getLocalNDIndex(); - ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; + + bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom); } Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); //std::swap(this->A_n, this->A_nm1); //std::swap(this->A_np1, this->A_n); + evaluate_EB(); } void evaluate_EB(){ + FA_n.fillHalo();FA_nm1.fillHalo(); ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; const scalar idt = scalar(1.0) / dt; auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); @@ -1679,6 +1710,27 @@ namespace ippl { }); Kokkos::fence(); } + Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); + size_t invalid_count = 0; + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz;// + extenz[0] = nr_global[0] * hr_m[0]; + extenz[1] = nr_global[1] * hr_m[1]; + extenz[2] = nr_global[2] * hr_m[2]; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t& ref){ + bool out_of_bounds = false; + ippl::Vector ppos = rview(i); + for(size_t d = 0;d < dim;d++){ + out_of_bounds |= (ppos[d] <= origo[d]); + out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain + } + invalid(i) = out_of_bounds; + ref += out_of_bounds; + }, + invalid_count); + particles.destroy(invalid, invalid_count); Kokkos::fence(); } @@ -1772,6 +1824,7 @@ int main(int argc, char* argv[]) { //std::cout << cfg.charge << "\n"; size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); + for(size_t i = 0;i < timesteps_required;i++){ if(ippl::Comm->rank() == 0){ std::cout << "Step: " << i << std::endl; @@ -1789,9 +1842,10 @@ int main(int argc, char* argv[]) { if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ auto ldom = layout.getLocalNDIndex(); - int img_height = 500; - int img_width = int(500.0 * cfg.extents[2] / cfg.extents[0]); + int img_height = 1000; + int img_width = int(1000.0 * cfg.extents[2] / cfg.extents[0]); float* imagedata = new float[img_width * img_height * 3]; + std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); float* idata_recvbuffer = new float[img_width * img_height * 3]; int floatcount = img_width * img_height * 3; uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; @@ -1812,6 +1866,28 @@ int main(int argc, char* argv[]) { std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); } }; + auto ebh = fdtd_state.EB.getHostMirror(); + Kokkos::deep_copy(ebh, fdtd_state.EB.getView()); + + //double exp_avg = double(exp_sum) / double(acount); + { + for(int i = 1;i < img_width;i++){ + for(int j = 1;j < img_height;j++){ + int i_remap = (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; + int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; + if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ + if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ + ippl::Vector, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); + ippl::Vector poynting = acc[0].cross(acc[1]); + if(poynting.norm() > 0){ + //std::cout << poynting.norm() << "\n"; + } + imagedata[(j * img_width + i) * 3 + 0] += std::sqrt(poynting.norm()) * 0.5f;//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + } + } + } + } + } int mask = 1; while (mask < size) { int partner = rank ^ mask; @@ -1835,7 +1911,7 @@ int main(int argc, char* argv[]) { if(rank == 0){ char output[1024] = {0}; snprintf(output, 1023, "../data/outimage%.05d.bmp", i); - std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)x;}); + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); } delete[] imagedata; From cfba362e3894944849263b1bbbff1f3aff6c46bb Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 02:30:00 +0200 Subject: [PATCH 04/32] timing --- test/solver/TestFDTDSolver.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 6ef9c0210..f803938a1 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -3,6 +3,7 @@ #include "Types/Vector.h" #include "Field/Field.h" +#include #include "MaxwellSolvers/FDTD.h" #define JSON_HAS_RANGES 0 //Merlin compilation @@ -11,6 +12,12 @@ #include #define STB_IMAGE_WRITE_IMPLEMENTATION #include + +uint64_t nanoTime(){ + using namespace std; + using namespace chrono; + return duration_cast(high_resolution_clock::now().time_since_epoch()).count(); +} //CONFIG KOKKOS_INLINE_FUNCTION bool isNaN(float x){ #ifdef __CUDA_ARCH__ @@ -1261,7 +1268,7 @@ struct second_order_mur_boundary_conditions{ } }); Kokkos::fence(); - FA_np1.fillHalo(); + //FA_np1.fillHalo(); Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ uint32_t ig = i + lDom.first()[0]; uint32_t jg = j + lDom.first()[1]; @@ -1331,7 +1338,7 @@ struct second_order_mur_boundary_conditions{ } }); Kokkos::fence(); - FA_np1.fillHalo(); + //FA_np1.fillHalo(); Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ uint32_t ig = i + lDom.first()[0]; uint32_t jg = j + lDom.first()[1]; @@ -1824,11 +1831,8 @@ int main(int argc, char* argv[]) { //std::cout << cfg.charge << "\n"; size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); - + uint64_t starttime = nanoTime(); for(size_t i = 0;i < timesteps_required;i++){ - if(ippl::Comm->rank() == 0){ - std::cout << "Step: " << i << std::endl; - } fdtd_state.J = scalar(0.0); fdtd_state.playout.update(fdtd_state.particles); fdtd_state.scatterBunch(); @@ -1917,8 +1921,10 @@ int main(int argc, char* argv[]) { delete[] imagedata; delete[] idata_recvbuffer; delete[] imagedata_final; - } + } } + uint64_t endtime = nanoTime(); + std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; } ippl::finalize(); } \ No newline at end of file From 9da50c92f3be2e4be138dd8711f6d3a972df9585 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 10:01:14 +0200 Subject: [PATCH 05/32] Remove gathers --- src/Particle/ParticleAttrib.hpp | 9 +++++---- test/solver/TestFDTDSolver.cpp | 18 +++++++++++------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index e53ae6402..9ee77e3e3 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -159,10 +159,11 @@ namespace ippl { constexpr unsigned Dim = Field::dim; using PositionType = typename Field::Mesh_t::value_type; - static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); - IpplTimings::startTimer(fillHaloTimer); - f.fillHalo(); - IpplTimings::stopTimer(fillHaloTimer); + //This completely klönks performance for subdivided particle timesteps + //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); + //IpplTimings::startTimer(fillHaloTimer); + //f.fillHalo(); + //IpplTimings::stopTimer(fillHaloTimer); static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather"); IpplTimings::startTimer(gatherTimer); diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index f803938a1..c59c77875 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1497,6 +1497,7 @@ namespace ippl { playout_type playout; Bunch> particles; using bunch_type = Bunch>; + config m_config; /** * @brief Construct a new FDTDFieldState object @@ -1506,7 +1507,7 @@ namespace ippl { * @param layout * @param mesch */ - FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout){ + FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){ FA_np1.initialize(mesch, layout, 1); FA_n.initialize(mesch, layout, 1); FA_nm1.initialize(mesch, layout, 1); @@ -1604,7 +1605,7 @@ namespace ippl { //} auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); auto source = this->J.getView(); - FA_nm1.fillHalo(); + //FA_nm1.fillHalo(); FA_n.fillHalo(); ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; const auto& ldom = layout_mp->getLocalNDIndex(); @@ -1641,7 +1642,7 @@ namespace ippl { evaluate_EB(); } void evaluate_EB(){ - FA_n.fillHalo();FA_nm1.fillHalo(); + FA_n.fillHalo();//FA_nm1.fillHalo(); ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; const scalar idt = scalar(1.0) / dt; auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); @@ -1679,9 +1680,11 @@ namespace ippl { auto rp1view = particles.R_np1.getView(); scalar bunch_dt = dt / 3; Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + EB.fillHalo(); Kokkos::fence(); for(int bts = 0;bts < 3;bts++){ - gather(particles.EB_gather, EB, particles.R); + + particles.EB_gather.gather(EB, particles.R); Kokkos::fence(); Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ const ippl::Vector pgammabeta = gbview(i); @@ -1806,7 +1809,7 @@ int main(int argc, char* argv[]) { abort(); } - ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/); + ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); if(ippl::Comm->rank() == 0){ std::cout << "Init particles: " << std::endl; @@ -1827,6 +1830,7 @@ int main(int argc, char* argv[]) { rm1view(i) -= meanpos; }); } + fdtd_state.particles.setParticleBC(ippl::NO); //fdtd_state.scatterBunch(); //std::cout << cfg.charge << "\n"; @@ -1846,8 +1850,8 @@ int main(int argc, char* argv[]) { if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ auto ldom = layout.getLocalNDIndex(); - int img_height = 1000; - int img_width = int(1000.0 * cfg.extents[2] / cfg.extents[0]); + int img_height = 400; + int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); float* imagedata = new float[img_width * img_height * 3]; std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); float* idata_recvbuffer = new float[img_width * img_height * 3]; From 24ff0a6186905f1d30082446dac4debd65893c17 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 10:42:09 +0200 Subject: [PATCH 06/32] Custom output path --- test/solver/TestFDTDSolver.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index c59c77875..9ea06d754 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -96,6 +96,7 @@ struct config { scalar undulator_length; uint32_t output_rhythm; + std::string output_path; std::unordered_map experiment_options; }; template @@ -292,6 +293,13 @@ config read_config(const char *filepath){ ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters; ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; + ret.output_path = "../data/"; + if(j["output"].contains("path")){ + ret.output_path = j["output"]["path"]; + if(!ret.output_path.ends_with('/')){ + ret.output_path.push_back('/'); + } + } if(j.contains("experimentation")){ nlohmann::json je = j["experimentation"]; for(auto it = je.begin(); it!= je.end();it++){ @@ -1579,12 +1587,15 @@ namespace ippl { auto orig = mesh_mp->getOrigin(); auto hr = mesh_mp->getMeshSpacing(); auto dt = this->dt; + bool space_charge = m_config.space_charge; ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ Vector_t pos = rview(i); Vector_t to = rview(i); Vector_t from = rm1view(i); - //scatterToGrid(Jview,hr, orig, pos, qview(i) / volume); + if(space_charge){ + scatterToGrid(lDom, Jview,hr, orig, pos, qview(i) / volume); + } scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); }); Kokkos::fence(); @@ -1918,7 +1929,8 @@ int main(int argc, char* argv[]) { } if(rank == 0){ char output[1024] = {0}; - snprintf(output, 1023, "../data/outimage%.05d.bmp", i); + + snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); } From 35f2d01b46e6a74a4e5cde9a5ea34b7a664980bc Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 19:05:11 +0200 Subject: [PATCH 07/32] Add useful radiation output --- test/solver/TestFDTDSolver.cpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 9ea06d754..f932c7da3 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1847,6 +1847,10 @@ int main(int argc, char* argv[]) { size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); uint64_t starttime = nanoTime(); + std::ofstream rad; + if(ippl::Comm->rank() == 0){ + rad = std::ofstream("radiation.txt"); + } for(size_t i = 0;i < timesteps_required;i++){ fdtd_state.J = scalar(0.0); fdtd_state.playout.update(fdtd_state.particles); @@ -1854,13 +1858,36 @@ int main(int argc, char* argv[]) { //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; fdtd_state.fieldStep(); fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); + auto ldom = layout.getLocalNDIndex(); + auto nrg = fdtd_state.nr_global; + auto ebv = fdtd_state.EB.getView(); + double radiation = 0.0; + Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + if(kg == nrg[2] - 3){ + ref += ebv(i,j,k)[0].cross(ebv(i,j,k)[1])[2]; + } + + }, radiation); + double radiation_in_watt_on_this_rank = radiation * + unit_powerdensity_in_watt_per_square_meter * + fdtd_state.hr_m[0] * fdtd_state.hr_m[1] * unit_length_in_meters * unit_length_in_meters; + double radiation_in_watt_global = 0.0; + MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); + if(ippl::Comm->rank() == 0){ + ippl::Vector pos{0,0,0}; + frame_boost.primedToUnprimed(pos, fdtd_state.dt * i); + rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; + } //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; int rank = ippl::Comm->rank(); int size = ippl::Comm->size(); if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ - auto ldom = layout.getLocalNDIndex(); + int img_height = 400; int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); float* imagedata = new float[img_width * img_height * 3]; From 0fbb69a42a9719ba7c0483e62b7998bf2a38f9a1 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 21:25:36 +0200 Subject: [PATCH 08/32] Correct radiation eval --- test/solver/TestFDTDSolver.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index f932c7da3..e6a4d7d5a 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -416,6 +416,9 @@ struct UniaxialLorentzframe{ ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; return ret; } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> inverse_transform_EB(const Kokkos::pair, ippl::Vector>& primedEB)const noexcept{ + return UniaxialLorentzframe(-gammaBeta_m).transform_EB(primedEB); + } }; template BunchInitialize generate_mithra_config(const config& cfg, const UniaxialLorentzframe& /*frame_boost unused*/) { @@ -1863,17 +1866,20 @@ int main(int argc, char* argv[]) { auto ebv = fdtd_state.EB.getView(); double radiation = 0.0; Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ - uint32_t ig = i + ldom.first()[0]; - uint32_t jg = j + ldom.first()[1]; + //uint32_t ig = i + ldom.first()[0]; + //uint32_t jg = j + ldom.first()[1]; + Kokkos::pair, ippl::Vector> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]}; + ippl::Vector Elab = frame_boost.transform_EB(buncheb).first; + ippl::Vector Blab = frame_boost.transform_EB(buncheb).second; uint32_t kg = k + ldom.first()[2]; if(kg == nrg[2] - 3){ - ref += ebv(i,j,k)[0].cross(ebv(i,j,k)[1])[2]; + ref += Elab.cross(Blab)[2]; } }, radiation); double radiation_in_watt_on_this_rank = radiation * - unit_powerdensity_in_watt_per_square_meter * - fdtd_state.hr_m[0] * fdtd_state.hr_m[1] * unit_length_in_meters * unit_length_in_meters; + double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) * + fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; double radiation_in_watt_global = 0.0; MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); if(ippl::Comm->rank() == 0){ From eb553ceb48c765f819ed58e3f7c21d39a2dbc280 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 14 Apr 2024 21:37:07 +0200 Subject: [PATCH 09/32] Correct now --- test/solver/TestFDTDSolver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index e6a4d7d5a..978d1e9eb 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1869,8 +1869,8 @@ int main(int argc, char* argv[]) { //uint32_t ig = i + ldom.first()[0]; //uint32_t jg = j + ldom.first()[1]; Kokkos::pair, ippl::Vector> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]}; - ippl::Vector Elab = frame_boost.transform_EB(buncheb).first; - ippl::Vector Blab = frame_boost.transform_EB(buncheb).second; + ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; + ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; uint32_t kg = k + ldom.first()[2]; if(kg == nrg[2] - 3){ ref += Elab.cross(Blab)[2]; From 120726d8b21f7869202dcd09a36e320141d8f2f7 Mon Sep 17 00:00:00 2001 From: Manuel Date: Tue, 16 Apr 2024 12:36:04 +0200 Subject: [PATCH 10/32] Change colorscheme and labframe rad --- test/solver/TestFDTDSolver.cpp | 273 ++++++++++++++++++++++++++++++++- 1 file changed, 272 insertions(+), 1 deletion(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 978d1e9eb..fe90bcf24 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -12,6 +12,267 @@ #include #define STB_IMAGE_WRITE_IMPLEMENTATION #include +constexpr float turbo_cm[256][3] = { + {0.18995,0.07176,0.23217}, + {0.19483,0.08339,0.26149}, + {0.19956,0.09498,0.29024}, + {0.20415,0.10652,0.31844}, + {0.20860,0.11802,0.34607}, + {0.21291,0.12947,0.37314}, + {0.21708,0.14087,0.39964}, + {0.22111,0.15223,0.42558}, + {0.22500,0.16354,0.45096}, + {0.22875,0.17481,0.47578}, + {0.23236,0.18603,0.50004}, + {0.23582,0.19720,0.52373}, + {0.23915,0.20833,0.54686}, + {0.24234,0.21941,0.56942}, + {0.24539,0.23044,0.59142}, + {0.24830,0.24143,0.61286}, + {0.25107,0.25237,0.63374}, + {0.25369,0.26327,0.65406}, + {0.25618,0.27412,0.67381}, + {0.25853,0.28492,0.69300}, + {0.26074,0.29568,0.71162}, + {0.26280,0.30639,0.72968}, + {0.26473,0.31706,0.74718}, + {0.26652,0.32768,0.76412}, + {0.26816,0.33825,0.78050}, + {0.26967,0.34878,0.79631}, + {0.27103,0.35926,0.81156}, + {0.27226,0.36970,0.82624}, + {0.27334,0.38008,0.84037}, + {0.27429,0.39043,0.85393}, + {0.27509,0.40072,0.86692}, + {0.27576,0.41097,0.87936}, + {0.27628,0.42118,0.89123}, + {0.27667,0.43134,0.90254}, + {0.27691,0.44145,0.91328}, + {0.27701,0.45152,0.92347}, + {0.27698,0.46153,0.93309}, + {0.27680,0.47151,0.94214}, + {0.27648,0.48144,0.95064}, + {0.27603,0.49132,0.95857}, + {0.27543,0.50115,0.96594}, + {0.27469,0.51094,0.97275}, + {0.27381,0.52069,0.97899}, + {0.27273,0.53040,0.98461}, + {0.27106,0.54015,0.98930}, + {0.26878,0.54995,0.99303}, + {0.26592,0.55979,0.99583}, + {0.26252,0.56967,0.99773}, + {0.25862,0.57958,0.99876}, + {0.25425,0.58950,0.99896}, + {0.24946,0.59943,0.99835}, + {0.24427,0.60937,0.99697}, + {0.23874,0.61931,0.99485}, + {0.23288,0.62923,0.99202}, + {0.22676,0.63913,0.98851}, + {0.22039,0.64901,0.98436}, + {0.21382,0.65886,0.97959}, + {0.20708,0.66866,0.97423}, + {0.20021,0.67842,0.96833}, + {0.19326,0.68812,0.96190}, + {0.18625,0.69775,0.95498}, + {0.17923,0.70732,0.94761}, + {0.17223,0.71680,0.93981}, + {0.16529,0.72620,0.93161}, + {0.15844,0.73551,0.92305}, + {0.15173,0.74472,0.91416}, + {0.14519,0.75381,0.90496}, + {0.13886,0.76279,0.89550}, + {0.13278,0.77165,0.88580}, + {0.12698,0.78037,0.87590}, + {0.12151,0.78896,0.86581}, + {0.11639,0.79740,0.85559}, + {0.11167,0.80569,0.84525}, + {0.10738,0.81381,0.83484}, + {0.10357,0.82177,0.82437}, + {0.10026,0.82955,0.81389}, + {0.09750,0.83714,0.80342}, + {0.09532,0.84455,0.79299}, + {0.09377,0.85175,0.78264}, + {0.09287,0.85875,0.77240}, + {0.09267,0.86554,0.76230}, + {0.09320,0.87211,0.75237}, + {0.09451,0.87844,0.74265}, + {0.09662,0.88454,0.73316}, + {0.09958,0.89040,0.72393}, + {0.10342,0.89600,0.71500}, + {0.10815,0.90142,0.70599}, + {0.11374,0.90673,0.69651}, + {0.12014,0.91193,0.68660}, + {0.12733,0.91701,0.67627}, + {0.13526,0.92197,0.66556}, + {0.14391,0.92680,0.65448}, + {0.15323,0.93151,0.64308}, + {0.16319,0.93609,0.63137}, + {0.17377,0.94053,0.61938}, + {0.18491,0.94484,0.60713}, + {0.19659,0.94901,0.59466}, + {0.20877,0.95304,0.58199}, + {0.22142,0.95692,0.56914}, + {0.23449,0.96065,0.55614}, + {0.24797,0.96423,0.54303}, + {0.26180,0.96765,0.52981}, + {0.27597,0.97092,0.51653}, + {0.29042,0.97403,0.50321}, + {0.30513,0.97697,0.48987}, + {0.32006,0.97974,0.47654}, + {0.33517,0.98234,0.46325}, + {0.35043,0.98477,0.45002}, + {0.36581,0.98702,0.43688}, + {0.38127,0.98909,0.42386}, + {0.39678,0.99098,0.41098}, + {0.41229,0.99268,0.39826}, + {0.42778,0.99419,0.38575}, + {0.44321,0.99551,0.37345}, + {0.45854,0.99663,0.36140}, + {0.47375,0.99755,0.34963}, + {0.48879,0.99828,0.33816}, + {0.50362,0.99879,0.32701}, + {0.51822,0.99910,0.31622}, + {0.53255,0.99919,0.30581}, + {0.54658,0.99907,0.29581}, + {0.56026,0.99873,0.28623}, + {0.57357,0.99817,0.27712}, + {0.58646,0.99739,0.26849}, + {0.59891,0.99638,0.26038}, + {0.61088,0.99514,0.25280}, + {0.62233,0.99366,0.24579}, + {0.63323,0.99195,0.23937}, + {0.64362,0.98999,0.23356}, + {0.65394,0.98775,0.22835}, + {0.66428,0.98524,0.22370}, + {0.67462,0.98246,0.21960}, + {0.68494,0.97941,0.21602}, + {0.69525,0.97610,0.21294}, + {0.70553,0.97255,0.21032}, + {0.71577,0.96875,0.20815}, + {0.72596,0.96470,0.20640}, + {0.73610,0.96043,0.20504}, + {0.74617,0.95593,0.20406}, + {0.75617,0.95121,0.20343}, + {0.76608,0.94627,0.20311}, + {0.77591,0.94113,0.20310}, + {0.78563,0.93579,0.20336}, + {0.79524,0.93025,0.20386}, + {0.80473,0.92452,0.20459}, + {0.81410,0.91861,0.20552}, + {0.82333,0.91253,0.20663}, + {0.83241,0.90627,0.20788}, + {0.84133,0.89986,0.20926}, + {0.85010,0.89328,0.21074}, + {0.85868,0.88655,0.21230}, + {0.86709,0.87968,0.21391}, + {0.87530,0.87267,0.21555}, + {0.88331,0.86553,0.21719}, + {0.89112,0.85826,0.21880}, + {0.89870,0.85087,0.22038}, + {0.90605,0.84337,0.22188}, + {0.91317,0.83576,0.22328}, + {0.92004,0.82806,0.22456}, + {0.92666,0.82025,0.22570}, + {0.93301,0.81236,0.22667}, + {0.93909,0.80439,0.22744}, + {0.94489,0.79634,0.22800}, + {0.95039,0.78823,0.22831}, + {0.95560,0.78005,0.22836}, + {0.96049,0.77181,0.22811}, + {0.96507,0.76352,0.22754}, + {0.96931,0.75519,0.22663}, + {0.97323,0.74682,0.22536}, + {0.97679,0.73842,0.22369}, + {0.98000,0.73000,0.22161}, + {0.98289,0.72140,0.21918}, + {0.98549,0.71250,0.21650}, + {0.98781,0.70330,0.21358}, + {0.98986,0.69382,0.21043}, + {0.99163,0.68408,0.20706}, + {0.99314,0.67408,0.20348}, + {0.99438,0.66386,0.19971}, + {0.99535,0.65341,0.19577}, + {0.99607,0.64277,0.19165}, + {0.99654,0.63193,0.18738}, + {0.99675,0.62093,0.18297}, + {0.99672,0.60977,0.17842}, + {0.99644,0.59846,0.17376}, + {0.99593,0.58703,0.16899}, + {0.99517,0.57549,0.16412}, + {0.99419,0.56386,0.15918}, + {0.99297,0.55214,0.15417}, + {0.99153,0.54036,0.14910}, + {0.98987,0.52854,0.14398}, + {0.98799,0.51667,0.13883}, + {0.98590,0.50479,0.13367}, + {0.98360,0.49291,0.12849}, + {0.98108,0.48104,0.12332}, + {0.97837,0.46920,0.11817}, + {0.97545,0.45740,0.11305}, + {0.97234,0.44565,0.10797}, + {0.96904,0.43399,0.10294}, + {0.96555,0.42241,0.09798}, + {0.96187,0.41093,0.09310}, + {0.95801,0.39958,0.08831}, + {0.95398,0.38836,0.08362}, + {0.94977,0.37729,0.07905}, + {0.94538,0.36638,0.07461}, + {0.94084,0.35566,0.07031}, + {0.93612,0.34513,0.06616}, + {0.93125,0.33482,0.06218}, + {0.92623,0.32473,0.05837}, + {0.92105,0.31489,0.05475}, + {0.91572,0.30530,0.05134}, + {0.91024,0.29599,0.04814}, + {0.90463,0.28696,0.04516}, + {0.89888,0.27824,0.04243}, + {0.89298,0.26981,0.03993}, + {0.88691,0.26152,0.03753}, + {0.88066,0.25334,0.03521}, + {0.87422,0.24526,0.03297}, + {0.86760,0.23730,0.03082}, + {0.86079,0.22945,0.02875}, + {0.85380,0.22170,0.02677}, + {0.84662,0.21407,0.02487}, + {0.83926,0.20654,0.02305}, + {0.83172,0.19912,0.02131}, + {0.82399,0.19182,0.01966}, + {0.81608,0.18462,0.01809}, + {0.80799,0.17753,0.01660}, + {0.79971,0.17055,0.01520}, + {0.79125,0.16368,0.01387}, + {0.78260,0.15693,0.01264}, + {0.77377,0.15028,0.01148}, + {0.76476,0.14374,0.01041}, + {0.75556,0.13731,0.00942}, + {0.74617,0.13098,0.00851}, + {0.73661,0.12477,0.00769}, + {0.72686,0.11867,0.00695}, + {0.71692,0.11268,0.00629}, + {0.70680,0.10680,0.00571}, + {0.69650,0.10102,0.00522}, + {0.68602,0.09536,0.00481}, + {0.67535,0.08980,0.00449}, + {0.66449,0.08436,0.00424}, + {0.65345,0.07902,0.00408}, + {0.64223,0.07380,0.00401}, + {0.63082,0.06868,0.00401}, + {0.61923,0.06367,0.00410}, + {0.60746,0.05878,0.00427}, + {0.59550,0.05399,0.00453}, + {0.58336,0.04931,0.00486}, + {0.57103,0.04474,0.00529}, + {0.55852,0.04028,0.00579}, + {0.54583,0.03593,0.00638}, + {0.53295,0.03169,0.00705}, + {0.51989,0.02756,0.00780}, + {0.50664,0.02354,0.00863}, + {0.49321,0.01963,0.00955}, + {0.47960,0.01583,0.01055} +}; + + + uint64_t nanoTime(){ using namespace std; @@ -1930,11 +2191,21 @@ int main(int argc, char* argv[]) { if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ ippl::Vector, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); + ippl::Vector poynting = acc[0].cross(acc[1]); + Kokkos::pair, ippl::Vector> eblab = frame_boost.inverse_transform_EB( + Kokkos::make_pair, ippl::Vector>(acc[0], acc[1]) + ); + ippl::Vector poyntinglab = eblab.first.cross(eblab.second); + poynting = poyntinglab; if(poynting.norm() > 0){ //std::cout << poynting.norm() << "\n"; } - imagedata[(j * img_width + i) * 3 + 0] += std::sqrt(poynting.norm()) * 0.5f;//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f; + int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); } } } From ba073e60189b4f3ef830b2a0f2de930e76a60bbb Mon Sep 17 00:00:00 2001 From: Manuel Date: Wed, 17 Apr 2024 11:22:38 +0200 Subject: [PATCH 11/32] FFMPEG output --- test/solver/TestFDTDSolver.cpp | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index fe90bcf24..f6cdcdf5e 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -4,7 +4,7 @@ #include "Field/Field.h" #include - +#include // For popen #include "MaxwellSolvers/FDTD.h" #define JSON_HAS_RANGES 0 //Merlin compilation #include @@ -2022,7 +2022,23 @@ namespace ippl { }; // clang-format on } // namespace ippl +bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { + const int channels = 3; // RGB + const int stride = width * channels; + std::vector flippedData(data, data + stride * height); + + // Use stb_image_write to write the BMP image to the file descriptor + if (!stbi_write_bmp_to_func( + [](void* context, void* data, int size) { + FILE* f = reinterpret_cast(context); + fwrite(data, 1, size, f); + }, + fd, width, height, channels, flippedData.data())) { + return false; + } + return true; +} int main(int argc, char* argv[]) { using scalar = float; ippl::initialize(argc, argv); @@ -2068,7 +2084,7 @@ int main(int argc, char* argv[]) { std::array isParallel; isParallel.fill(false); isParallel[2] = true; - + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); @@ -2112,9 +2128,16 @@ int main(int argc, char* argv[]) { size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); uint64_t starttime = nanoTime(); std::ofstream rad; + FILE* ffmpeg_file = nullptr; if(ippl::Comm->rank() == 0){ rad = std::ofstream("radiation.txt"); + const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; + if(cfg.output_rhythm != 0){ + ffmpeg_file = popen(ffmpegCmd, "w"); + } } + + for(size_t i = 0;i < timesteps_required;i++){ fdtd_state.J = scalar(0.0); fdtd_state.playout.update(fdtd_state.particles); @@ -2236,7 +2259,7 @@ int main(int argc, char* argv[]) { snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); - stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); } delete[] imagedata; delete[] idata_recvbuffer; From 200ad6d030831283bca7cb464ce36b8db868b3ea Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 19 Apr 2024 11:21:55 +0200 Subject: [PATCH 12/32] Gauss test --- config.json | 4 ++-- test/solver/TestFDTDSolver.cpp | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/config.json b/config.json index 5b5c053e4..0948650ee 100644 --- a/config.json +++ b/config.json @@ -4,7 +4,7 @@ "length-scale" : "micrometers", "time-scale" : "picoseconds", "extents": [3400.0, 3400.0, 280.0], - "resolution": [64, 64, 2800], + "resolution": [32, 32, 800], "mesh-center": [0.0, 0.0, 0.0], "total-time": 30000.0, "bunch-time-step": 1.6, @@ -19,7 +19,7 @@ "_mass": 1, "charge": 1.846e8, "mass": 1.846e8, - "number-of-particles": 500000, + "number-of-particles": 50000, "gamma": 100.41, "_gamma": 1.0, "direction (ignored)": [0.0, 0.0, 1.0], diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index f6cdcdf5e..223bf1bd2 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #define STB_IMAGE_WRITE_IMPLEMENTATION #include constexpr float turbo_cm[256][3] = { @@ -2039,6 +2040,48 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { return true; } +template +scalar test_gauss_law(uint32_t n){ + + ippl::NDIndex<3> owned(n / 2, n / 2, n); + ippl::Vector nr{n / 2, n / 2, n}; + ippl::Vector extents{1,1,1}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = extents / nr.cast(); + ippl::Vector origin = {scalar(-0.5), scalar(-0.5), scalar(-0.5)}; + ippl::UniformCartesian mesh(owned, hx, origin); + ippl::FDTDFieldState field_state(layout, mesh, (1 << 20), config{}); + field_state.particles.Q = scalar(coulomb_in_unit_charges) / (1 << 20); + field_state.particles.mass = scalar(1.0) / (1 << 20); //Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + + //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); + + Kokkos::parallel_for((1 << 20), KOKKOS_LAMBDA(size_t i){ + //bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.01 * meter_in_unit_lengths); + pview(i)[1] = state.normal(0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.01 * meter_in_unit_lengths); + p1view(i) = pview(i); + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.scatterBunch(); + for(size_t i = 0;i < 8*n;i++){ + field_state.fieldStep(); + } +} int main(int argc, char* argv[]) { using scalar = float; ippl::initialize(argc, argv); From 3958ed82e29c88426c8b72fb213e5a46acd97bd3 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 21 Apr 2024 13:12:06 +0200 Subject: [PATCH 13/32] Revert vector and particle attrib changes, make FDTD Solver compatible --- src/Particle/ParticleAttrib.hpp | 43 ++--- src/Types/Vector.h | 310 +++++++++++--------------------- src/Types/Vector.hpp | 9 + test/solver/TestFDTDSolver.cpp | 2 +- 4 files changed, 127 insertions(+), 237 deletions(-) diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 9ee77e3e3..3b37f920a 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -121,7 +121,7 @@ namespace ippl { const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = vector_type(1.0) / dx; + const vector_type invdx = 1.0 / dx; const FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); @@ -132,12 +132,12 @@ namespace ippl { "ParticleAttrib::scatter", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + vector_type(1.0); // Terribile; - Vector index = l.template cast(); - Vector whi = l - index.template cast(); - Vector wlo = vector_type(1.0) - whi; + vector_type l = (pp(idx) - origin) * invdx + 0.5; + Vector index = l; + Vector whi = l - index; + Vector wlo = 1.0 - whi; - Vector args = (index - lDom.first() + nghost).template cast(); + Vector args = index - lDom.first() + nghost; // scatter const value_type& val = dview_m(idx); @@ -159,11 +159,10 @@ namespace ippl { constexpr unsigned Dim = Field::dim; using PositionType = typename Field::Mesh_t::value_type; - //This completely klönks performance for subdivided particle timesteps - //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); - //IpplTimings::startTimer(fillHaloTimer); - //f.fillHalo(); - //IpplTimings::stopTimer(fillHaloTimer); + static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); + IpplTimings::startTimer(fillHaloTimer); + f.fillHalo(); + IpplTimings::stopTimer(fillHaloTimer); static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather"); IpplTimings::startTimer(gatherTimer); @@ -176,7 +175,7 @@ namespace ippl { const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = vector_type(1.0) / dx; + const vector_type invdx = 1.0 / dx; const FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); @@ -187,24 +186,14 @@ namespace ippl { "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + vector_type(1.0); //terribile - Vector index = l.template cast(); - Vector whi = l - index.template cast(); - Vector wlo = vector_type(1.0) - whi; + vector_type l = (pp(idx) - origin) * invdx + 0.5; + Vector index = l; + Vector whi = l - index; + Vector wlo = 1.0 - whi; - Vector args = (index - lDom.first() + nghost).template cast(); + Vector args = index - lDom.first() + nghost; // gather - for(unsigned d = 0;d < Field::dim;d++){ - if(args[d] >= view.extent(d)){ - dview_m(idx) = T(0); - return; - } - else if(args[d] < 1){ - dview_m(idx) = T(0); - return; - } - } dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, view, wlo, whi, args); }); diff --git a/src/Types/Vector.h b/src/Types/Vector.h index 0f1d00b96..216ddf5b7 100644 --- a/src/Types/Vector.h +++ b/src/Types/Vector.h @@ -20,239 +20,131 @@ namespace ippl { * @tparam Dim vector dimension */ template - struct Vector { - using value_type = T; + class Vector : public detail::Expression, sizeof(T) * Dim> { + public: + typedef T value_type; + static constexpr unsigned dim = Dim; - constexpr static unsigned dim = Dim; - T data[Dim]; - KOKKOS_INLINE_FUNCTION constexpr Vector(const std::initializer_list& list) { - // PAssert(list.size() == Dim); - unsigned int i = 0; - for (auto& l : list) { - data[i] = l; - ++i; - } - } - Vector() = default; - constexpr KOKKOS_INLINE_FUNCTION Vector(T v) { fill(v); } - KOKKOS_INLINE_FUNCTION value_type dot(const Vector& v) const noexcept { - value_type ret = 0; - for (unsigned i = 0; i < dim; i++) { - ret += data[i] * v[i]; - } - return ret; + KOKKOS_FUNCTION + constexpr Vector() + : Vector(value_type(0)) {} + + template ::type = true> + explicit KOKKOS_FUNCTION Vector(const Args&... args); + + template + KOKKOS_FUNCTION Vector(const detail::Expression& expr); + + KOKKOS_DEFAULTED_FUNCTION + Vector(const Vector& v) = default; + + KOKKOS_FUNCTION + Vector(const T& val); + + + Vector(const std::array& a); + + Vector(const std::array, Dim>& a); + + /*! + * @param list of values + */ + KOKKOS_FUNCTION + Vector(const std::initializer_list& list); + + KOKKOS_FUNCTION + ~Vector() {} + + // Get and Set Operations + KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i); + + KOKKOS_INLINE_FUNCTION value_type operator[](unsigned int i) const; + + KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i); + + KOKKOS_INLINE_FUNCTION value_type operator()(unsigned int i) const; + + // Assignment Operators + template + KOKKOS_INLINE_FUNCTION Vector& operator=(const detail::Expression& expr); + + template + KOKKOS_INLINE_FUNCTION Vector& operator+=(const detail::Expression& expr); + + template + KOKKOS_INLINE_FUNCTION Vector& operator-=(const detail::Expression& expr); + + template + KOKKOS_INLINE_FUNCTION Vector& operator*=(const detail::Expression& expr); + + template + KOKKOS_INLINE_FUNCTION Vector& operator/=(const detail::Expression& expr); + + KOKKOS_INLINE_FUNCTION Vector& operator+=(const T& val); + + KOKKOS_INLINE_FUNCTION Vector& operator-=(const T& val); + + KOKKOS_INLINE_FUNCTION Vector& operator*=(const T& val); + + KOKKOS_INLINE_FUNCTION Vector& operator/=(const T& val); + + using iterator = T*; + using const_iterator = const T*; + KOKKOS_INLINE_FUNCTION constexpr iterator begin(); + KOKKOS_INLINE_FUNCTION constexpr iterator end(); + KOKKOS_INLINE_FUNCTION constexpr const_iterator begin() const; + KOKKOS_INLINE_FUNCTION constexpr const_iterator end() const; + + KOKKOS_INLINE_FUNCTION T dot(const Vector& rhs) const; + KOKKOS_INLINE_FUNCTION Vector cross(const Vector& rhs) const; + KOKKOS_INLINE_FUNCTION void fill(const T& v){ + for(unsigned k = 0;k < Dim;k++)(*this)[k] = v; } - template - KOKKOS_INLINE_FUNCTION Vector tail() const noexcept { + template + KOKKOS_INLINE_FUNCTION Vector tail()const noexcept{ Vector ret; static_assert(N <= Dim, "N must be smaller than Dim"); constexpr unsigned diff = Dim - N; - for (unsigned i = 0; i < N; i++) { + for(unsigned i = 0;i < N;i++){ ret[i] = (*this)[i + diff]; } return ret; } - template - KOKKOS_INLINE_FUNCTION Vector head() const noexcept { + template + KOKKOS_INLINE_FUNCTION Vector head()const noexcept{ Vector ret; static_assert(N <= Dim, "N must be smaller than Dim"); - for (unsigned i = 0; i < N; i++) { + for(unsigned i = 0;i < N;i++){ ret[i] = (*this)[i]; } return ret; } - KOKKOS_INLINE_FUNCTION value_type squaredNorm() const noexcept { return dot(*this); } - KOKKOS_INLINE_FUNCTION value_type norm() { -#ifndef __CUDA_ARCH__ - using std::sqrt; -#endif - return Kokkos::sqrt(squaredNorm()); - } - KOKKOS_INLINE_FUNCTION Vector normalized() const noexcept { return *this / norm(); } - KOKKOS_INLINE_FUNCTION value_type sum() const noexcept { - value_type ret = 0; - for (unsigned i = 0; i < dim; i++) { - ret += data[i]; - } + template + KOKKOS_INLINE_FUNCTION Vector cast()const{ + Vector ret; + for(unsigned k = 0;k < Dim;k++)ret[k] = OtherType((*this)[k]); return ret; } - KOKKOS_INLINE_FUNCTION value_type average() const noexcept { - value_type ret = 0; - for (unsigned i = 0; i < dim; i++) { - ret += data[i]; - } - return ret / dim; + KOKKOS_INLINE_FUNCTION T squaredNorm()const{ + return this->dot(*this); } - KOKKOS_INLINE_FUNCTION Vector cross(const Vector& v) const noexcept - requires(Dim == 3) - { - Vector ret(0); - ret[0] = data[1] * v[2] - data[2] * v[1]; - ret[1] = data[2] * v[0] - data[0] * v[2]; - ret[2] = data[0] * v[1] - data[1] * v[0]; - return ret; + KOKKOS_INLINE_FUNCTION T norm()const{ + using Kokkos::sqrt; + return sqrt(squaredNorm()); } - KOKKOS_INLINE_FUNCTION bool operator==(const Vector& o) const noexcept { - for (unsigned i = 0; i < dim; i++) { - if (data[i] != o[i]) - return false; - } - return true; - } - KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i) noexcept { - assert(i < dim); - return data[i]; - } - KOKKOS_INLINE_FUNCTION T* begin() noexcept { return data; } - KOKKOS_INLINE_FUNCTION T* end() noexcept { return data + dim; } - KOKKOS_INLINE_FUNCTION const T* begin() const noexcept { return data; } - KOKKOS_INLINE_FUNCTION const T* end() const noexcept { return data + dim; } - KOKKOS_INLINE_FUNCTION constexpr void fill(value_type x) { - for (unsigned i = 0; i < dim; i++) { - data[i] = value_type(x); - } - } - KOKKOS_INLINE_FUNCTION const value_type& operator[](unsigned int i) const noexcept { - assert(i < dim); - return data[i]; - } - - KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i) noexcept { - assert(i < dim); - return data[i]; - } - - KOKKOS_INLINE_FUNCTION const value_type& operator()(unsigned int i) const noexcept { - assert(i < dim); - return data[i]; - } - KOKKOS_INLINE_FUNCTION Vector operator-() const noexcept { - Vector ret; - for (unsigned i = 0; i < dim; i++) { - ret[i] = -(*this)[i]; - } - return ret; - } - KOKKOS_INLINE_FUNCTION Vector decompose(Vector* integral) { -#ifndef __CUDA_ARCH__ - using std::modf; -#endif - Vector ret; - for (unsigned i = 0; i < dim; i++) { - if constexpr (std::is_same_v) { - float tmp; - ret[i] = modff((*this)[i], &tmp); - (*integral)[i] = (int)tmp; - } else if constexpr (std::is_same_v) { - double tmp; - ret[i] = modf((*this)[i], &tmp); - (*integral)[i] = (int)tmp; - } - } - return ret; - } - template - constexpr KOKKOS_INLINE_FUNCTION Vector cast() const noexcept { - Vector ret; - for (unsigned i = 0; i < dim; i++) { - ret.data[i] = (O)(data[i]); - } - return ret; - } - KOKKOS_INLINE_FUNCTION Vector operator*(const value_type& o) const noexcept { - Vector ret; - for (unsigned i = 0; i < dim; i++) { - ret[i] = (*this)[i] * o; - } - return ret; - } -#define defop_kf(OP) \ - constexpr KOKKOS_INLINE_FUNCTION Vector operator OP(const Vector& o) const noexcept { \ - Vector ret; \ - for (unsigned i = 0; i < dim; i++) { \ - ret.data[i] = data[i] OP o.data[i]; \ - } \ - return ret; \ - } - defop_kf(+) - defop_kf(-) - defop_kf(*) - defop_kf(/) -#define def_aop_kf(OP) \ - KOKKOS_INLINE_FUNCTION Vector& operator OP(const Vector& o) noexcept { \ - for (unsigned i = 0; i < dim; i++) { \ - (*this)[i] OP o[i]; \ - } \ - return *this; \ - } - def_aop_kf(+=) - def_aop_kf(-=) - def_aop_kf(*=) - def_aop_kf(/=) - template - friend stream_t& operator<<(stream_t& str, const Vector& v) { - // tr << "{"; - for (unsigned i = 0; i < Dim; i++) { - str << v[i]; - if (i < Dim - 1) - str << ", "; - } - // str << "}"; - return str; - } - // defop_kf(*) - // defop_kf(/) + //Needs to be public to be a standard-layout type + //private: + T data_m[Dim]; }; - - //template - //KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b); - //template - //KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b); + template - KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b){ - using Kokkos::min; - Vector ret; - for(unsigned d = 0; d < Dim;d++){ - ret[d] = min(a[d], b[d]); - } - return ret; - } + KOKKOS_INLINE_FUNCTION Vector min(const Vector& a, const Vector& b); template - KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b){ - using Kokkos::max; - Vector ret; - for(unsigned d = 0; d < Dim;d++){ - ret[d] = max(a[d], b[d]); - } - return ret; - } - template - KOKKOS_INLINE_FUNCTION ippl::Vector operator *(const O &o, const ippl::Vector& v) noexcept { - ippl::Vector ret; - for (unsigned i = 0; i < N; i++) { - ret[i] = v[i] * o; - } - return ret; - } - template - KOKKOS_INLINE_FUNCTION Vector cross(const ippl::Vector& a, const ippl::Vector& b) { - ippl::Vector ret{0.0, 0.0, 0.0}; - ret[0] = a[1] * b[2] - a[2] * b[1]; - ret[1] = a[2] * b[0] - a[0] * b[2]; - ret[2] = a[0] * b[1] - a[1] * b[0]; - return ret; - } - template - KOKKOS_INLINE_FUNCTION T dot(const ippl::Vector& a, const ippl::Vector& b) { - T ret = 0; - for(unsigned k = 0;k < dim;k++){ - ret += a[k] * b[k]; - } - return ret; - } + KOKKOS_INLINE_FUNCTION Vector max(const Vector& a, const Vector& b); } // namespace ippl -//#include "Vector.hpp" +#include "Vector.hpp" #endif diff --git a/src/Types/Vector.hpp b/src/Types/Vector.hpp index aa9a6168b..8723ed431 100644 --- a/src/Types/Vector.hpp +++ b/src/Types/Vector.hpp @@ -187,6 +187,15 @@ namespace ippl { } return res; } + template + KOKKOS_INLINE_FUNCTION Vector Vector::cross(const Vector& rhs) const { + + Vector ret; + ret[0] = data_m[1] * rhs[2] - data_m[2] * rhs[1]; + ret[1] = data_m[2] * rhs[0] - data_m[0] * rhs[2]; + ret[2] = data_m[0] * rhs[1] - data_m[1] * rhs[0]; + return ret; + } template inline std::ostream& operator<<(std::ostream& out, const Vector& v) { diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 223bf1bd2..58faf4632 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1434,7 +1434,7 @@ struct second_order_abc_corner{ constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); using ippl::apply; - constexpr ippl::Vector offsets[8] = { + const ippl::Vector offsets[8] = { ippl::Vector{0,0,0}, ippl::Vector{xoff,0,0}, ippl::Vector{0,yoff,0}, From b45bd97ac2e4610a55a3bfd519ec5106222b531b Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 22 Apr 2024 12:35:32 +0200 Subject: [PATCH 14/32] Gauss and ampere --- CMakeLists.txt | 2 +- test/solver/TestFDTDSolver.cpp | 160 +++++++++++++++++++++++++++++---- 2 files changed, 142 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 058bfab5f..b2a799e6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ file( ) file( DOWNLOAD - https://raw.githubusercontent.com/nothings/stb/master/stb_image_write.h + https://raw.githubusercontent.com/manuel5975p/stb/master/stb_image_write.h "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp" ) include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/") diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 58faf4632..433f16ec1 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1121,6 +1121,7 @@ template KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); ipos -= ldom.first(); + //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; if( ipos[0] < 0 ||ipos[1] < 0 @@ -1751,8 +1752,11 @@ namespace ippl { bool periodic_bc; - //Fields using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using view_type = VField_t::view_type; + using ev_view_type = EBField_t::view_type; + //Fields ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_np1; ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_n; ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_nm1; @@ -1800,8 +1804,8 @@ namespace ippl { uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) }; - std::cout << "NR_M_g: " << nr_global << "\n"; - std::cout << "NR_M_l: " << nr_local << "\n"; + //std::cout << "NR_M_g: " << nr_global << "\n"; + //std::cout << "NR_M_l: " << nr_local << "\n"; dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2])); particles.create(nparticles); setNoBoundaryConditions(); @@ -1859,7 +1863,7 @@ namespace ippl { Vector_t to = rview(i); Vector_t from = rm1view(i); if(space_charge){ - scatterToGrid(lDom, Jview,hr, orig, pos, qview(i) / volume); + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); } scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); }); @@ -2040,12 +2044,39 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { return true; } +template +KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, const ippl::NDIndex<3>& lDom){ + using vector_type = ippl::Vector; + + vector_type l; + //vector_type origin = v.get_mesh().getOrigin(); + //auto lDom = v.getLayout().getLocalNDIndex(); + //vector_type hr = v.get_mesh().getMeshSpacing(); + for(unsigned k = 0;k < 3;k++){ + l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong + } + + ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; + ippl::Vector whi = l - index; + ippl::Vector wlo(1.0); + wlo -= whi; + //TODO: nghost + ippl::Vector args = index - lDom.first() + 1; + for(unsigned k = 0;k < 3;k++){ + if(args[k] >= v.extent(k) || args[k] == 0){ + return typename View::value_type(0); + } + } + //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n"; + return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/; + +} template scalar test_gauss_law(uint32_t n){ - ippl::NDIndex<3> owned(n / 2, n / 2, n); - ippl::Vector nr{n / 2, n / 2, n}; - ippl::Vector extents{1,1,1}; + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths}; std::array isParallel; isParallel.fill(false); isParallel[2] = true; @@ -2055,38 +2086,127 @@ scalar test_gauss_law(uint32_t n){ //[-1, 1] box ippl::Vector hx = extents / nr.cast(); - ippl::Vector origin = {scalar(-0.5), scalar(-0.5), scalar(-0.5)}; + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; ippl::UniformCartesian mesh(owned, hx, origin); - ippl::FDTDFieldState field_state(layout, mesh, (1 << 20), config{}); - field_state.particles.Q = scalar(coulomb_in_unit_charges) / (1 << 20); - field_state.particles.mass = scalar(1.0) / (1 << 20); //Irrelefant + + uint32_t pcount = 1 << 20; + ippl::FDTDFieldState field_state(layout, mesh, pcount, config{.space_charge = true}); + field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant auto pview = field_state.particles.R.getView(); auto p1view = field_state.particles.R_nm1.getView(); //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); + //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); + + Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ + //bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + p1view(i) = pview(i); + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.J = scalar(0.0); + field_state.scatterBunch(); + for(size_t i = 0;i < 8*n;i++){ + field_state.fieldStep(); + } + field_state.evaluate_EB(); + Kokkos::fence(); + auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + + std::ofstream line("gauss_line.txt"); + typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for(unsigned i = 1;i < nr[2];i++){ + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; + } + return 0.0f; +} +template +scalar test_amperes_law(uint32_t n){ + + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; + } + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; + ippl::UniformCartesian mesh(owned, hx, origin); + + uint32_t pcount = 1 << 20; + ippl::FDTDFieldState field_state(layout, mesh, pcount, config{.space_charge = true}); + field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + scalar timestep = field_state.dt; + //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); - Kokkos::parallel_for((1 << 20), KOKKOS_LAMBDA(size_t i){ + Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ //bunch.gammaBeta[i].fill(scalar(0)); auto state = random_pool.get_state(); - pview(i)[0] = state.normal(0.01 * meter_in_unit_lengths); - pview(i)[1] = state.normal(0.01 * meter_in_unit_lengths); - pview(i)[2] = state.normal(0.01 * meter_in_unit_lengths); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); p1view(i) = pview(i); + p1view(i)[1] -= vy * timestep; random_pool.free_state(state); }); Kokkos::fence(); + field_state.J = scalar(0.0); field_state.scatterBunch(); for(size_t i = 0;i < 8*n;i++){ field_state.fieldStep(); } + field_state.evaluate_EB(); + Kokkos::fence(); + auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + + std::ofstream line("ampere_line.txt"); + + typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for(unsigned i = 1;i < nr[2];i++){ + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n"; + } + return 0.0f; } int main(int argc, char* argv[]) { - using scalar = float; + using scalar = double; ippl::initialize(argc, argv); { - + + test_gauss_law(64); + test_amperes_law(64); + goto exit; config cfg = read_config("../config.json"); const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); cfg.extents[2] *= frame_gamma; @@ -2302,7 +2422,8 @@ int main(int argc, char* argv[]) { snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); - writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); + if(ffmpeg_file != nullptr) + writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); } delete[] imagedata; delete[] idata_recvbuffer; @@ -2312,5 +2433,6 @@ int main(int argc, char* argv[]) { uint64_t endtime = nanoTime(); std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; } + exit: ippl::finalize(); } \ No newline at end of file From d8f7568a2266202d4a0e54d49c25a939505310d8 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 22 Apr 2024 12:51:55 +0200 Subject: [PATCH 15/32] Add plotscripts --- plotscripts/ampereplot.gp | 13 +++++++++++++ plotscripts/gaussplot.gp | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100755 plotscripts/ampereplot.gp create mode 100755 plotscripts/gaussplot.gp diff --git a/plotscripts/ampereplot.gp b/plotscripts/ampereplot.gp new file mode 100755 index 000000000..495a05cbd --- /dev/null +++ b/plotscripts/ampereplot.gp @@ -0,0 +1,13 @@ +#! /usr/bin/gnuplot +set terminal svg size 800,600 font "CMU Serif,22" +set logscale x 2 +set logscale y 2 +set key box +set output "ampere.svg" +set ylabel "Magnetic Flux density[T]" +set xlabel "Distance to wire [m]" +#set title "Ampere's Law" +set grid lw 2 +set ytics format "2^{%L}" +set xtics format "2^{%L}" +plot 'ampere_line.txt' u (abs($1)):(abs($2)) w points title "Measurements", 1.256e-6/(2*3.14159)*(1/x) w lines lw 2 title "Radial falloff" diff --git a/plotscripts/gaussplot.gp b/plotscripts/gaussplot.gp new file mode 100755 index 000000000..879584114 --- /dev/null +++ b/plotscripts/gaussplot.gp @@ -0,0 +1,13 @@ +#! /usr/bin/gnuplot +set terminal svg size 900,700 font "CMU Serif, 25" +set logscale x 2 +set logscale y 2 +set key box +set output "gauss.svg" +set ylabel "Electric Field [V/m]" +set xlabel "Distance to charge [m]" +#set title "Gauss's Law" +set grid lw 2 +set ytics format "2^{%L}" +set xtics format "2^{%L}" +plot 'gauss_line.txt' u (abs($1)):(abs($2)) w points title "Measurements" pt 2, (1 / (4 * 3.1416 * 8.854e-12 * x * x)) w lines lw 3 title "Quadratic falloff" From 46ff0d9eaf1666917b6464a25e27509539907b7d Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 29 Apr 2024 21:37:51 +0200 Subject: [PATCH 16/32] Some things --- alpine/CMakeLists.txt | 3 + alpine/FreeElectronLaser.cpp | 2442 +++++++++++++++++++++++++++++++ src/CMakeLists.txt | 2 +- src/MaxwellSolvers/FDTD.h | 340 ++++- src/MaxwellSolvers/Maxwell.h | 21 +- test/solver/TestFDTDSolver.cpp | 2507 ++------------------------------ 6 files changed, 2894 insertions(+), 2421 deletions(-) create mode 100644 alpine/FreeElectronLaser.cpp diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index c045305a8..e2711a472 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -21,6 +21,9 @@ target_link_libraries (PenningTrap ${IPPL_LIBS}) add_executable (LandauDamping LandauDamping.cpp) target_link_libraries (LandauDamping ${IPPL_LIBS}) +add_executable (FreeElectronLaser FreeElectronLaser.cpp) +target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) + add_executable (BumponTailInstability BumponTailInstability.cpp) target_link_libraries (BumponTailInstability ${IPPL_LIBS}) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp new file mode 100644 index 000000000..d15dd6e76 --- /dev/null +++ b/alpine/FreeElectronLaser.cpp @@ -0,0 +1,2442 @@ +#include "Ippl.h" + +#include "Types/Vector.h" + +#include "Field/Field.h" +#include "MaxwellSolvers/FDTD.h" +#include +#include // For popen +#define JSON_HAS_RANGES 0 //Merlin compilation +#include +#include +#include +#include +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include + + +constexpr float turbo_cm[256][3] = { + {0.18995,0.07176,0.23217}, + {0.19483,0.08339,0.26149}, + {0.19956,0.09498,0.29024}, + {0.20415,0.10652,0.31844}, + {0.20860,0.11802,0.34607}, + {0.21291,0.12947,0.37314}, + {0.21708,0.14087,0.39964}, + {0.22111,0.15223,0.42558}, + {0.22500,0.16354,0.45096}, + {0.22875,0.17481,0.47578}, + {0.23236,0.18603,0.50004}, + {0.23582,0.19720,0.52373}, + {0.23915,0.20833,0.54686}, + {0.24234,0.21941,0.56942}, + {0.24539,0.23044,0.59142}, + {0.24830,0.24143,0.61286}, + {0.25107,0.25237,0.63374}, + {0.25369,0.26327,0.65406}, + {0.25618,0.27412,0.67381}, + {0.25853,0.28492,0.69300}, + {0.26074,0.29568,0.71162}, + {0.26280,0.30639,0.72968}, + {0.26473,0.31706,0.74718}, + {0.26652,0.32768,0.76412}, + {0.26816,0.33825,0.78050}, + {0.26967,0.34878,0.79631}, + {0.27103,0.35926,0.81156}, + {0.27226,0.36970,0.82624}, + {0.27334,0.38008,0.84037}, + {0.27429,0.39043,0.85393}, + {0.27509,0.40072,0.86692}, + {0.27576,0.41097,0.87936}, + {0.27628,0.42118,0.89123}, + {0.27667,0.43134,0.90254}, + {0.27691,0.44145,0.91328}, + {0.27701,0.45152,0.92347}, + {0.27698,0.46153,0.93309}, + {0.27680,0.47151,0.94214}, + {0.27648,0.48144,0.95064}, + {0.27603,0.49132,0.95857}, + {0.27543,0.50115,0.96594}, + {0.27469,0.51094,0.97275}, + {0.27381,0.52069,0.97899}, + {0.27273,0.53040,0.98461}, + {0.27106,0.54015,0.98930}, + {0.26878,0.54995,0.99303}, + {0.26592,0.55979,0.99583}, + {0.26252,0.56967,0.99773}, + {0.25862,0.57958,0.99876}, + {0.25425,0.58950,0.99896}, + {0.24946,0.59943,0.99835}, + {0.24427,0.60937,0.99697}, + {0.23874,0.61931,0.99485}, + {0.23288,0.62923,0.99202}, + {0.22676,0.63913,0.98851}, + {0.22039,0.64901,0.98436}, + {0.21382,0.65886,0.97959}, + {0.20708,0.66866,0.97423}, + {0.20021,0.67842,0.96833}, + {0.19326,0.68812,0.96190}, + {0.18625,0.69775,0.95498}, + {0.17923,0.70732,0.94761}, + {0.17223,0.71680,0.93981}, + {0.16529,0.72620,0.93161}, + {0.15844,0.73551,0.92305}, + {0.15173,0.74472,0.91416}, + {0.14519,0.75381,0.90496}, + {0.13886,0.76279,0.89550}, + {0.13278,0.77165,0.88580}, + {0.12698,0.78037,0.87590}, + {0.12151,0.78896,0.86581}, + {0.11639,0.79740,0.85559}, + {0.11167,0.80569,0.84525}, + {0.10738,0.81381,0.83484}, + {0.10357,0.82177,0.82437}, + {0.10026,0.82955,0.81389}, + {0.09750,0.83714,0.80342}, + {0.09532,0.84455,0.79299}, + {0.09377,0.85175,0.78264}, + {0.09287,0.85875,0.77240}, + {0.09267,0.86554,0.76230}, + {0.09320,0.87211,0.75237}, + {0.09451,0.87844,0.74265}, + {0.09662,0.88454,0.73316}, + {0.09958,0.89040,0.72393}, + {0.10342,0.89600,0.71500}, + {0.10815,0.90142,0.70599}, + {0.11374,0.90673,0.69651}, + {0.12014,0.91193,0.68660}, + {0.12733,0.91701,0.67627}, + {0.13526,0.92197,0.66556}, + {0.14391,0.92680,0.65448}, + {0.15323,0.93151,0.64308}, + {0.16319,0.93609,0.63137}, + {0.17377,0.94053,0.61938}, + {0.18491,0.94484,0.60713}, + {0.19659,0.94901,0.59466}, + {0.20877,0.95304,0.58199}, + {0.22142,0.95692,0.56914}, + {0.23449,0.96065,0.55614}, + {0.24797,0.96423,0.54303}, + {0.26180,0.96765,0.52981}, + {0.27597,0.97092,0.51653}, + {0.29042,0.97403,0.50321}, + {0.30513,0.97697,0.48987}, + {0.32006,0.97974,0.47654}, + {0.33517,0.98234,0.46325}, + {0.35043,0.98477,0.45002}, + {0.36581,0.98702,0.43688}, + {0.38127,0.98909,0.42386}, + {0.39678,0.99098,0.41098}, + {0.41229,0.99268,0.39826}, + {0.42778,0.99419,0.38575}, + {0.44321,0.99551,0.37345}, + {0.45854,0.99663,0.36140}, + {0.47375,0.99755,0.34963}, + {0.48879,0.99828,0.33816}, + {0.50362,0.99879,0.32701}, + {0.51822,0.99910,0.31622}, + {0.53255,0.99919,0.30581}, + {0.54658,0.99907,0.29581}, + {0.56026,0.99873,0.28623}, + {0.57357,0.99817,0.27712}, + {0.58646,0.99739,0.26849}, + {0.59891,0.99638,0.26038}, + {0.61088,0.99514,0.25280}, + {0.62233,0.99366,0.24579}, + {0.63323,0.99195,0.23937}, + {0.64362,0.98999,0.23356}, + {0.65394,0.98775,0.22835}, + {0.66428,0.98524,0.22370}, + {0.67462,0.98246,0.21960}, + {0.68494,0.97941,0.21602}, + {0.69525,0.97610,0.21294}, + {0.70553,0.97255,0.21032}, + {0.71577,0.96875,0.20815}, + {0.72596,0.96470,0.20640}, + {0.73610,0.96043,0.20504}, + {0.74617,0.95593,0.20406}, + {0.75617,0.95121,0.20343}, + {0.76608,0.94627,0.20311}, + {0.77591,0.94113,0.20310}, + {0.78563,0.93579,0.20336}, + {0.79524,0.93025,0.20386}, + {0.80473,0.92452,0.20459}, + {0.81410,0.91861,0.20552}, + {0.82333,0.91253,0.20663}, + {0.83241,0.90627,0.20788}, + {0.84133,0.89986,0.20926}, + {0.85010,0.89328,0.21074}, + {0.85868,0.88655,0.21230}, + {0.86709,0.87968,0.21391}, + {0.87530,0.87267,0.21555}, + {0.88331,0.86553,0.21719}, + {0.89112,0.85826,0.21880}, + {0.89870,0.85087,0.22038}, + {0.90605,0.84337,0.22188}, + {0.91317,0.83576,0.22328}, + {0.92004,0.82806,0.22456}, + {0.92666,0.82025,0.22570}, + {0.93301,0.81236,0.22667}, + {0.93909,0.80439,0.22744}, + {0.94489,0.79634,0.22800}, + {0.95039,0.78823,0.22831}, + {0.95560,0.78005,0.22836}, + {0.96049,0.77181,0.22811}, + {0.96507,0.76352,0.22754}, + {0.96931,0.75519,0.22663}, + {0.97323,0.74682,0.22536}, + {0.97679,0.73842,0.22369}, + {0.98000,0.73000,0.22161}, + {0.98289,0.72140,0.21918}, + {0.98549,0.71250,0.21650}, + {0.98781,0.70330,0.21358}, + {0.98986,0.69382,0.21043}, + {0.99163,0.68408,0.20706}, + {0.99314,0.67408,0.20348}, + {0.99438,0.66386,0.19971}, + {0.99535,0.65341,0.19577}, + {0.99607,0.64277,0.19165}, + {0.99654,0.63193,0.18738}, + {0.99675,0.62093,0.18297}, + {0.99672,0.60977,0.17842}, + {0.99644,0.59846,0.17376}, + {0.99593,0.58703,0.16899}, + {0.99517,0.57549,0.16412}, + {0.99419,0.56386,0.15918}, + {0.99297,0.55214,0.15417}, + {0.99153,0.54036,0.14910}, + {0.98987,0.52854,0.14398}, + {0.98799,0.51667,0.13883}, + {0.98590,0.50479,0.13367}, + {0.98360,0.49291,0.12849}, + {0.98108,0.48104,0.12332}, + {0.97837,0.46920,0.11817}, + {0.97545,0.45740,0.11305}, + {0.97234,0.44565,0.10797}, + {0.96904,0.43399,0.10294}, + {0.96555,0.42241,0.09798}, + {0.96187,0.41093,0.09310}, + {0.95801,0.39958,0.08831}, + {0.95398,0.38836,0.08362}, + {0.94977,0.37729,0.07905}, + {0.94538,0.36638,0.07461}, + {0.94084,0.35566,0.07031}, + {0.93612,0.34513,0.06616}, + {0.93125,0.33482,0.06218}, + {0.92623,0.32473,0.05837}, + {0.92105,0.31489,0.05475}, + {0.91572,0.30530,0.05134}, + {0.91024,0.29599,0.04814}, + {0.90463,0.28696,0.04516}, + {0.89888,0.27824,0.04243}, + {0.89298,0.26981,0.03993}, + {0.88691,0.26152,0.03753}, + {0.88066,0.25334,0.03521}, + {0.87422,0.24526,0.03297}, + {0.86760,0.23730,0.03082}, + {0.86079,0.22945,0.02875}, + {0.85380,0.22170,0.02677}, + {0.84662,0.21407,0.02487}, + {0.83926,0.20654,0.02305}, + {0.83172,0.19912,0.02131}, + {0.82399,0.19182,0.01966}, + {0.81608,0.18462,0.01809}, + {0.80799,0.17753,0.01660}, + {0.79971,0.17055,0.01520}, + {0.79125,0.16368,0.01387}, + {0.78260,0.15693,0.01264}, + {0.77377,0.15028,0.01148}, + {0.76476,0.14374,0.01041}, + {0.75556,0.13731,0.00942}, + {0.74617,0.13098,0.00851}, + {0.73661,0.12477,0.00769}, + {0.72686,0.11867,0.00695}, + {0.71692,0.11268,0.00629}, + {0.70680,0.10680,0.00571}, + {0.69650,0.10102,0.00522}, + {0.68602,0.09536,0.00481}, + {0.67535,0.08980,0.00449}, + {0.66449,0.08436,0.00424}, + {0.65345,0.07902,0.00408}, + {0.64223,0.07380,0.00401}, + {0.63082,0.06868,0.00401}, + {0.61923,0.06367,0.00410}, + {0.60746,0.05878,0.00427}, + {0.59550,0.05399,0.00453}, + {0.58336,0.04931,0.00486}, + {0.57103,0.04474,0.00529}, + {0.55852,0.04028,0.00579}, + {0.54583,0.03593,0.00638}, + {0.53295,0.03169,0.00705}, + {0.51989,0.02756,0.00780}, + {0.50664,0.02354,0.00863}, + {0.49321,0.01963,0.00955}, + {0.47960,0.01583,0.01055} +}; + + + + +uint64_t nanoTime(){ + using namespace std; + using namespace chrono; + return duration_cast(high_resolution_clock::now().time_since_epoch()).count(); +} +//CONFIG +KOKKOS_INLINE_FUNCTION bool isNaN(float x){ + #ifdef __CUDA_ARCH__ + return isnan(x); + #else + return std::isnan(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isINF(float x){ + #ifdef __CUDA_ARCH__ + return isinf(x); + #else + return std::isinf(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isNaN(double x){ + #ifdef __CUDA_ARCH__ + return isnan(x); + #else + return std::isnan(x); + #endif +} +KOKKOS_INLINE_FUNCTION bool isINF(double x){ + #ifdef __CUDA_ARCH__ + return isinf(x); + #else + return std::isinf(x); + #endif +} +#define assert_isreal(X) assert(!isNaN(X) && !isINF(X)) +template +KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, ippl::Vector to, Ts... args){ + if constexpr(sizeof...(Ts) == Dim){ + c(args...); + } + else{ + for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){ + serial_for(c, from, to, args..., i); + } + } +} + + + + + +struct config { + using scalar = double; + + //using length_unit = funits::length; + //using duration_unit = funits::time; + // GRID PARAMETERS + ippl::Vector resolution; + + ippl::Vector extents; + scalar total_time; + scalar timestep_ratio; + + scalar length_scale_in_jobfile, temporal_scale_in_jobfile; + + // All in unit_charge, or unit_mass + scalar charge, mass; + + uint64_t num_particles; + bool space_charge; + + // BUNCH PARAMETERS + ippl::Vector mean_position; + ippl::Vector sigma_position; + ippl::Vector position_truncations; + ippl::Vector sigma_momentum; + scalar bunch_gamma; + + scalar undulator_K; + scalar undulator_period; + scalar undulator_length; + + uint32_t output_rhythm; + std::string output_path; + std::unordered_map experiment_options; +}; +template +ippl::Vector getVector(const nlohmann::json& j){ + if(j.is_array()){ + assert(j.size() == Dim); + ippl::Vector ret; + for(unsigned i = 0;i < Dim;i++) + ret[i] = (scalar)j[i]; + return ret; + } + else{ + std::cerr << "Warning: Obtaining Vector from scalar json\n"; + ippl::Vector ret; + ret.fill((scalar)j); + return ret; + } +} +template +struct DefaultedStringLiteral { + constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) { + std::copy_n(str, N, key); + } + + T value; + char key[N]; +}; +template +struct StringLiteral { + constexpr StringLiteral(const char (&str)[N]) { + std::copy_n(str, N, value); + } + + char value[N]; + constexpr DefaultedStringLiteral operator>>(int t)const noexcept{ + return DefaultedStringLiteral(value, t); + } + constexpr size_t size()const noexcept{return N - 1;} +}; +template +constexpr size_t chash(){ + size_t hash = 5381; + int c; + + for(size_t i = 0;i < lit.size();i++){ + c = lit.value[i]; + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const char* val) { + size_t hash = 5381; + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const std::string& _val) { + size_t hash = 5381; + const char* val = _val.c_str(); + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +std::string lowercase_singular(std::string str) { + // Convert string to lowercase + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + + // Check if the string ends with "s" and remove it if it does + if (!str.empty() && str.back() == 's') { + str.pop_back(); + } + + return str; +} +double get_time_multiplier(const nlohmann::json& j){ + std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]); + double time_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-time">(): + case chash<"plancktime">(): + case chash<"pt">(): + case chash<"natural">(): + time_factor = unit_time_in_seconds; + break; + case chash<"picosecond">(): + time_factor = 1e-12; + break; + case chash<"nanosecond">(): + time_factor = 1e-9; + break; + case chash<"microsecond">(): + time_factor = 1e-6; + break; + case chash<"millisecond">(): + time_factor = 1e-3; + break; + case chash<"second">(): + time_factor = 1.0; + break; + default: + std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n"; + break; + } + return time_factor; +} +double get_length_multiplier(const nlohmann::json& options){ + std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]); + double length_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-length">(): + case chash<"plancklength">(): + case chash<"pl">(): + case chash<"natural">(): + length_factor = unit_length_in_meters; + break; + case chash<"picometer">(): + length_factor = 1e-12; + break; + case chash<"nanometer">(): + length_factor = 1e-9; + break; + case chash<"micrometer">(): + length_factor = 1e-6; + break; + case chash<"millimeter">(): + length_factor = 1e-3; + break; + case chash<"meter">(): + length_factor = 1.0; + break; + default: + std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n"; + break; + } + return length_factor; +} +config read_config(const char *filepath){ + std::ifstream cfile(filepath); + nlohmann::json j; + cfile >> j; + config::scalar lmult = get_length_multiplier(j); + config::scalar tmult = get_time_multiplier(j); + config ret; + + ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters; + ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters; + ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters; + ret.resolution = getVector(j["mesh"]["resolution"]); + + //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; + //std::cerr << "Tmult: " << tmult << "\n"; + if(j.contains("timestep-ratio")){ + ret.timestep_ratio = (config::scalar)j["timestep-ratio"]; + } + + else{ + ret.timestep_ratio = 1; + } + ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; + ret.space_charge = (bool)(j["mesh"]["space-charge"]); + ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); + if(ret.bunch_gamma < config::scalar(1)){ + std::cerr << "Gamma must be >= 1\n"; + exit(1); + } + assert(j.contains("undulator")); + assert(j["undulator"].contains("static-undulator")); + + ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; + ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters; + ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters; + assert(!std::isnan(ret.undulator_length)); + assert(!std::isnan(ret.undulator_period)); + assert(!std::isnan(ret.extents[0])); + assert(!std::isnan(ret.extents[1])); + assert(!std::isnan(ret.extents[2])); + assert(!std::isnan(ret.total_time)); + ret.length_scale_in_jobfile = get_length_multiplier(j); + ret.temporal_scale_in_jobfile = get_time_multiplier(j); + ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; + ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; + ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"]; + ret.mean_position = getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; + ret.sigma_position = getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; + ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters; + ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); + ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; + ret.output_path = "../data/"; + if(j["output"].contains("path")){ + ret.output_path = j["output"]["path"]; + if(!ret.output_path.ends_with('/')){ + ret.output_path.push_back('/'); + } + } + if(j.contains("experimentation")){ + nlohmann::json je = j["experimentation"]; + for(auto it = je.begin(); it!= je.end();it++){ + ret.experiment_options[it.key()] = double(it.value()); + } + } + return ret; +} +template +using FieldVector = ippl::Vector; +template +struct BunchInitialize { + + /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is + * manual the charge at points of the position vector will be produced. */ + // std::string bunchType_; + + /* Type of the distributions (transverse or longitudinal) in the bunch. */ + std::string distribution_; + + /* Type of the generator for creating the bunch distribution. */ + std::string generator_; + + /* Total number of macroparticles in the bunch. */ + unsigned int numberOfParticles_; + + /* Total charge of the bunch in pC. */ + scalar cloudCharge_; + + /* Initial energy of the bunch in MeV. */ + scalar initialGamma_; + + /* Initial normalized speed of the bunch. */ + scalar initialBeta_; + + /* Initial movement direction of the bunch, which is a unit vector. */ + FieldVector initialDirection_; + + /* Position of the center of the bunch in the unit of length scale. */ + // std::vector > position_; + FieldVector position_; + + /* Number of macroparticles in each direction for 3Dcrystal type. */ + FieldVector numbers_; + + /* Lattice constant in x, y, and z directions for 3D crystal type. */ + FieldVector latticeConstants_; + + /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal + * type, it will be the spread in position for each micro-bunch of the crystal. */ + FieldVector sigmaPosition_; + + /* Spread in energy in each direction. */ + FieldVector sigmaGammaBeta_; + + /* Store the truncation transverse distance for the electron generation. */ + scalar tranTrun_; + + /* Store the truncation longitudinal distance for the electron generation. */ + scalar longTrun_; + + /* Name of the file for reading the electrons distribution from. */ + std::string fileName_; + + /* The radiation wavelength corresponding to the bunch length outside the undulator */ + scalar lambda_; + + /* Bunching factor for the initialization of the bunch. */ + scalar bF_; + + /* Phase of the bunching factor for the initialization of the bunch. */ + scalar bFP_; + + /* Boolean flag determining the activation of shot-noise. */ + bool shotNoise_; + + /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. */ + FieldVector betaVector_; + + /* Initialize the parameters for the bunch initialization to some first values. */ + // BunchInitialize (); +}; + + + + + +//END CONFIG + +//LORENTZ FRAME AND UNDULATOR +template +struct UniaxialLorentzframe{ + constexpr static T c = 1.0; + using scalar = T; + using Vector3 = ippl::Vector; + scalar beta_m; + scalar gammaBeta_m; + scalar gamma_m; + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){ + gammaBeta_m = gammaBeta; + beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta)); + gamma_m = sqrt(1 + gammaBeta * (gammaBeta)); + } + KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{ + arg[axis] = gamma_m * (arg[axis] + beta_m * time); + } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB(const Kokkos::pair, ippl::Vector>& unprimedEB)const noexcept{ + + Kokkos::pair, ippl::Vector> ret; + ippl::Vector betavec{0, 0, beta_m}; + ret.first = ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; + ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; + return ret; + } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> inverse_transform_EB(const Kokkos::pair, ippl::Vector>& primedEB)const noexcept{ + return UniaxialLorentzframe(-gammaBeta_m).transform_EB(primedEB); + } +}; +template +BunchInitialize generate_mithra_config(const config& cfg, const UniaxialLorentzframe& /*frame_boost unused*/) { + using vec3 = ippl::Vector; + scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K); + BunchInitialize init; + init.generator_ = "random"; + init.distribution_ = "uniform"; + init.initialDirection_ = vec3{0, 0, 1}; + init.initialGamma_ = cfg.bunch_gamma; + init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); + init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); + init.sigmaPosition_ = cfg.sigma_position.template cast(); + + // TODO: Initial bunching factor huh + init.bF_ = 0.01; + init.bFP_ = 0; + init.shotNoise_ = false; + init.cloudCharge_ = cfg.charge; + init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); + init.longTrun_ = cfg.position_truncations[2]; + init.tranTrun_ = cfg.position_truncations[0]; + init.position_ = cfg.mean_position.cast(); + init.betaVector_ = ippl::Vector{0,0,init.initialBeta_}; + init.numberOfParticles_ = cfg.num_particles; + + init.numbers_ = 0; // UNUSED + init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED + + return init; +} +template +struct Charge { + + Double q; /* Charge of the point in the unit of electron charge. */ + FieldVector rnp, rnm; /* Position vector of the charge. */ + FieldVector gb; /* Normalized velocity vector of the charge. */ + + /* Double flag determining if the particle is passing the entrance point of the undulator. This flag + * can be used for better boosting the bunch to the moving frame. We need to consider it to be double, + * because this flag needs to be communicated during bunch update. */ + Double e; + + // Charge(); +}; +template +using ChargeVector = std::list>; +template +void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector & chargeVector, int rank, int size, int ia){ + /* Correct the number of particles if it is not a multiple of four. */ + if ( bunchInit.numberOfParticles_ % 4 != 0 ) + { + unsigned int n = bunchInit.numberOfParticles_ % 4; + bunchInit.numberOfParticles_ += 4 - n; + //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") + + // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); + } + + /* Save the initially given number of particles. */ + unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); + + /* Declare the required parameters for the initialization of charge vectors. */ + Charge charge; charge.q = bunchInit.cloudCharge_ / Np; + FieldVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; + FieldVector r (0.0); + FieldVector t (0.0); + Double t0, g; + Double zmin = 1e100; + Double Ne, bF, bFi; + unsigned int bmi; + std::vector randomNumbers; + + /* The initialization in group of four particles should only be done if there exists an undulator in + * the interaction. */ + unsigned int ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4; + + /* Check the bunching factor. */ + if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 ) + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") ); + //exit(1); + } + + /* If the generator is random we should make sure that different processors do not produce the same + * random numbers. */ + if ( bunchInit.generator_ == "random" ) + { + /* Initialize the random number generator. */ + srand ( time(NULL) ); + /* Np / ng * 20 is the maximum number of particles. */ + randomNumbers.resize( Np / ng * 20, 0.0); + for ( unsigned int ri = 0; ri < Np / ng * 20; ri++) + randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX)); + } + + /* Declare the generator function depending on the input. */ + auto generate = [&] (unsigned int n, unsigned int m) { + //if ( bunchInit.generator_ == "random" ) + return ( randomNumbers.at( n * 2 * Np/ng + m ) ); + //else + // return ( randomNumbers[ n * 2 * Np/ng + m ] ); + //TODO: Return halton properly + //return ( halton(n,m) ); + }; + + /* Declare the function for injecting the shot noise. */ + auto insertCharge = [&] (Charge q) { + + for ( unsigned int ii = 0; ii < ng; ii++ ) + { + /* The random modulation is introduced depending on the shot-noise being activated. */ + if ( bunchInit.shotNoise_ ) + { + /* Obtain the number of beamlet. */ + bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ ); + + /* Obtain the phase and amplitude of the modulation. */ + bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) ); + + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) ); + } + else if ( bunchInit.lambda_ != 0.0) + { + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 ); + } + + /* Set this charge into the charge vector. */ + chargeVector.push_back(q); + } + }; + + /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to + * calculate the FEL bucket number. */ + if ( bunchInit.shotNoise_ ) + { + for (i = 0; i < Np / ng; i++) + { + if ( bunchInit.distribution_ == "uniform" ) + zmin = std::min( Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin ); + else if ( bunchInit.distribution_ == "gaussian" ) + zmin = std::min( (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin ); + else + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); + exit(1); + } + } + + if ( bunchInit.distribution_ == "uniform" ) + for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++) + { + t0 = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) ); + t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); + + zmin = std::min( t0 , zmin ); + } + + //zmin = zmin + bunchInit.position_[ia][2]; + zmin = zmin + bunchInit.position_[2]; + + /* Obtain the average number of electrons per FEL beamlet. */ + Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] ); + + /* Set the bunching factor level for the shot noise depending on the given values. */ + bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_; + + //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) ); + } + + /* Determine the properties of each charge point and add them to the charge vector. */ + for (i = rank; i < Np / ng; i += size) + { + /* Determine the transverse coordinate. */ + r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); + r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); + + /* Determine the longitudinal coordinate. */ + if ( bunchInit.distribution_ == "uniform" ) + r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2]; + else if ( bunchInit.distribution_ == "gaussian" ) + r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); + else + { + //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); + exit(1); + } + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); + + if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) + { + /* Shift the generated charge to the center position and momentum space. */ + //charge.rnp = bunchInit.position_[ia]; + charge.rnp = bunchInit.position_; + charge.rnp += r; + + charge.gb = gb; + charge.gb += t; + //std::cout << gb << "\n"; + if(std::isinf(gb[2])){ + std::cerr << "it klonked here\n"; + } + + /* Insert this charge and the mirrored ones into the charge vector. */ + insertCharge(charge); + } + } + + /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the + * CSE from the tail of the bunch. */ + if ( bunchInit.distribution_ == "uniform" ){ + for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size) + { + + r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); + r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); + + /* Determine the longitudinal coordinate. */ + r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); + r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); + //std::cerr << "DOING UNIFORM tapering!!!\n"; + if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) + { + //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n"; + /* Shift the generated charge to the center position and momentum space. */ + charge.rnp = bunchInit.position_[ia]; + charge.rnp += r; + + charge.gb = gb; + + charge.gb += t; + //std::cout << gb[0] << "\n"; + //if(std::isinf(gb.squaredNorm())){ + // std::cerr << "it klonked here\n"; + //} + /* Insert this charge and the mirrored ones into the charge vector. */ + insertCharge(charge); + } + } + } + + /* Reset the value for the number of particle variable according to the installed number of + * macro-particles and perform the corresponding changes. */ + bunchInit.numberOfParticles_ = chargeVector.size(); +} + +template +void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma){ + Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); + Double zmaxL = -1.0e100, zmaxG; + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) + { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + if(std::isinf(g)){ + std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n"; + abort(); + } + Double bz = iterQ->gb[2] / g; + iterQ->rnp[2] *= frame_gamma; + + iterQ->gb[2] = frame_gamma * g * ( bz - frame_beta ); + + zmaxL = std::max( zmaxL , iterQ->rnp[2] ); + } + zmaxG = zmaxL; + struct { + Double zu_; + Double beta_; + } bunch_; + bunch_.zu_ = zmaxG; + bunch_.beta_ = frame_beta; + + /****************************************************************************************************/ + + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) + { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + iterQ->rnp[0] += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + iterQ->rnp[1] += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + iterQ->rnp[2] += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; + if(std::isnan(iterQ->rnp[2])){ + std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ << ", " << frame_beta << "\n"; + std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; + abort(); + } + } +} + + + + +template +size_t initialize_bunch_mithra( + bunch_type& bunch, + const BunchInitialize& bunchInit, + scalar frame_gamma){ + + ChargeVector oof; + initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0); + for(auto& c : oof){ + if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + } + boost_bunch(oof, frame_gamma); + for(auto& c : oof){ + if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){ + std::cout << "Pos after boost: " << c.rnp << "\n"; + break; + } + } + Kokkos::View*, Kokkos::HostSpace> positions("", oof.size()); + Kokkos::View*, Kokkos::HostSpace> gammabetas("", oof.size()); + auto iterQ = oof.begin(); + for (size_t i = 0; i < oof.size(); i++) { + assert_isreal(iterQ->gb[0]); + assert_isreal(iterQ->gb[1]); + assert_isreal(iterQ->gb[2]); + assert(iterQ->gb[2] != 0.0f); + scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + assert_isreal(g); + scalar bz = iterQ->gb[2] / g; + assert_isreal(bz); + (void)bz; + positions(i) = iterQ->rnp; + gammabetas(i) = iterQ->gb; + ++iterQ; + } + if(oof.size() > bunch.getLocalNum()){ + bunch.create(oof.size() - bunch.getLocalNum()); + } + Kokkos::View*> dpositions ("", oof.size()); + Kokkos::View*> dgammabetas("", oof.size()); + + Kokkos::deep_copy(dpositions, positions); + Kokkos::deep_copy(dgammabetas, gammabetas); + Kokkos::deep_copy(bunch.R_nm1.getView(), positions); + Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas); + auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();; + Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ + rview(i) = dpositions(i); + rm1view(i) = dpositions(i); + gbview(i) = dgammabetas(i); + }); + Kokkos::fence(); + + return oof.size(); +} + + + + + + + +//END LORENTZ FRAME AND UNDULATOR AND BUNCH + + + + + +//PREAMBLE + +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + for(unsigned d = 0;d < dim;d++){ + vec[d] = vec[d] * vec[d]; + } + #ifndef __CUDA_ARCH__ + using std::exp; + #endif + return exp(-(vec.sum()) / (stddev * stddev)); +} + + + + +template +constexpr KOKKOS_INLINE_FUNCTION auto first(){ + return a; +} +template +constexpr KOKKOS_INLINE_FUNCTION auto second(){ + return b; +} + +template +KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, + const ippl::Vector& b) { + ippl::Vector ret{0.0, 0.0, 0.0}; + ret[0] = a[1] * b[2] - a[2] * b[1]; + ret[1] = a[2] * b[0] - a[0] * b[2]; + ret[2] = a[0] * b[1] - a[1] * b[0]; + return ret; +} +template +KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ + //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; + //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + Kokkos::pair, ippl::Vector> ret; + //pos -= spacing * T(0.5); + ippl::Vector relpos = pos - origin; + ippl::Vector gridpos = relpos / hr; + ippl::Vector ipos; + ipos = gridpos.template cast(); + ippl::Vector fracpos; + for(unsigned k = 0;k < 3;k++){ + fracpos[k] = gridpos[k] - (int)ipos[k]; + } + //TODO: NGHOST!!!!!!! + ipos += ippl::Vector(1); + ret.first = ipos; + ret.second = fracpos; + return ret; +} +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ + //std::cout << "Value: " << value << "\n"; + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + //std::cout << "out of bounds\n"; + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} + +template +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ + + + Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); + //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); + for(int d = 0;d < 3;d++){ + //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ + // std::cout <(nghost); + //to_ipos += ippl::Vector(nghost); + + if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const int nghost = 1; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); +} + +// END PREAMBLE + + +//BEGIN ABSORBING BOUNDARY CONDITION +template +struct second_order_abc_face{ + using scalar = _scalar; + scalar Cweights[5]; + int sign; + constexpr static unsigned main_axis = _main_axis; + KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ + constexpr scalar c = 1; + constexpr unsigned side_axes[2] = {_side_axes...}; + static_assert( + (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || + (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || + (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) + ); + assert(_main_axis != side_axes[0]); + assert(_main_axis != side_axes[1]); + assert(side_axes[0] != side_axes[1]); + constexpr scalar truncation_order = 2.0; + scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); + scalar q = - 1.0 / ( 1 + 1 ); + + scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); + + Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + assert(abs(Cweights[1] + 1) < 1e-6); //Like literally + Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; + Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; + Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + constexpr unsigned side_axes[2] = {_side_axes...}; + ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; + ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; + ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); + return advanceBoundaryS( + A_nm1(i,j,k), A_n(i,j,k), + apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), + apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), + apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), + apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) + ); + } + template + KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , + const value_type& v3 , const value_type& v4 , const value_type& v5 , + const value_type& v6 , const value_type& v7 , const value_type& v8 , + const value_type& v9 , const value_type& v10, const value_type& v11, + const value_type& v12, const value_type& v13)const noexcept + { + + value_type v0 = + Cweights[0] * (v1 + v5) + + (Cweights[1]) * v3 + + (Cweights[2]) * ( v2 + v4 ) + + (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + + (Cweights[4]) * ( v8 + v9 + v12 + v13 ); + return v0; + } +}; +template +struct second_order_abc_edge{ + using scalar = _scalar; + // + scalar Eweights[5]; + + KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ + static_assert(normal_axis1 != normal_axis2); + static_assert(edge_axis != normal_axis2); + static_assert(edge_axis != normal_axis1); + static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); + constexpr scalar c0_ = scalar(1); + scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); + if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else{ + assert(false); + } + + + + + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; + ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); + ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); + ippl::Vector acc0 = {i, j, k}; + ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); + ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); + ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); + //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); + ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; + //return A_n(i, j, k); + return advanceEdgeS( + A_n(i, j, k), A_nm1(i, j, k), + apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), + apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), + apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), + apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), + apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceEdgeS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19)const noexcept{ + value_type v0 = + Eweights[0] * (v3 + v8) + + Eweights[1] * (v5 + v6) + + Eweights[2] * (v2 + v9) + + Eweights[3] * (v1 + v4 + v7 + v10) + + Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; + return v0; + } +}; +template +struct second_order_abc_corner{ + using scalar = _scalar; + scalar Cweights[17]; + KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ + constexpr scalar c0_ = scalar(1); + Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + //First implementation: 0,0,0 corner + constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); + constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); + constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); + using ippl::apply; + const ippl::Vector offsets[8] = { + ippl::Vector{0,0,0}, + ippl::Vector{xoff,0,0}, + ippl::Vector{0,yoff,0}, + ippl::Vector{0,0,zoff}, + ippl::Vector{xoff,yoff,0}, + ippl::Vector{xoff,0,zoff}, + ippl::Vector{0,yoff,zoff}, + ippl::Vector{xoff,yoff,zoff}, + }; + return advanceCornerS( + apply(A_n, c), apply(A_nm1, c), + apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), + apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), + apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), + apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), + apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), + apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), + apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceCornerS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19, value_type v20, + value_type v21, value_type v22, value_type v23)const noexcept{ + return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + + v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + + v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + + v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + + v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + + v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + + v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + + v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; + } +}; + + + + + + +struct second_order_mur_boundary_conditions{ + template + void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ + using scalar = decltype(dt); + //TODO: tbh don't know + //const unsigned nghost = 1; + const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); + //assert_isreal((betaMur[0])); + //assert_isreal((betaMur[1])); + //assert_isreal((betaMur[2])); + auto A_n = FA_n.getView(); + auto A_np1 = FA_np1.getView(); + auto A_nm1 = FA_nm1.getView(); + ippl::Vector local_nr{ + uint32_t(A_n.extent(0)), + uint32_t(A_n.extent(1)), + uint32_t(A_n.extent(2)) + }; + constexpr uint32_t min_abc_boundary = 1; + constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 1){ + if(ig == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(ig == true_nr[0] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 2)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + if(Kokkos::popcount(val) == 2){ //Edge + if(ig == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + //if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 3){ + //printf("Corner: %d, %d, %d\n", i, j, k); + if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + } +}; +//END ABSORBING BOUNDARY CONDITION + + + + + + + + + +namespace ippl { + + template + struct undulator_parameters{ + scalar lambda; //MITHRA: lambda_u + scalar K; //Undulator parameter + scalar length; + scalar B_magnitude; + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){ + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); + //std::cout << "Setting bmag: " << B_magnitude << "\n"; + } + undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda); + } + }; + + template + struct nondispersive{ + scalar a1; + scalar a2; + scalar a4; + scalar a6; + scalar a8; + }; + template + struct Bunch : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(EB_gather); // Electric field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step + ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering + + }; + + + template + // clang-format off + struct FDTDFieldState{ + + //Sorry, can't do more than 3d + + constexpr static unsigned int dim = 3; + using Vector_t = ippl::Vector; + using value_type = ippl::Vector; + using EB_type = ippl::Vector, 2>; + using Mesh_t = ippl::UniformCartesian; + + bool periodic_bc; + using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using view_type = VField_t::view_type; + using ev_view_type = EBField_t::view_type; + //Fields + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_np1; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_n; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_nm1; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> J; + ippl::Field, typename ippl::UniformCartesian::DefaultCentering> EB; + + //Discretization options + Vector_t hr_m; + ippl::Vector nr_global; + ippl::Vector nr_local; + scalar dt; + Mesh_t* mesh_mp; + FieldLayout* layout_mp; + using playout_type = ParticleSpatialLayout; + playout_type playout; + Bunch> particles; + using bunch_type = Bunch>; + config m_config; + + /** + * @brief Construct a new FDTDFieldState object + * Mesh and resolution parameter are technically redundant + * + * @param resolution + * @param layout + * @param mesch + */ + FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){ + FA_np1.initialize(mesch, layout, 1); + FA_n.initialize(mesch, layout, 1); + FA_nm1.initialize(mesch, layout, 1); + J.initialize(mesch, layout, 1); + EB.initialize(mesch, layout, 1); + ThreeField Edummy; + Edummy.initialize(mesch, layout, 1); + FA_n = value_type(0); + FA_np1 = value_type(0); + FA_nm1 = value_type(0); + ippl::StandardFDTDSolver solver(J, Edummy, Edummy); + hr_m = mesch.getMeshSpacing(); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) + }; + nr_local = ippl::Vector{ + uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1), + uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), + uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) + }; + //std::cout << "NR_M_g: " << nr_global << "\n"; + //std::cout << "NR_M_l: " << nr_local << "\n"; + dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2])); + particles.create(nparticles); + setNoBoundaryConditions(); + + } + void setNoBoundaryConditions() { + periodic_bc = false; + typename VField_t::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + FA_n .setFieldBC(vector_bcs); + FA_np1.setFieldBC(vector_bcs); + FA_nm1.setFieldBC(vector_bcs); + } + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename VField_t::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + FA_n .setFieldBC(vector_bcs); + FA_np1.setFieldBC(vector_bcs); + FA_nm1.setFieldBC(vector_bcs); + } + + void scatterBunch(){ + //ippl::Vector* gammaBeta = this->gammaBeta; + const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; + assert_isreal(volume); + assert_isreal((scalar(1) / volume)); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = this->dt; + bool space_charge = m_config.space_charge; + ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + Vector_t pos = rview(i); + Vector_t to = rview(i); + Vector_t from = rm1view(i); + if(space_charge){ + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); + } + scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); + }); + Kokkos::fence(); + J.accumulateHalo(); + } + + void fieldStep(){ + const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); + nondispersive ndisp{ + .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), + .a2 = sq(dt / hr_m[0]), + .a4 = sq(dt / hr_m[1]), + .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), + .a8 = sq(dt) + }; + //if(periodic_bc){ + // FA_n.getFieldBC().apply(FA_n); + //} + auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); + auto source = this->J.getView(); + //FA_nm1.fillHalo(); + FA_n.fillHalo(); + ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; + const auto& ldom = layout_mp->getLocalNDIndex(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2) + + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5); + if(!val){ + A_np1(i, j, k) = -A_nm1(i,j,k) + + ndisp.a1 * A_n (i,j,k) + + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) + + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) + + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) + + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) + + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); + } + }); + Kokkos::fence(); + + if(!periodic_bc){ + FA_np1.fillHalo(); + second_order_mur_boundary_conditions bc; + + + bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom); + } + Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); + Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); + //std::swap(this->A_n, this->A_nm1); + //std::swap(this->A_np1, this->A_n); + + evaluate_EB(); + } + void evaluate_EB(){ + FA_n.fillHalo();//FA_nm1.fillHalo(); + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); + auto source = this->J.getView(); + auto EBv = this->EB.getView(); + Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{ + dAdx[0], dAdy[0], dAdz[0] + }; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + EBv(i,j,k)[0] = -dAdt - grad_phi; + EBv(i,j,k)[1] = curlA; + }); + Kokkos::fence(); + } + template + void updateBunch(scalar time, UniaxialLorentzframe ulb, callable undulator_field){ + + Kokkos::fence(); + auto gbview = particles.gamma_beta.getView(); + auto ebview = particles.EB_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); + scalar bunch_dt = dt / 3; + Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + EB.fillHalo(); + Kokkos::fence(); + for(int bts = 0;bts < 3;bts++){ + + particles.EB_gather.gather(EB, particles.R); + Kokkos::fence(); + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector, 2> EB = ebview(i); + ippl::Vector labpos = rview(i); + + ulb.primedToUnprimed(labpos, time); + + Kokkos::pair, ippl::Vector> EB_undulator_frame = undulator_field(labpos); + Kokkos::pair, ippl::Vector> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame); + assert_isreal((EB_undulator_bunch.first[0])); + assert_isreal((EB_undulator_bunch.first[1])); + assert_isreal((EB_undulator_bunch.first[2])); + assert_isreal((EB_undulator_bunch.second[0])); + assert_isreal((EB_undulator_bunch.second[1])); + assert_isreal((EB_undulator_bunch.second[2])); + EB[0] += EB_undulator_bunch.first; + EB[1] += EB_undulator_bunch.second; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + assert_isreal((ngammabeta[0])); + assert_isreal((ngammabeta[1])); + assert_isreal((ngammabeta[2])); + rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); + Kokkos::fence(); + } + Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); + size_t invalid_count = 0; + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz;// + extenz[0] = nr_global[0] * hr_m[0]; + extenz[1] = nr_global[1] * hr_m[1]; + extenz[2] = nr_global[2] * hr_m[2]; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t& ref){ + bool out_of_bounds = false; + ippl::Vector ppos = rview(i); + for(size_t d = 0;d < dim;d++){ + out_of_bounds |= (ppos[d] <= origo[d]); + out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain + } + invalid(i) = out_of_bounds; + ref += out_of_bounds; + }, + invalid_count); + particles.destroy(invalid, invalid_count); + Kokkos::fence(); + + } + }; + // clang-format on +} // namespace ippl +bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { + const int channels = 3; // RGB + const int stride = width * channels; + std::vector flippedData(data, data + stride * height); + + // Use stb_image_write to write the BMP image to the file descriptor + if (!stbi_write_bmp_to_func( + [](void* context, void* data, int size) { + FILE* f = reinterpret_cast(context); + fwrite(data, 1, size, f); + }, + fd, width, height, channels, flippedData.data())) { + return false; + } + + return true; +} +template +KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, const ippl::NDIndex<3>& lDom){ + using vector_type = ippl::Vector; + + vector_type l; + //vector_type origin = v.get_mesh().getOrigin(); + //auto lDom = v.getLayout().getLocalNDIndex(); + //vector_type hr = v.get_mesh().getMeshSpacing(); + for(unsigned k = 0;k < 3;k++){ + l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong + } + + ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; + ippl::Vector whi = l - index; + ippl::Vector wlo(1.0); + wlo -= whi; + //TODO: nghost + ippl::Vector args = index - lDom.first() + 1; + for(unsigned k = 0;k < 3;k++){ + if(args[k] >= v.extent(k) || args[k] == 0){ + return typename View::value_type(0); + } + } + //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n"; + return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/; + +} +template +scalar test_gauss_law(uint32_t n){ + + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = extents / nr.cast(); + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; + ippl::UniformCartesian mesh(owned, hx, origin); + + uint32_t pcount = 1 << 20; + config cfg{}; + cfg.space_charge = true; + ippl::FDTDFieldState field_state(layout, mesh, pcount, cfg); + field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + + //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); + + Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ + //bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + p1view(i) = pview(i); + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.J = scalar(0.0); + field_state.scatterBunch(); + for(size_t i = 0;i < 8*n;i++){ + field_state.fieldStep(); + } + field_state.evaluate_EB(); + Kokkos::fence(); + auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + + std::ofstream line("gauss_line.txt"); + typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for(unsigned i = 1;i < nr[2];i++){ + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; + } + return 0.0f; +} +template +scalar test_amperes_law(uint32_t n){ + + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; + } + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; + ippl::UniformCartesian mesh(owned, hx, origin); + + uint32_t pcount = 1 << 20; + config cfg{};cfg.space_charge = true; + ippl::FDTDFieldState field_state(layout, mesh, pcount, cfg); + field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + scalar timestep = field_state.dt; + //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); + + Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ + //bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); + p1view(i) = pview(i); + p1view(i)[1] -= vy * timestep; + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.J = scalar(0.0); + field_state.scatterBunch(); + for(size_t i = 0;i < 8*n;i++){ + field_state.fieldStep(); + } + field_state.evaluate_EB(); + Kokkos::fence(); + auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + + std::ofstream line("ampere_line.txt"); + + typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for(unsigned i = 1;i < nr[2];i++){ + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n"; + } + return 0.0f; +} +int main(int argc, char* argv[]) { + using scalar = double; + ippl::initialize(argc, argv); + { + + test_gauss_law(64); + test_amperes_law(64); + goto exit; + config cfg = read_config("../config.json"); + const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); + cfg.extents[2] *= frame_gamma; + cfg.total_time /= frame_gamma; + + const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); + const scalar frame_gammabeta = frame_gamma * frame_beta; + UniaxialLorentzframe frame_boost(frame_gammabeta); + ippl::undulator_parameters uparams(cfg); + const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; + const scalar distance_to_entry = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma); + auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector& position_in_lab_frame){ + Kokkos::pair, ippl::Vector> ret; + ret.first.fill(0); + ret.second.fill(0); + + if(position_in_lab_frame[2] < distance_to_entry){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; + } + else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); + } + return ret; + + }; + BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); + ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); + + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])}; + ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)}; + ippl::UniformCartesian mesh(owned, hx, origin); + std::cout << "hx: " << hx << "\n"; + std::cout << "origin: " << origin << "\n"; + std::cout << "extents: " << cfg.extents << std::endl; + if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){ + std::cerr << "Dispersion relation not satisfiable\n"; + abort(); + } + + ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); + + if(ippl::Comm->rank() == 0){ + std::cout << "Init particles: " << std::endl; + size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); + fdtd_state.particles.Q = cfg.charge / actual_pc; + fdtd_state.particles.mass = cfg.mass / actual_pc; + } + else{ + fdtd_state.particles.create(0); + } + { + auto rview = fdtd_state.particles.R.getView(); + auto rm1view = fdtd_state.particles.R_nm1.getView(); + ippl::Vector meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum()); + + Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + rview(i) -= meanpos; + rm1view(i) -= meanpos; + }); + } + fdtd_state.particles.setParticleBC(ippl::NO); + //fdtd_state.scatterBunch(); + //std::cout << cfg.charge << "\n"; + + size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); + uint64_t starttime = nanoTime(); + std::ofstream rad; + FILE* ffmpeg_file = nullptr; + if(ippl::Comm->rank() == 0){ + rad = std::ofstream("radiation.txt"); + const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; + if(cfg.output_rhythm != 0){ + ffmpeg_file = popen(ffmpegCmd, "w"); + } + } + + + for(size_t i = 0;i < timesteps_required;i++){ + fdtd_state.J = scalar(0.0); + fdtd_state.playout.update(fdtd_state.particles); + fdtd_state.scatterBunch(); + //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; + fdtd_state.fieldStep(); + fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); + auto ldom = layout.getLocalNDIndex(); + auto nrg = fdtd_state.nr_global; + auto ebv = fdtd_state.EB.getView(); + double radiation = 0.0; + Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ + //uint32_t ig = i + ldom.first()[0]; + //uint32_t jg = j + ldom.first()[1]; + Kokkos::pair, ippl::Vector> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]}; + ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; + ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; + uint32_t kg = k + ldom.first()[2]; + if(kg == nrg[2] - 3){ + ref += Elab.cross(Blab)[2]; + } + + }, radiation); + double radiation_in_watt_on_this_rank = radiation * + double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) * + fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; + double radiation_in_watt_global = 0.0; + MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); + if(ippl::Comm->rank() == 0){ + ippl::Vector pos{0,0,0}; + frame_boost.primedToUnprimed(pos, fdtd_state.dt * i); + rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; + } + //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; + //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; + int rank = ippl::Comm->rank(); + int size = ippl::Comm->size(); + if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ + + + int img_height = 400; + int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); + float* imagedata = new float[img_width * img_height * 3]; + std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); + float* idata_recvbuffer = new float[img_width * img_height * 3]; + int floatcount = img_width * img_height * 3; + uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; + std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); + auto phmirror = fdtd_state.particles.R.getHostMirror(); + Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView()); + for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){ + ippl::Vector ppos = phmirror(hi); + ppos -= mesh.getOrigin(); + ppos /= cfg.extents.cast(); + int x_imgcoord = ppos[2] * img_width; + int y_imgcoord = ppos[0] * img_height; + //printf("%d, %d\n", x_imgcoord, y_imgcoord); + if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){ + const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); + //std::cout << intensity << "\n"; + imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = + std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); + } + }; + auto ebh = fdtd_state.EB.getHostMirror(); + Kokkos::deep_copy(ebh, fdtd_state.EB.getView()); + + //double exp_avg = double(exp_sum) / double(acount); + { + for(int i = 1;i < img_width;i++){ + for(int j = 1;j < img_height;j++){ + int i_remap = (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; + int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; + if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ + if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ + ippl::Vector, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); + + ippl::Vector poynting = acc[0].cross(acc[1]); + Kokkos::pair, ippl::Vector> eblab = frame_boost.inverse_transform_EB( + Kokkos::make_pair, ippl::Vector>(acc[0], acc[1]) + ); + ippl::Vector poyntinglab = eblab.first.cross(eblab.second); + poynting = poyntinglab; + if(poynting.norm() > 0){ + //std::cout << poynting.norm() << "\n"; + } + float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f; + int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + } + } + } + } + } + int mask = 1; + while (mask < size) { + int partner = rank ^ mask; + //if((rank & (mask - 1)) == 0) + { + if ((rank & mask) == 0) { + // Send data to partner + MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); + // Apply image summation + for(int f = 0;f < floatcount;f++){ + imagedata[f] += idata_recvbuffer[f]; + } + } else { + MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator()); + // Receive data from partner and apply reduction + + } + } + mask <<= 1; // Move to next bit position for pairing + } + if(rank == 0){ + char output[1024] = {0}; + + snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); + if(ffmpeg_file != nullptr) + writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); + } + delete[] imagedata; + delete[] idata_recvbuffer; + delete[] imagedata_final; + } + } + uint64_t endtime = nanoTime(); + std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; + } + exit: + ippl::finalize(); +} \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cd2dc1364..d130045c9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,7 +64,7 @@ add_ippl_sources (Ippl.cpp) add_subdirectory (Communicate) if (ENABLE_FFT) - set (HEFFTE_LIBRARY Heffte::Heffte) + set (HEFFTE_LIBRARY Heffte) add_subdirectory (FFT) else() set (HEFFTE_LIBRARY "") diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index f4a1d5b9a..827ed7fef 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -1,11 +1,14 @@ #ifndef IPPL_FDTD_H #define IPPL_FDTD_H +#include +using std::size_t; #include "Types/Vector.h" #include "Field/Field.h" #include "FieldLayout/FieldLayout.h" #include "Meshes/UniformCartesian.h" +#include "MaxwellSolvers/Maxwell.h" constexpr double sqrt_4pi = 3.54490770181103205459; constexpr double alpha_scaling_factor = 1e30; constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; @@ -41,11 +44,340 @@ constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G; constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons; +constexpr double sq(double x){ + return x * x; +} +constexpr float sq(float x){ + return x * x; +} + namespace ippl { - //template - //struct FDTDFieldState{ - // - //}; + enum fdtd_bc{ + periodic, absorbing + }; + + template + class StandardFDTDSolver : Maxwell { + public: + constexpr static unsigned Dim = EMField::dim; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; + StandardFDTDSolver(FourField& source, EMField& E, EMField& B) { + Maxwell::setSource(source); + Maxwell::setEMFields(E, B); + initialize(); + } + virtual void solve()override{ + step(); + timeShift(); + evaluate_EB(); + } + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename FourField::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + A_n .setFieldBC(vector_bcs); + A_np1.setFieldBC(vector_bcs); + A_nm1.setFieldBC(vector_bcs); + } + + + FourField A_n; + FourField A_np1; + FourField A_nm1; + + private: + void timeShift(){ + + //Look into this, maybe cyclic swap is better + Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); + Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); + } + + void step(){ + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n .getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); + const auto source_view = Maxwell::source_mp->getView(); + + const scalar a1 = scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2])); + const scalar a2 = sq(dt / hr_m[0]); + const scalar a4 = sq(dt / hr_m[1]); + const scalar a6 = sq(dt / hr_m[2]); + const scalar a8 = sq(dt); + Vector true_nr = nr_m; + true_nr += (nghost * 2); + A_n.getFieldBC().apply(A_n); + A_np1.getFieldBC().apply(A_np1); + A_nm1.getFieldBC().apply(A_nm1); + Kokkos::parallel_for( + "Scalar potential update", ippl::getRangePolicy(aview, nghost), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + // global indices + const uint32_t ig = i + ldom.first()[0]; + const uint32_t jg = j + ldom.first()[1]; + const uint32_t kg = k + ldom.first()[2]; + uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) + + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + + if(val == 0){ + FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k) + + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k)) + + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k)) + + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1)) + + a8 * (-source_view(i, j, k)); + anp1view(i, j, k) = interior; + } + else{ + //std::cout << i << ", " << j << ", " << k << "\n"; + } + }); + Kokkos::fence(); + A_np1.fillHalo(); + A_np1.getFieldBC().apply(A_np1); + } + void evaluate_EB(){ + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView(); + auto source = Maxwell::source_mp->getView(); + auto Eview = Maxwell::En_mp->getView(); + auto Bview = Maxwell::Bn_mp->getView(); + + Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{ + dAdx[0], dAdy[0], dAdz[0] + }; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i,j,k) = -dAdt - grad_phi; + Bview(i,j,k) = curlA; + }); + Kokkos::fence(); + } + + + bool periodic_bc; + + typename FourField::Mesh_t* mesh_mp; + FieldLayout* layout_mp; + NDIndex domain_m; + Vector_t hr_m; + scalar dt; + Vector nr_m; + + void initialize() { + // get layout and mesh + layout_mp = &(this->source_mp->getLayout()); + mesh_mp = &(this->source_mp->get_mesh()); + + // get mesh spacing, domain, and mesh size + hr_m = mesh_mp->getMeshSpacing(); + dt = hr_m[0] / 2; + for (unsigned int i = 0; i < Dim; ++i){ + dt = std::min(dt, hr_m[i] / 2); + } + domain_m = layout_mp->getDomain(); + for (unsigned int i = 0; i < Dim; ++i) + nr_m[i] = domain_m[i].length(); + + // initialize fields + A_nm1.initialize(*mesh_mp, *layout_mp); + A_n.initialize(*mesh_mp, *layout_mp); + A_np1.initialize(*mesh_mp, *layout_mp); + + A_nm1 = 0.0; + A_n = 0.0; + A_np1 = 0.0; + }; + }; + /** + * @brief Nonstandard Finite-Difference Time-Domain + * + * @tparam EMField + * @tparam FourField + */ + template + class NonStandardFDTDSolver : Maxwell { + public: + constexpr static unsigned Dim = EMField::dim; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; + NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) { + auto hx = source.get_mesh().getMeshSpacing(); + if((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1){ + std::cerr << "Dispersion-free CFL condition not satisfiable\n"; + std::abort(); + } + Maxwell::setSource(source); + Maxwell::setEMFields(E, B); + initialize(); + } + virtual void solve()override{ + step(); + timeShift(); + evaluate_EB(); + } + FourField A_n; + FourField A_np1; + FourField A_nm1; + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename FourField::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + A_n .setFieldBC(vector_bcs); + A_np1.setFieldBC(vector_bcs); + A_nm1.setFieldBC(vector_bcs); + } + + + private: + void timeShift(){ + + //Look into this, maybe cyclic swap is better + Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); + Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); + } + template + struct nondispersive{ + scalar a1; + scalar a2; + scalar a4; + scalar a6; + scalar a8; + }; + void step(){ + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n .getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); + const auto source_view = Maxwell::source_mp->getView(); + + const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); + nondispersive ndisp{ + .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), + .a2 = sq(dt / hr_m[0]), + .a4 = sq(dt / hr_m[1]), + .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), + .a8 = sq(dt) + }; + Vector true_nr = nr_m; + true_nr += (nghost * 2); + std::cout << hr_m << "\n"; + std::cout << dt << "\n"; + A_n.getFieldBC().apply(A_n); + A_np1.getFieldBC().apply(A_np1); + A_nm1.getFieldBC().apply(A_nm1); + Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) + + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + if(!val){ + anp1view(i, j, k) = -anm1view(i,j,k) + + ndisp.a1 * aview(i,j,k) + + ndisp.a2 * (calA * aview(i + 1, j, k - 1) + (1 - 2 * calA) * aview(i + 1, j, k) + calA * aview(i + 1, j, k + 1)) + + ndisp.a2 * (calA * aview(i - 1, j, k - 1) + (1 - 2 * calA) * aview(i - 1, j, k) + calA * aview(i - 1, j, k + 1)) + + ndisp.a4 * (calA * aview(i, j + 1, k - 1) + (1 - 2 * calA) * aview(i, j + 1, k) + calA * aview(i, j + 1, k + 1)) + + ndisp.a4 * (calA * aview(i, j - 1, k - 1) + (1 - 2 * calA) * aview(i, j - 1, k) + calA * aview(i, j - 1, k + 1)) + + ndisp.a6 * aview(i, j, k + 1) + ndisp.a6 * aview(i, j, k - 1) + ndisp.a8 * source_view(i, j, k); + } + }); + Kokkos::fence(); + A_np1.fillHalo(); + A_np1.getFieldBC().apply(A_np1); + } + void evaluate_EB(){ + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView(); + auto source = Maxwell::source_mp->getView(); + auto Eview = Maxwell::En_mp->getView(); + auto Bview = Maxwell::Bn_mp->getView(); + + Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{ + dAdx[0], dAdy[0], dAdz[0] + }; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i,j,k) = -dAdt - grad_phi; + Bview(i,j,k) = curlA; + }); + Kokkos::fence(); + } + + + + + typename FourField::Mesh_t* mesh_mp; + FieldLayout* layout_mp; + NDIndex domain_m; + Vector_t hr_m; + scalar dt; + Vector nr_m; + bool periodic_bc; + + void initialize() { + // get layout and mesh + layout_mp = &(this->source_mp->getLayout()); + mesh_mp = &(this->source_mp->get_mesh()); + + // get mesh spacing, domain, and mesh size + hr_m = mesh_mp->getMeshSpacing(); + dt = hr_m[2]; + domain_m = layout_mp->getDomain(); + for (unsigned int i = 0; i < Dim; ++i) + nr_m[i] = domain_m[i].length(); + + // initialize fields + A_nm1.initialize(*mesh_mp, *layout_mp); + A_n.initialize(*mesh_mp, *layout_mp); + A_np1.initialize(*mesh_mp, *layout_mp); + + A_nm1 = 0.0; + A_n = 0.0; + A_np1 = 0.0; + }; + }; } #endif \ No newline at end of file diff --git a/src/MaxwellSolvers/Maxwell.h b/src/MaxwellSolvers/Maxwell.h index c493a1c8d..21e2a2c58 100644 --- a/src/MaxwellSolvers/Maxwell.h +++ b/src/MaxwellSolvers/Maxwell.h @@ -15,32 +15,31 @@ namespace ippl { - template + template class Maxwell { public: // typedefs for the different fields and vector fields - using typeR = typename SourceField::value_type; + using typeR = typename FourField::value_type; constexpr static unsigned Dim = EMField::dim; typedef typename EMField::Mesh_t Mesh_t; typedef typename EMField::Centering_t Centering_t; typedef Vector Vector_t; - typedef Field VectorSourceField_t; // define type for field layout typedef FieldLayout FieldLayout_t; // type for communication buffers - using memory_space = typename SourceField::memory_space; - using buffer_type = Communicate::buffer_type; + using memory_space = typename FourField::memory_space; + //using buffer_type = Communicate::buffer_type; /*! * Default constructor for Maxwell solvers; */ Maxwell() {} - Maxwell(SourceField& charge, VectorSourceField_t& current, EMField& E, EMField& B) { - setSources(charge, current); + Maxwell(FourField& source, EMField& E, EMField& B) { + setSource(source); setEMFields(E, B); } @@ -49,9 +48,8 @@ namespace ippl { * @param charge Reference to rho field * @param current Reference to J field */ - virtual void setSources(SourceField& charge, VectorSourceField_t& current) { - rhoN_mp = &charge; - JN_mp = ¤t; + virtual void setSource(FourField& charge) { + source_mp = &charge; } /*! @@ -73,8 +71,7 @@ namespace ippl { protected: // fields containing reference to charge and current - SourceField* rhoN_mp = nullptr; - VectorSourceField_t* JN_mp = nullptr; + FourField* source_mp = nullptr; // E and B fields EMField* En_mp = nullptr; diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 433f16ec1..6cf9d4d15 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -1,2437 +1,136 @@ +#include +using std::size_t; +#include #include "Ippl.h" - #include "Types/Vector.h" - #include "Field/Field.h" -#include -#include // For popen #include "MaxwellSolvers/FDTD.h" -#define JSON_HAS_RANGES 0 //Merlin compilation -#include -#include -#include -#include #define STB_IMAGE_WRITE_IMPLEMENTATION -#include -constexpr float turbo_cm[256][3] = { - {0.18995,0.07176,0.23217}, - {0.19483,0.08339,0.26149}, - {0.19956,0.09498,0.29024}, - {0.20415,0.10652,0.31844}, - {0.20860,0.11802,0.34607}, - {0.21291,0.12947,0.37314}, - {0.21708,0.14087,0.39964}, - {0.22111,0.15223,0.42558}, - {0.22500,0.16354,0.45096}, - {0.22875,0.17481,0.47578}, - {0.23236,0.18603,0.50004}, - {0.23582,0.19720,0.52373}, - {0.23915,0.20833,0.54686}, - {0.24234,0.21941,0.56942}, - {0.24539,0.23044,0.59142}, - {0.24830,0.24143,0.61286}, - {0.25107,0.25237,0.63374}, - {0.25369,0.26327,0.65406}, - {0.25618,0.27412,0.67381}, - {0.25853,0.28492,0.69300}, - {0.26074,0.29568,0.71162}, - {0.26280,0.30639,0.72968}, - {0.26473,0.31706,0.74718}, - {0.26652,0.32768,0.76412}, - {0.26816,0.33825,0.78050}, - {0.26967,0.34878,0.79631}, - {0.27103,0.35926,0.81156}, - {0.27226,0.36970,0.82624}, - {0.27334,0.38008,0.84037}, - {0.27429,0.39043,0.85393}, - {0.27509,0.40072,0.86692}, - {0.27576,0.41097,0.87936}, - {0.27628,0.42118,0.89123}, - {0.27667,0.43134,0.90254}, - {0.27691,0.44145,0.91328}, - {0.27701,0.45152,0.92347}, - {0.27698,0.46153,0.93309}, - {0.27680,0.47151,0.94214}, - {0.27648,0.48144,0.95064}, - {0.27603,0.49132,0.95857}, - {0.27543,0.50115,0.96594}, - {0.27469,0.51094,0.97275}, - {0.27381,0.52069,0.97899}, - {0.27273,0.53040,0.98461}, - {0.27106,0.54015,0.98930}, - {0.26878,0.54995,0.99303}, - {0.26592,0.55979,0.99583}, - {0.26252,0.56967,0.99773}, - {0.25862,0.57958,0.99876}, - {0.25425,0.58950,0.99896}, - {0.24946,0.59943,0.99835}, - {0.24427,0.60937,0.99697}, - {0.23874,0.61931,0.99485}, - {0.23288,0.62923,0.99202}, - {0.22676,0.63913,0.98851}, - {0.22039,0.64901,0.98436}, - {0.21382,0.65886,0.97959}, - {0.20708,0.66866,0.97423}, - {0.20021,0.67842,0.96833}, - {0.19326,0.68812,0.96190}, - {0.18625,0.69775,0.95498}, - {0.17923,0.70732,0.94761}, - {0.17223,0.71680,0.93981}, - {0.16529,0.72620,0.93161}, - {0.15844,0.73551,0.92305}, - {0.15173,0.74472,0.91416}, - {0.14519,0.75381,0.90496}, - {0.13886,0.76279,0.89550}, - {0.13278,0.77165,0.88580}, - {0.12698,0.78037,0.87590}, - {0.12151,0.78896,0.86581}, - {0.11639,0.79740,0.85559}, - {0.11167,0.80569,0.84525}, - {0.10738,0.81381,0.83484}, - {0.10357,0.82177,0.82437}, - {0.10026,0.82955,0.81389}, - {0.09750,0.83714,0.80342}, - {0.09532,0.84455,0.79299}, - {0.09377,0.85175,0.78264}, - {0.09287,0.85875,0.77240}, - {0.09267,0.86554,0.76230}, - {0.09320,0.87211,0.75237}, - {0.09451,0.87844,0.74265}, - {0.09662,0.88454,0.73316}, - {0.09958,0.89040,0.72393}, - {0.10342,0.89600,0.71500}, - {0.10815,0.90142,0.70599}, - {0.11374,0.90673,0.69651}, - {0.12014,0.91193,0.68660}, - {0.12733,0.91701,0.67627}, - {0.13526,0.92197,0.66556}, - {0.14391,0.92680,0.65448}, - {0.15323,0.93151,0.64308}, - {0.16319,0.93609,0.63137}, - {0.17377,0.94053,0.61938}, - {0.18491,0.94484,0.60713}, - {0.19659,0.94901,0.59466}, - {0.20877,0.95304,0.58199}, - {0.22142,0.95692,0.56914}, - {0.23449,0.96065,0.55614}, - {0.24797,0.96423,0.54303}, - {0.26180,0.96765,0.52981}, - {0.27597,0.97092,0.51653}, - {0.29042,0.97403,0.50321}, - {0.30513,0.97697,0.48987}, - {0.32006,0.97974,0.47654}, - {0.33517,0.98234,0.46325}, - {0.35043,0.98477,0.45002}, - {0.36581,0.98702,0.43688}, - {0.38127,0.98909,0.42386}, - {0.39678,0.99098,0.41098}, - {0.41229,0.99268,0.39826}, - {0.42778,0.99419,0.38575}, - {0.44321,0.99551,0.37345}, - {0.45854,0.99663,0.36140}, - {0.47375,0.99755,0.34963}, - {0.48879,0.99828,0.33816}, - {0.50362,0.99879,0.32701}, - {0.51822,0.99910,0.31622}, - {0.53255,0.99919,0.30581}, - {0.54658,0.99907,0.29581}, - {0.56026,0.99873,0.28623}, - {0.57357,0.99817,0.27712}, - {0.58646,0.99739,0.26849}, - {0.59891,0.99638,0.26038}, - {0.61088,0.99514,0.25280}, - {0.62233,0.99366,0.24579}, - {0.63323,0.99195,0.23937}, - {0.64362,0.98999,0.23356}, - {0.65394,0.98775,0.22835}, - {0.66428,0.98524,0.22370}, - {0.67462,0.98246,0.21960}, - {0.68494,0.97941,0.21602}, - {0.69525,0.97610,0.21294}, - {0.70553,0.97255,0.21032}, - {0.71577,0.96875,0.20815}, - {0.72596,0.96470,0.20640}, - {0.73610,0.96043,0.20504}, - {0.74617,0.95593,0.20406}, - {0.75617,0.95121,0.20343}, - {0.76608,0.94627,0.20311}, - {0.77591,0.94113,0.20310}, - {0.78563,0.93579,0.20336}, - {0.79524,0.93025,0.20386}, - {0.80473,0.92452,0.20459}, - {0.81410,0.91861,0.20552}, - {0.82333,0.91253,0.20663}, - {0.83241,0.90627,0.20788}, - {0.84133,0.89986,0.20926}, - {0.85010,0.89328,0.21074}, - {0.85868,0.88655,0.21230}, - {0.86709,0.87968,0.21391}, - {0.87530,0.87267,0.21555}, - {0.88331,0.86553,0.21719}, - {0.89112,0.85826,0.21880}, - {0.89870,0.85087,0.22038}, - {0.90605,0.84337,0.22188}, - {0.91317,0.83576,0.22328}, - {0.92004,0.82806,0.22456}, - {0.92666,0.82025,0.22570}, - {0.93301,0.81236,0.22667}, - {0.93909,0.80439,0.22744}, - {0.94489,0.79634,0.22800}, - {0.95039,0.78823,0.22831}, - {0.95560,0.78005,0.22836}, - {0.96049,0.77181,0.22811}, - {0.96507,0.76352,0.22754}, - {0.96931,0.75519,0.22663}, - {0.97323,0.74682,0.22536}, - {0.97679,0.73842,0.22369}, - {0.98000,0.73000,0.22161}, - {0.98289,0.72140,0.21918}, - {0.98549,0.71250,0.21650}, - {0.98781,0.70330,0.21358}, - {0.98986,0.69382,0.21043}, - {0.99163,0.68408,0.20706}, - {0.99314,0.67408,0.20348}, - {0.99438,0.66386,0.19971}, - {0.99535,0.65341,0.19577}, - {0.99607,0.64277,0.19165}, - {0.99654,0.63193,0.18738}, - {0.99675,0.62093,0.18297}, - {0.99672,0.60977,0.17842}, - {0.99644,0.59846,0.17376}, - {0.99593,0.58703,0.16899}, - {0.99517,0.57549,0.16412}, - {0.99419,0.56386,0.15918}, - {0.99297,0.55214,0.15417}, - {0.99153,0.54036,0.14910}, - {0.98987,0.52854,0.14398}, - {0.98799,0.51667,0.13883}, - {0.98590,0.50479,0.13367}, - {0.98360,0.49291,0.12849}, - {0.98108,0.48104,0.12332}, - {0.97837,0.46920,0.11817}, - {0.97545,0.45740,0.11305}, - {0.97234,0.44565,0.10797}, - {0.96904,0.43399,0.10294}, - {0.96555,0.42241,0.09798}, - {0.96187,0.41093,0.09310}, - {0.95801,0.39958,0.08831}, - {0.95398,0.38836,0.08362}, - {0.94977,0.37729,0.07905}, - {0.94538,0.36638,0.07461}, - {0.94084,0.35566,0.07031}, - {0.93612,0.34513,0.06616}, - {0.93125,0.33482,0.06218}, - {0.92623,0.32473,0.05837}, - {0.92105,0.31489,0.05475}, - {0.91572,0.30530,0.05134}, - {0.91024,0.29599,0.04814}, - {0.90463,0.28696,0.04516}, - {0.89888,0.27824,0.04243}, - {0.89298,0.26981,0.03993}, - {0.88691,0.26152,0.03753}, - {0.88066,0.25334,0.03521}, - {0.87422,0.24526,0.03297}, - {0.86760,0.23730,0.03082}, - {0.86079,0.22945,0.02875}, - {0.85380,0.22170,0.02677}, - {0.84662,0.21407,0.02487}, - {0.83926,0.20654,0.02305}, - {0.83172,0.19912,0.02131}, - {0.82399,0.19182,0.01966}, - {0.81608,0.18462,0.01809}, - {0.80799,0.17753,0.01660}, - {0.79971,0.17055,0.01520}, - {0.79125,0.16368,0.01387}, - {0.78260,0.15693,0.01264}, - {0.77377,0.15028,0.01148}, - {0.76476,0.14374,0.01041}, - {0.75556,0.13731,0.00942}, - {0.74617,0.13098,0.00851}, - {0.73661,0.12477,0.00769}, - {0.72686,0.11867,0.00695}, - {0.71692,0.11268,0.00629}, - {0.70680,0.10680,0.00571}, - {0.69650,0.10102,0.00522}, - {0.68602,0.09536,0.00481}, - {0.67535,0.08980,0.00449}, - {0.66449,0.08436,0.00424}, - {0.65345,0.07902,0.00408}, - {0.64223,0.07380,0.00401}, - {0.63082,0.06868,0.00401}, - {0.61923,0.06367,0.00410}, - {0.60746,0.05878,0.00427}, - {0.59550,0.05399,0.00453}, - {0.58336,0.04931,0.00486}, - {0.57103,0.04474,0.00529}, - {0.55852,0.04028,0.00579}, - {0.54583,0.03593,0.00638}, - {0.53295,0.03169,0.00705}, - {0.51989,0.02756,0.00780}, - {0.50664,0.02354,0.00863}, - {0.49321,0.01963,0.00955}, - {0.47960,0.01583,0.01055} -}; - - - - -uint64_t nanoTime(){ - using namespace std; - using namespace chrono; - return duration_cast(high_resolution_clock::now().time_since_epoch()).count(); -} -//CONFIG -KOKKOS_INLINE_FUNCTION bool isNaN(float x){ - #ifdef __CUDA_ARCH__ - return isnan(x); - #else - return std::isnan(x); - #endif -} -KOKKOS_INLINE_FUNCTION bool isINF(float x){ - #ifdef __CUDA_ARCH__ - return isinf(x); - #else - return std::isinf(x); - #endif -} -KOKKOS_INLINE_FUNCTION bool isNaN(double x){ - #ifdef __CUDA_ARCH__ - return isnan(x); - #else - return std::isnan(x); - #endif -} -KOKKOS_INLINE_FUNCTION bool isINF(double x){ - #ifdef __CUDA_ARCH__ - return isinf(x); - #else - return std::isinf(x); - #endif -} -#define assert_isreal(X) assert(!isNaN(X) && !isINF(X)) -template -KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, ippl::Vector to, Ts... args){ - if constexpr(sizeof...(Ts) == Dim){ - c(args...); - } - else{ - for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){ - serial_for(c, from, to, args..., i); - } - } -} - - - - - -struct config { - using scalar = double; - - //using length_unit = funits::length; - //using duration_unit = funits::time; - // GRID PARAMETERS - ippl::Vector resolution; - - ippl::Vector extents; - scalar total_time; - scalar timestep_ratio; - - scalar length_scale_in_jobfile, temporal_scale_in_jobfile; - - // All in unit_charge, or unit_mass - scalar charge, mass; - - uint64_t num_particles; - bool space_charge; - - // BUNCH PARAMETERS - ippl::Vector mean_position; - ippl::Vector sigma_position; - ippl::Vector position_truncations; - ippl::Vector sigma_momentum; - scalar bunch_gamma; - - scalar undulator_K; - scalar undulator_period; - scalar undulator_length; - - uint32_t output_rhythm; - std::string output_path; - std::unordered_map experiment_options; -}; -template -ippl::Vector getVector(const nlohmann::json& j){ - if(j.is_array()){ - assert(j.size() == Dim); - ippl::Vector ret; - for(unsigned i = 0;i < Dim;i++) - ret[i] = (scalar)j[i]; - return ret; - } - else{ - std::cerr << "Warning: Obtaining Vector from scalar json\n"; - ippl::Vector ret; - ret.fill((scalar)j); - return ret; - } -} -template -struct DefaultedStringLiteral { - constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) { - std::copy_n(str, N, key); - } - - T value; - char key[N]; -}; -template -struct StringLiteral { - constexpr StringLiteral(const char (&str)[N]) { - std::copy_n(str, N, value); - } - - char value[N]; - constexpr DefaultedStringLiteral operator>>(int t)const noexcept{ - return DefaultedStringLiteral(value, t); - } - constexpr size_t size()const noexcept{return N - 1;} -}; -template -constexpr size_t chash(){ - size_t hash = 5381; - int c; - - for(size_t i = 0;i < lit.size();i++){ - c = lit.value[i]; - hash = ((hash << 5) + hash) + c; // hash * 33 + c - } - - return hash; -} -size_t chash(const char* val) { - size_t hash = 5381; - int c; - - while ((c = *val++)) { - hash = ((hash << 5) + hash) + c; // hash * 33 + c - } - - return hash; -} -size_t chash(const std::string& _val) { - size_t hash = 5381; - const char* val = _val.c_str(); - int c; - - while ((c = *val++)) { - hash = ((hash << 5) + hash) + c; // hash * 33 + c - } - - return hash; -} -std::string lowercase_singular(std::string str) { - // Convert string to lowercase - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // Check if the string ends with "s" and remove it if it does - if (!str.empty() && str.back() == 's') { - str.pop_back(); - } - - return str; -} -double get_time_multiplier(const nlohmann::json& j){ - std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]); - double time_factor = 1.0; - switch (chash(length_scale_string)) { - case chash<"planck-time">(): - case chash<"plancktime">(): - case chash<"pt">(): - case chash<"natural">(): - time_factor = unit_time_in_seconds; - break; - case chash<"picosecond">(): - time_factor = 1e-12; - break; - case chash<"nanosecond">(): - time_factor = 1e-9; - break; - case chash<"microsecond">(): - time_factor = 1e-6; - break; - case chash<"millisecond">(): - time_factor = 1e-3; - break; - case chash<"second">(): - time_factor = 1.0; - break; - default: - std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n"; - break; - } - return time_factor; -} -double get_length_multiplier(const nlohmann::json& options){ - std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]); - double length_factor = 1.0; - switch (chash(length_scale_string)) { - case chash<"planck-length">(): - case chash<"plancklength">(): - case chash<"pl">(): - case chash<"natural">(): - length_factor = unit_length_in_meters; - break; - case chash<"picometer">(): - length_factor = 1e-12; - break; - case chash<"nanometer">(): - length_factor = 1e-9; - break; - case chash<"micrometer">(): - length_factor = 1e-6; - break; - case chash<"millimeter">(): - length_factor = 1e-3; - break; - case chash<"meter">(): - length_factor = 1.0; - break; - default: - std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n"; - break; - } - return length_factor; -} -config read_config(const char *filepath){ - std::ifstream cfile(filepath); - nlohmann::json j; - cfile >> j; - config::scalar lmult = get_length_multiplier(j); - config::scalar tmult = get_time_multiplier(j); - config ret; - - ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters; - ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters; - ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters; - ret.resolution = getVector(j["mesh"]["resolution"]); - - //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; - //std::cerr << "Tmult: " << tmult << "\n"; - if(j.contains("timestep-ratio")){ - ret.timestep_ratio = (config::scalar)j["timestep-ratio"]; - } - - else{ - ret.timestep_ratio = 1; - } - ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; - ret.space_charge = (bool)(j["mesh"]["space-charge"]); - ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); - if(ret.bunch_gamma < config::scalar(1)){ - std::cerr << "Gamma must be >= 1\n"; - exit(1); - } - assert(j.contains("undulator")); - assert(j["undulator"].contains("static-undulator")); - - ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; - ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters; - ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters; - assert(!std::isnan(ret.undulator_length)); - assert(!std::isnan(ret.undulator_period)); - assert(!std::isnan(ret.extents[0])); - assert(!std::isnan(ret.extents[1])); - assert(!std::isnan(ret.extents[2])); - assert(!std::isnan(ret.total_time)); - ret.length_scale_in_jobfile = get_length_multiplier(j); - ret.temporal_scale_in_jobfile = get_time_multiplier(j); - ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; - ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; - ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"]; - ret.mean_position = getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; - ret.sigma_position = getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; - ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters; - ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); - ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; - ret.output_path = "../data/"; - if(j["output"].contains("path")){ - ret.output_path = j["output"]["path"]; - if(!ret.output_path.ends_with('/')){ - ret.output_path.push_back('/'); - } - } - if(j.contains("experimentation")){ - nlohmann::json je = j["experimentation"]; - for(auto it = je.begin(); it!= je.end();it++){ - ret.experiment_options[it.key()] = double(it.value()); - } - } - return ret; -} -template -using FieldVector = ippl::Vector; -template -struct BunchInitialize { - - /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is - * manual the charge at points of the position vector will be produced. */ - // std::string bunchType_; - - /* Type of the distributions (transverse or longitudinal) in the bunch. */ - std::string distribution_; - - /* Type of the generator for creating the bunch distribution. */ - std::string generator_; - - /* Total number of macroparticles in the bunch. */ - unsigned int numberOfParticles_; - - /* Total charge of the bunch in pC. */ - scalar cloudCharge_; - - /* Initial energy of the bunch in MeV. */ - scalar initialGamma_; - - /* Initial normalized speed of the bunch. */ - scalar initialBeta_; - - /* Initial movement direction of the bunch, which is a unit vector. */ - FieldVector initialDirection_; - - /* Position of the center of the bunch in the unit of length scale. */ - // std::vector > position_; - FieldVector position_; - - /* Number of macroparticles in each direction for 3Dcrystal type. */ - FieldVector numbers_; - - /* Lattice constant in x, y, and z directions for 3D crystal type. */ - FieldVector latticeConstants_; - - /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal - * type, it will be the spread in position for each micro-bunch of the crystal. */ - FieldVector sigmaPosition_; - - /* Spread in energy in each direction. */ - FieldVector sigmaGammaBeta_; - - /* Store the truncation transverse distance for the electron generation. */ - scalar tranTrun_; - - /* Store the truncation longitudinal distance for the electron generation. */ - scalar longTrun_; - - /* Name of the file for reading the electrons distribution from. */ - std::string fileName_; - - /* The radiation wavelength corresponding to the bunch length outside the undulator */ - scalar lambda_; - - /* Bunching factor for the initialization of the bunch. */ - scalar bF_; - - /* Phase of the bunching factor for the initialization of the bunch. */ - scalar bFP_; - - /* Boolean flag determining the activation of shot-noise. */ - bool shotNoise_; - - /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. */ - FieldVector betaVector_; - - /* Initialize the parameters for the bunch initialization to some first values. */ - // BunchInitialize (); -}; - - - - - -//END CONFIG - -//LORENTZ FRAME AND UNDULATOR -template -struct UniaxialLorentzframe{ - constexpr static T c = 1.0; - using scalar = T; - using Vector3 = ippl::Vector; - scalar beta_m; - scalar gammaBeta_m; - scalar gamma_m; - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){ - gammaBeta_m = gammaBeta; - beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta)); - gamma_m = sqrt(1 + gammaBeta * (gammaBeta)); - } - KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{ - arg[axis] = gamma_m * (arg[axis] + beta_m * time); - } - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB(const Kokkos::pair, ippl::Vector>& unprimedEB)const noexcept{ - - Kokkos::pair, ippl::Vector> ret; - ippl::Vector betavec{0, 0, beta_m}; - ret.first = ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); - ret.second = ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); - ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; - ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; - return ret; - } - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> inverse_transform_EB(const Kokkos::pair, ippl::Vector>& primedEB)const noexcept{ - return UniaxialLorentzframe(-gammaBeta_m).transform_EB(primedEB); - } -}; -template -BunchInitialize generate_mithra_config(const config& cfg, const UniaxialLorentzframe& /*frame_boost unused*/) { - using vec3 = ippl::Vector; - scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K); - BunchInitialize init; - init.generator_ = "random"; - init.distribution_ = "uniform"; - init.initialDirection_ = vec3{0, 0, 1}; - init.initialGamma_ = cfg.bunch_gamma; - init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); - init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); - init.sigmaPosition_ = cfg.sigma_position.template cast(); - - // TODO: Initial bunching factor huh - init.bF_ = 0.01; - init.bFP_ = 0; - init.shotNoise_ = false; - init.cloudCharge_ = cfg.charge; - init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); - init.longTrun_ = cfg.position_truncations[2]; - init.tranTrun_ = cfg.position_truncations[0]; - init.position_ = cfg.mean_position.cast(); - init.betaVector_ = ippl::Vector{0,0,init.initialBeta_}; - init.numberOfParticles_ = cfg.num_particles; - - init.numbers_ = 0; // UNUSED - init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED - - return init; -} -template -struct Charge { - - Double q; /* Charge of the point in the unit of electron charge. */ - FieldVector rnp, rnm; /* Position vector of the charge. */ - FieldVector gb; /* Normalized velocity vector of the charge. */ - - /* Double flag determining if the particle is passing the entrance point of the undulator. This flag - * can be used for better boosting the bunch to the moving frame. We need to consider it to be double, - * because this flag needs to be communicated during bunch update. */ - Double e; - - // Charge(); -}; -template -using ChargeVector = std::list>; -template -void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector & chargeVector, int rank, int size, int ia){ - /* Correct the number of particles if it is not a multiple of four. */ - if ( bunchInit.numberOfParticles_ % 4 != 0 ) - { - unsigned int n = bunchInit.numberOfParticles_ % 4; - bunchInit.numberOfParticles_ += 4 - n; - //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") + - // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); - } - - /* Save the initially given number of particles. */ - unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); - - /* Declare the required parameters for the initialization of charge vectors. */ - Charge charge; charge.q = bunchInit.cloudCharge_ / Np; - FieldVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; - FieldVector r (0.0); - FieldVector t (0.0); - Double t0, g; - Double zmin = 1e100; - Double Ne, bF, bFi; - unsigned int bmi; - std::vector randomNumbers; - - /* The initialization in group of four particles should only be done if there exists an undulator in - * the interaction. */ - unsigned int ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4; - - /* Check the bunching factor. */ - if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 ) - { - //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") ); - //exit(1); - } - - /* If the generator is random we should make sure that different processors do not produce the same - * random numbers. */ - if ( bunchInit.generator_ == "random" ) - { - /* Initialize the random number generator. */ - srand ( time(NULL) ); - /* Np / ng * 20 is the maximum number of particles. */ - randomNumbers.resize( Np / ng * 20, 0.0); - for ( unsigned int ri = 0; ri < Np / ng * 20; ri++) - randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX)); - } - - /* Declare the generator function depending on the input. */ - auto generate = [&] (unsigned int n, unsigned int m) { - //if ( bunchInit.generator_ == "random" ) - return ( randomNumbers.at( n * 2 * Np/ng + m ) ); - //else - // return ( randomNumbers[ n * 2 * Np/ng + m ] ); - //TODO: Return halton properly - //return ( halton(n,m) ); - }; - - /* Declare the function for injecting the shot noise. */ - auto insertCharge = [&] (Charge q) { - - for ( unsigned int ii = 0; ii < ng; ii++ ) - { - /* The random modulation is introduced depending on the shot-noise being activated. */ - if ( bunchInit.shotNoise_ ) - { - /* Obtain the number of beamlet. */ - bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ ); - - /* Obtain the phase and amplitude of the modulation. */ - bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) ); - - q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; - - q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) ); - } - else if ( bunchInit.lambda_ != 0.0) - { - q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; - - q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 ); - } - - /* Set this charge into the charge vector. */ - chargeVector.push_back(q); - } - }; - - /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to - * calculate the FEL bucket number. */ - if ( bunchInit.shotNoise_ ) - { - for (i = 0; i < Np / ng; i++) - { - if ( bunchInit.distribution_ == "uniform" ) - zmin = std::min( Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin ); - else if ( bunchInit.distribution_ == "gaussian" ) - zmin = std::min( (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin ); - else - { - //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); - exit(1); - } - } - - if ( bunchInit.distribution_ == "uniform" ) - for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++) - { - t0 = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) ); - t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); - - zmin = std::min( t0 , zmin ); - } - - //zmin = zmin + bunchInit.position_[ia][2]; - zmin = zmin + bunchInit.position_[2]; - - /* Obtain the average number of electrons per FEL beamlet. */ - Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] ); - - /* Set the bunching factor level for the shot noise depending on the given values. */ - bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_; - - //printmessage(std::string(__FILE__), __LINE__, std::string("The standard deviation of the bunching factor for the shot noise implementation is set to ") + stringify(bF) ); - } - - /* Determine the properties of each charge point and add them to the charge vector. */ - for (i = rank; i < Np / ng; i += size) - { - /* Determine the transverse coordinate. */ - r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); - r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); - - /* Determine the longitudinal coordinate. */ - if ( bunchInit.distribution_ == "uniform" ) - r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2]; - else if ( bunchInit.distribution_ == "gaussian" ) - r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); - else - { - //printmessage(std::string(__FILE__), __LINE__, std::string("The longitudinal type is not correctly given to the code !!!") ); - exit(1); - } - - /* Determine the transverse momentum. */ - t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); - t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); - t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); - - if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) - { - /* Shift the generated charge to the center position and momentum space. */ - //charge.rnp = bunchInit.position_[ia]; - charge.rnp = bunchInit.position_; - charge.rnp += r; - - charge.gb = gb; - charge.gb += t; - //std::cout << gb << "\n"; - if(std::isinf(gb[2])){ - std::cerr << "it klonked here\n"; - } - - /* Insert this charge and the mirrored ones into the charge vector. */ - insertCharge(charge); - } - } - - /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the - * CSE from the tail of the bunch. */ - if ( bunchInit.distribution_ == "uniform" ){ - for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size) - { - - r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); - r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); - - /* Determine the longitudinal coordinate. */ - r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); - r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); - - /* Determine the transverse momentum. */ - t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); - t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); - t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); - //std::cerr << "DOING UNIFORM tapering!!!\n"; - if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) - { - //std::cerr << "ACTUALLY DOING UNIFORM tapering!!!\n"; - /* Shift the generated charge to the center position and momentum space. */ - charge.rnp = bunchInit.position_[ia]; - charge.rnp += r; - - charge.gb = gb; - - charge.gb += t; - //std::cout << gb[0] << "\n"; - //if(std::isinf(gb.squaredNorm())){ - // std::cerr << "it klonked here\n"; - //} - /* Insert this charge and the mirrored ones into the charge vector. */ - insertCharge(charge); - } - } - } - - /* Reset the value for the number of particle variable according to the installed number of - * macro-particles and perform the corresponding changes. */ - bunchInit.numberOfParticles_ = chargeVector.size(); -} - -template -void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma){ - Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); - Double zmaxL = -1.0e100, zmaxG; - for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) - { - Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); - if(std::isinf(g)){ - std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n"; - abort(); - } - Double bz = iterQ->gb[2] / g; - iterQ->rnp[2] *= frame_gamma; - - iterQ->gb[2] = frame_gamma * g * ( bz - frame_beta ); - - zmaxL = std::max( zmaxL , iterQ->rnp[2] ); - } - zmaxG = zmaxL; - struct { - Double zu_; - Double beta_; - } bunch_; - bunch_.zu_ = zmaxG; - bunch_.beta_ = frame_beta; - - /****************************************************************************************************/ - - for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) - { - Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); - iterQ->rnp[0] += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - iterQ->rnp[1] += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - iterQ->rnp[2] += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - if(std::isnan(iterQ->rnp[2])){ - std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ << ", " << frame_beta << "\n"; - std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; - abort(); - } - } -} - - - - -template -size_t initialize_bunch_mithra( - bunch_type& bunch, - const BunchInitialize& bunchInit, - scalar frame_gamma){ - - ChargeVector oof; - initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0); - for(auto& c : oof){ - if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) - std::cout << "Pos before boost: " << c.rnp << "\n"; - if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) - std::cout << "Pos before boost: " << c.rnp << "\n"; - } - boost_bunch(oof, frame_gamma); - for(auto& c : oof){ - if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){ - std::cout << "Pos after boost: " << c.rnp << "\n"; - break; - } - } - Kokkos::View*, Kokkos::HostSpace> positions("", oof.size()); - Kokkos::View*, Kokkos::HostSpace> gammabetas("", oof.size()); - auto iterQ = oof.begin(); - for (size_t i = 0; i < oof.size(); i++) { - assert_isreal(iterQ->gb[0]); - assert_isreal(iterQ->gb[1]); - assert_isreal(iterQ->gb[2]); - assert(iterQ->gb[2] != 0.0f); - scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); - assert_isreal(g); - scalar bz = iterQ->gb[2] / g; - assert_isreal(bz); - (void)bz; - positions(i) = iterQ->rnp; - gammabetas(i) = iterQ->gb; - ++iterQ; - } - if(oof.size() > bunch.getLocalNum()){ - bunch.create(oof.size() - bunch.getLocalNum()); - } - Kokkos::View*> dpositions ("", oof.size()); - Kokkos::View*> dgammabetas("", oof.size()); - - Kokkos::deep_copy(dpositions, positions); - Kokkos::deep_copy(dgammabetas, gammabetas); - Kokkos::deep_copy(bunch.R_nm1.getView(), positions); - Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas); - auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();; - Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ - rview(i) = dpositions(i); - rm1view(i) = dpositions(i); - gbview(i) = dgammabetas(i); - }); - Kokkos::fence(); - - return oof.size(); -} - - - - - - - -//END LORENTZ FRAME AND UNDULATOR AND BUNCH - - - - - -//PREAMBLE - +#include template requires((std::is_floating_point_v)) KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ uint32_t dim = sizeof...(scalar); ippl::Vector vec{scalar1(x - mean)...}; + scalar1 vecsum(0); for(unsigned d = 0;d < dim;d++){ - vec[d] = vec[d] * vec[d]; + vecsum += vec[d] * vec[d]; + } #ifndef __CUDA_ARCH__ using std::exp; #endif - return exp(-(vec.sum()) / (stddev * stddev)); -} - -constexpr double sq(double x){ - return x * x; -} -constexpr float sq(float x){ - return x * x; + return exp(-(vecsum) / (stddev * stddev)); } - -template -constexpr KOKKOS_INLINE_FUNCTION auto first(){ - return a; -} -template -constexpr KOKKOS_INLINE_FUNCTION auto second(){ - return b; -} - -template -KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, - const ippl::Vector& b) { - ippl::Vector ret{0.0, 0.0, 0.0}; - ret[0] = a[1] * b[2] - a[2] * b[1]; - ret[1] = a[2] * b[0] - a[0] * b[2]; - ret[2] = a[0] * b[1] - a[1] * b[0]; - return ret; -} -template -KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ - //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; - //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); - Kokkos::pair, ippl::Vector> ret; - //pos -= spacing * T(0.5); - ippl::Vector relpos = pos - origin; - ippl::Vector gridpos = relpos / hr; - ippl::Vector ipos; - ipos = gridpos.template cast(); - ippl::Vector fracpos; - for(unsigned k = 0;k < 3;k++){ - fracpos[k] = gridpos[k] - (int)ipos[k]; - } - //TODO: NGHOST!!!!!!! - ipos += ippl::Vector(1); - ret.first = ipos; - ret.second = fracpos; - return ret; -} -template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); - ipos -= ldom.first(); - //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ - return; - } - assert(fracpos[0] >= 0.0f); - assert(fracpos[0] <= 1.0f); - assert(fracpos[1] >= 0.0f); - assert(fracpos[1] <= 1.0f); - assert(fracpos[2] >= 0.0f); - assert(fracpos[2] <= 1.0f); - ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; - assert(one_minus_fracpos[0] >= 0.0f); - assert(one_minus_fracpos[0] <= 1.0f); - assert(one_minus_fracpos[1] >= 0.0f); - assert(one_minus_fracpos[1] <= 1.0f); - assert(one_minus_fracpos[2] >= 0.0f); - assert(one_minus_fracpos[2] <= 1.0f); - scalar accum = 0; - - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; - ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ - weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); - ipos_l[d] += !!(i & (1 << d)); - } - assert_isreal(value); - assert_isreal(weight); - accum += weight; - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); - } - assert(abs(accum - 1.0f) < 1e-6f); -} -template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ - //std::cout << "Value: " << value << "\n"; - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); - ipos -= ldom.first(); - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ - //std::cout << "out of bounds\n"; - return; - } - assert(fracpos[0] >= 0.0f); - assert(fracpos[0] <= 1.0f); - assert(fracpos[1] >= 0.0f); - assert(fracpos[1] <= 1.0f); - assert(fracpos[2] >= 0.0f); - assert(fracpos[2] <= 1.0f); - ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; - assert(one_minus_fracpos[0] >= 0.0f); - assert(one_minus_fracpos[0] <= 1.0f); - assert(one_minus_fracpos[1] >= 0.0f); - assert(one_minus_fracpos[1] <= 1.0f); - assert(one_minus_fracpos[2] >= 0.0f); - assert(one_minus_fracpos[2] <= 1.0f); - scalar accum = 0; - - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; - ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ - weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); - ipos_l[d] += !!(i & (1 << d)); - } - assert_isreal(weight); - accum += weight; - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); - } - assert(abs(accum - 1.0f) < 1e-6f); -} - -template -KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ - - - Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); - Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); - //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); - for(int d = 0;d < 3;d++){ - //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ - // std::cout <(nghost); - //to_ipos += ippl::Vector(nghost); - - if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); - - return; - } - ippl::Vector relay; - const int nghost = 1; - const ippl::Vector orig = origin; - using Kokkos::max; - using Kokkos::min; - for (unsigned int i = 0; i < 3; i++) { - relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], - max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], - scalar(0.5) * (to[i] + from[i]))); - } - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); -} - -// END PREAMBLE - - -//BEGIN ABSORBING BOUNDARY CONDITION -template -struct second_order_abc_face{ - using scalar = _scalar; - scalar Cweights[5]; - int sign; - constexpr static unsigned main_axis = _main_axis; - KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ - constexpr scalar c = 1; - constexpr unsigned side_axes[2] = {_side_axes...}; - static_assert( - (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || - (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || - (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) - ); - assert(_main_axis != side_axes[0]); - assert(_main_axis != side_axes[1]); - assert(side_axes[0] != side_axes[1]); - constexpr scalar truncation_order = 2.0; - scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); - scalar q = - 1.0 / ( 1 + 1 ); - - scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); - - Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; - Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; - assert(abs(Cweights[1] + 1) < 1e-6); //Like literally - Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; - Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; - Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - uint32_t i = c[0]; - uint32_t j = c[1]; - uint32_t k = c[2]; - using ippl::apply; - constexpr unsigned side_axes[2] = {_side_axes...}; - ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; - ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; - ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); - return advanceBoundaryS( - A_nm1(i,j,k), A_n(i,j,k), - apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), - apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), - apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), - apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) - ); - } - template - KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , - const value_type& v3 , const value_type& v4 , const value_type& v5 , - const value_type& v6 , const value_type& v7 , const value_type& v8 , - const value_type& v9 , const value_type& v10, const value_type& v11, - const value_type& v12, const value_type& v13)const noexcept - { - - value_type v0 = - Cweights[0] * (v1 + v5) + - (Cweights[1]) * v3 + - (Cweights[2]) * ( v2 + v4 ) + - (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + - (Cweights[4]) * ( v8 + v9 + v12 + v13 ); - return v0; - } -}; -template -struct second_order_abc_edge{ - using scalar = _scalar; - // - scalar Eweights[5]; - - KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ - static_assert(normal_axis1 != normal_axis2); - static_assert(edge_axis != normal_axis2); - static_assert(edge_axis != normal_axis1); - static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); - constexpr scalar c0_ = scalar(1); - scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); - if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else{ - assert(false); - } - - - - - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - uint32_t i = c[0]; - uint32_t j = c[1]; - uint32_t k = c[2]; - using ippl::apply; - //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; - ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); - ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); - ippl::Vector acc0 = {i, j, k}; - ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); - ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); - ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); - //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); - ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; - //return A_n(i, j, k); - return advanceEdgeS( - A_n(i, j, k), A_nm1(i, j, k), - apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), - apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), - apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), - apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), - apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) - ); - } - template - KOKKOS_INLINE_FUNCTION value_type advanceEdgeS - ( value_type v1 , value_type v2 , - value_type v3 , value_type v4 , value_type v5 , - value_type v6 , value_type v7 , value_type v8 , - value_type v9 , value_type v10, value_type v11, - value_type v12, value_type v13, value_type v14, - value_type v15, value_type v16, value_type v17, - value_type v18, value_type v19)const noexcept{ - value_type v0 = - Eweights[0] * (v3 + v8) + - Eweights[1] * (v5 + v6) + - Eweights[2] * (v2 + v9) + - Eweights[3] * (v1 + v4 + v7 + v10) + - Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; - return v0; - } -}; -template -struct second_order_abc_corner{ - using scalar = _scalar; - scalar Cweights[17]; - KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ - constexpr scalar c0_ = scalar(1); - Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - //First implementation: 0,0,0 corner - constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); - constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); - constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); - using ippl::apply; - const ippl::Vector offsets[8] = { - ippl::Vector{0,0,0}, - ippl::Vector{xoff,0,0}, - ippl::Vector{0,yoff,0}, - ippl::Vector{0,0,zoff}, - ippl::Vector{xoff,yoff,0}, - ippl::Vector{xoff,0,zoff}, - ippl::Vector{0,yoff,zoff}, - ippl::Vector{xoff,yoff,zoff}, - }; - return advanceCornerS( - apply(A_n, c), apply(A_nm1, c), - apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), - apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), - apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), - apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), - apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), - apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), - apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) - ); - } - template - KOKKOS_INLINE_FUNCTION value_type advanceCornerS - ( value_type v1 , value_type v2 , - value_type v3 , value_type v4 , value_type v5 , - value_type v6 , value_type v7 , value_type v8 , - value_type v9 , value_type v10, value_type v11, - value_type v12, value_type v13, value_type v14, - value_type v15, value_type v16, value_type v17, - value_type v18, value_type v19, value_type v20, - value_type v21, value_type v22, value_type v23)const noexcept{ - return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + - v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + - v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + - v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + - v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + - v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + - v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + - v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; - } -}; - - - - - - -struct second_order_mur_boundary_conditions{ - template - void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ - using scalar = decltype(dt); - //TODO: tbh don't know - //const unsigned nghost = 1; - const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); - //assert_isreal((betaMur[0])); - //assert_isreal((betaMur[1])); - //assert_isreal((betaMur[2])); - auto A_n = FA_n.getView(); - auto A_np1 = FA_np1.getView(); - auto A_nm1 = FA_nm1.getView(); - ippl::Vector local_nr{ - uint32_t(A_n.extent(0)), - uint32_t(A_n.extent(1)), - uint32_t(A_n.extent(2)) - }; - constexpr uint32_t min_abc_boundary = 1; - constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - - if(Kokkos::popcount(val) == 1){ - if(ig == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(jg == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(kg == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(ig == true_nr[0] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - } - }); - Kokkos::fence(); - //FA_np1.fillHalo(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - if(Kokkos::popcount(lval) > 2)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - if(Kokkos::popcount(val) == 2){ //Edge - if(ig == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else{ - assert(false); - } - } - }); - Kokkos::fence(); - //FA_np1.fillHalo(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - //if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - - if(Kokkos::popcount(val) == 3){ - //printf("Corner: %d, %d, %d\n", i, j, k); - if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else{ - assert(false); - } - } - }); - Kokkos::fence(); - } -}; -//END ABSORBING BOUNDARY CONDITION - - - - - - - - - -namespace ippl { - - template - struct undulator_parameters{ - scalar lambda; //MITHRA: lambda_u - scalar K; //Undulator parameter - scalar length; - scalar B_magnitude; - undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){ - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); - //std::cout << "Setting bmag: " << B_magnitude << "\n"; - } - undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda); - } - }; - - template - struct nondispersive{ - scalar a1; - scalar a2; - scalar a4; - scalar a6; - scalar a8; - }; - template - struct Bunch : public ippl::ParticleBase { - using scalar = _scalar; - - // Constructor for the Bunch class, taking a PLayout reference - Bunch(PLayout& playout) - : ippl::ParticleBase(playout) { - // Add attributes to the particle bunch - this->addAttribute(Q); // Charge attribute - this->addAttribute(mass); // Mass attribute - this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) - this->addAttribute(R_np1); // Position attribute for the next time step - this->addAttribute(R_nm1); // Position attribute for the next time step - this->addAttribute(EB_gather); // Electric field attribute for particle gathering - } - - // Destructor for the Bunch class - ~Bunch() {} - - // Define container types for various attributes - using charge_container_type = ippl::ParticleAttrib; - using velocity_container_type = ippl::ParticleAttrib>; - using vector_container_type = ippl::ParticleAttrib>; - using vector4_container_type = ippl::ParticleAttrib>; - - // Declare instances of the attribute containers - charge_container_type Q; // Charge container - charge_container_type mass; // Mass container - velocity_container_type gamma_beta; // Gamma-beta container - typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step - typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step - ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering - - }; - - - template - // clang-format off - struct FDTDFieldState{ - - //Sorry, can't do more than 3d - - constexpr static unsigned int dim = 3; - using Vector_t = ippl::Vector; - using value_type = ippl::Vector; - using EB_type = ippl::Vector, 2>; - using Mesh_t = ippl::UniformCartesian; - - bool periodic_bc; - - using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - using view_type = VField_t::view_type; - using ev_view_type = EBField_t::view_type; - //Fields - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_np1; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_n; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_nm1; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> J; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> EB; - - //Discretization options - Vector_t hr_m; - ippl::Vector nr_global; - ippl::Vector nr_local; - scalar dt; - Mesh_t* mesh_mp; - FieldLayout* layout_mp; - using playout_type = ParticleSpatialLayout; - playout_type playout; - Bunch> particles; - using bunch_type = Bunch>; - config m_config; - - /** - * @brief Construct a new FDTDFieldState object - * Mesh and resolution parameter are technically redundant - * - * @param resolution - * @param layout - * @param mesch - */ - FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){ - FA_np1.initialize(mesch, layout, 1); - FA_n.initialize(mesch, layout, 1); - FA_nm1.initialize(mesch, layout, 1); - J.initialize(mesch, layout, 1); - EB.initialize(mesch, layout, 1); - FA_n = value_type(0); - FA_np1 = value_type(0); - FA_nm1 = value_type(0); - hr_m = mesch.getMeshSpacing(); - nr_global = ippl::Vector{ - uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), - uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), - uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) - }; - nr_local = ippl::Vector{ - uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1), - uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), - uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) - }; - //std::cout << "NR_M_g: " << nr_global << "\n"; - //std::cout << "NR_M_l: " << nr_local << "\n"; - dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2])); - particles.create(nparticles); - setNoBoundaryConditions(); - - } - void setNoBoundaryConditions() { - periodic_bc = false; - typename VField_t::BConds_t vector_bcs; - auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { - vector_bcs[Idx] = std::make_shared>(Idx); - return 0; - }; - auto bcsetter = [bcsetter_single](const std::index_sequence&) { - int x = (bcsetter_single(std::index_sequence{}) ^ ...); - (void)x; - }; - bcsetter(std::make_index_sequence{}); - FA_n .setFieldBC(vector_bcs); - FA_np1.setFieldBC(vector_bcs); - FA_nm1.setFieldBC(vector_bcs); - } - void setPeriodicBoundaryConditions() { - periodic_bc = true; - typename VField_t::BConds_t vector_bcs; - auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { - vector_bcs[Idx] = std::make_shared>(Idx); - return 0; - }; - auto bcsetter = [bcsetter_single](const std::index_sequence&) { - int x = (bcsetter_single(std::index_sequence{}) ^ ...); - (void)x; - }; - bcsetter(std::make_index_sequence{}); - FA_n .setFieldBC(vector_bcs); - FA_np1.setFieldBC(vector_bcs); - FA_nm1.setFieldBC(vector_bcs); - } - - void scatterBunch(){ - //ippl::Vector* gammaBeta = this->gammaBeta; - const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; - assert_isreal(volume); - assert_isreal((scalar(1) / volume)); - auto Jview = J.getView(); - auto qview = particles.Q.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto orig = mesh_mp->getOrigin(); - auto hr = mesh_mp->getMeshSpacing(); - auto dt = this->dt; - bool space_charge = m_config.space_charge; - ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - Vector_t pos = rview(i); - Vector_t to = rview(i); - Vector_t from = rm1view(i); - if(space_charge){ - scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); - } - scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); - }); - Kokkos::fence(); - J.accumulateHalo(); - } - - void fieldStep(){ - const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); - nondispersive ndisp{ - .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), - .a2 = sq(dt / hr_m[0]), - .a4 = sq(dt / hr_m[1]), - .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), - .a8 = sq(dt) - }; - //if(periodic_bc){ - // FA_n.getFieldBC().apply(FA_n); - //} - auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); - auto source = this->J.getView(); - //FA_nm1.fillHalo(); - FA_n.fillHalo(); - ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; - const auto& ldom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - uint32_t ig = i + ldom.first()[0]; - uint32_t jg = j + ldom.first()[1]; - uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2) - + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5); - if(!val){ - A_np1(i, j, k) = -A_nm1(i,j,k) - + ndisp.a1 * A_n (i,j,k) - + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) - + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) - + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) - + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) - + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); - } - }); - Kokkos::fence(); - - if(!periodic_bc){ - FA_np1.fillHalo(); - second_order_mur_boundary_conditions bc; - - - bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom); - } - Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); - Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); - //std::swap(this->A_n, this->A_nm1); - //std::swap(this->A_np1, this->A_n); - - evaluate_EB(); - } - void evaluate_EB(){ - FA_n.fillHalo();//FA_nm1.fillHalo(); - ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; - const scalar idt = scalar(1.0) / dt; - auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); - auto source = this->J.getView(); - auto EBv = this->EB.getView(); - Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; - ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; - ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; - ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; - - ippl::Vector grad_phi{ - dAdx[0], dAdy[0], dAdz[0] - }; - ippl::Vector curlA{ - dAdy[3] - dAdz[2], - dAdz[1] - dAdx[3], - dAdx[2] - dAdy[1], - }; - EBv(i,j,k)[0] = -dAdt - grad_phi; - EBv(i,j,k)[1] = curlA; - }); - Kokkos::fence(); - } - template - void updateBunch(scalar time, UniaxialLorentzframe ulb, callable undulator_field){ - - Kokkos::fence(); - auto gbview = particles.gamma_beta.getView(); - auto ebview = particles.EB_gather.getView(); - auto qview = particles.Q.getView(); - auto mview = particles.mass.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto rp1view = particles.R_np1.getView(); - scalar bunch_dt = dt / 3; - Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); - EB.fillHalo(); - Kokkos::fence(); - for(int bts = 0;bts < 3;bts++){ - - particles.EB_gather.gather(EB, particles.R); - Kokkos::fence(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - const ippl::Vector pgammabeta = gbview(i); - ippl::Vector, 2> EB = ebview(i); - ippl::Vector labpos = rview(i); - - ulb.primedToUnprimed(labpos, time); - - Kokkos::pair, ippl::Vector> EB_undulator_frame = undulator_field(labpos); - Kokkos::pair, ippl::Vector> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame); - assert_isreal((EB_undulator_bunch.first[0])); - assert_isreal((EB_undulator_bunch.first[1])); - assert_isreal((EB_undulator_bunch.first[2])); - assert_isreal((EB_undulator_bunch.second[0])); - assert_isreal((EB_undulator_bunch.second[1])); - assert_isreal((EB_undulator_bunch.second[2])); - EB[0] += EB_undulator_bunch.first; - EB[1] += EB_undulator_bunch.second; - - const scalar charge = qview(i); - const scalar mass = mview(i); - const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); - const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); - const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); - const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); - const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); - - assert_isreal((ngammabeta[0])); - assert_isreal((ngammabeta[1])); - assert_isreal((ngammabeta[2])); - rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); - gbview(i) = ngammabeta; - }); - Kokkos::fence(); - } - Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); - size_t invalid_count = 0; - auto origo = mesh_mp->getOrigin(); - ippl::Vector extenz;// - extenz[0] = nr_global[0] * hr_m[0]; - extenz[1] = nr_global[1] * hr_m[1]; - extenz[2] = nr_global[2] * hr_m[2]; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, particles.getLocalNum()), - KOKKOS_LAMBDA(size_t i, size_t& ref){ - bool out_of_bounds = false; - ippl::Vector ppos = rview(i); - for(size_t d = 0;d < dim;d++){ - out_of_bounds |= (ppos[d] <= origo[d]); - out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain - } - invalid(i) = out_of_bounds; - ref += out_of_bounds; - }, - invalid_count); - particles.destroy(invalid, invalid_count); - Kokkos::fence(); - - } - }; - // clang-format on -} // namespace ippl -bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { - const int channels = 3; // RGB - const int stride = width * channels; - std::vector flippedData(data, data + stride * height); - - // Use stb_image_write to write the BMP image to the file descriptor - if (!stbi_write_bmp_to_func( - [](void* context, void* data, int size) { - FILE* f = reinterpret_cast(context); - fwrite(data, 1, size, f); - }, - fd, width, height, channels, flippedData.data())) { - return false; - } - - return true; -} -template -KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, const ippl::NDIndex<3>& lDom){ - using vector_type = ippl::Vector; - - vector_type l; - //vector_type origin = v.get_mesh().getOrigin(); - //auto lDom = v.getLayout().getLocalNDIndex(); - //vector_type hr = v.get_mesh().getMeshSpacing(); - for(unsigned k = 0;k < 3;k++){ - l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong - } - - ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; - ippl::Vector whi = l - index; - ippl::Vector wlo(1.0); - wlo -= whi; - //TODO: nghost - ippl::Vector args = index - lDom.first() + 1; - for(unsigned k = 0;k < 3;k++){ - if(args[k] >= v.extent(k) || args[k] == 0){ - return typename View::value_type(0); - } - } - //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n"; - return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/; - -} -template -scalar test_gauss_law(uint32_t n){ - - ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); - ippl::Vector nr{n / 2, n / 2, 2 * n}; - ippl::Vector extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths}; - std::array isParallel; - isParallel.fill(false); - isParallel[2] = true; - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); - - //[-1, 1] box - ippl::Vector hx = extents / nr.cast(); - using vector_type = ippl::Vector; - ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; - ippl::UniformCartesian mesh(owned, hx, origin); - - uint32_t pcount = 1 << 20; - ippl::FDTDFieldState field_state(layout, mesh, pcount, config{.space_charge = true}); - field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; - field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant - auto pview = field_state.particles.R.getView(); - auto p1view = field_state.particles.R_nm1.getView(); - - //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; - Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); - - Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ - //bunch.gammaBeta[i].fill(scalar(0)); - auto state = random_pool.get_state(); - pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - p1view(i) = pview(i); - random_pool.free_state(state); - }); - Kokkos::fence(); - field_state.J = scalar(0.0); - field_state.scatterBunch(); - for(size_t i = 0;i < 8*n;i++){ - field_state.fieldStep(); - } - field_state.evaluate_EB(); - Kokkos::fence(); - auto lDom = field_state.EB.getLayout().getLocalNDIndex(); - - std::ofstream line("gauss_line.txt"); - typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); - //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); - for(unsigned i = 1;i < nr[2];i++){ - vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; - pos[2] += hx[2] * scalar(i); - ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); - //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; - line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; - } - return 0.0f; -} -template -scalar test_amperes_law(uint32_t n){ - - ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); - ippl::Vector nr{n / 2, n / 2, 2 * n}; - ippl::Vector extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths}; - std::array isParallel; - isParallel.fill(false); - isParallel[2] = true; - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); - - //[-1, 1] box - ippl::Vector hx; - for(unsigned d = 0;d < 3;d++){ - hx[d] = extents[d] / (scalar)nr[d]; - } - using vector_type = ippl::Vector; - ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; - ippl::UniformCartesian mesh(owned, hx, origin); - - uint32_t pcount = 1 << 20; - ippl::FDTDFieldState field_state(layout, mesh, pcount, config{.space_charge = true}); - field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; - field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant - auto pview = field_state.particles.R.getView(); - auto p1view = field_state.particles.R_nm1.getView(); - constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; - scalar timestep = field_state.dt; - //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; - Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); - - Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ - //bunch.gammaBeta[i].fill(scalar(0)); - auto state = random_pool.get_state(); - pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); - p1view(i) = pview(i); - p1view(i)[1] -= vy * timestep; - random_pool.free_state(state); - }); - Kokkos::fence(); - field_state.J = scalar(0.0); - field_state.scatterBunch(); - for(size_t i = 0;i < 8*n;i++){ - field_state.fieldStep(); - } - field_state.evaluate_EB(); - Kokkos::fence(); - auto lDom = field_state.EB.getLayout().getLocalNDIndex(); - - std::ofstream line("ampere_line.txt"); - - typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); - //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); - for(unsigned i = 1;i < nr[2];i++){ - vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; - pos[2] += hx[2] * scalar(i); - ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); - //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; - line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n"; - } - return 0.0f; -} -int main(int argc, char* argv[]) { - using scalar = double; +int main(int argc, char* argv[]){ ippl::initialize(argc, argv); { - - test_gauss_law(64); - test_amperes_law(64); - goto exit; - config cfg = read_config("../config.json"); - const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); - cfg.extents[2] *= frame_gamma; - cfg.total_time /= frame_gamma; - - const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); - const scalar frame_gammabeta = frame_gamma * frame_beta; - UniaxialLorentzframe frame_boost(frame_gammabeta); - ippl::undulator_parameters uparams(cfg); - const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; - const scalar distance_to_entry = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma); - auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector& position_in_lab_frame){ - Kokkos::pair, ippl::Vector> ret; - ret.first.fill(0); - ret.second.fill(0); - - if(position_in_lab_frame[2] < distance_to_entry){ - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator < 0); - scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; - } - else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator >= 0); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); - } - return ret; - - }; - BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); - ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); - + using scalar = float; + const unsigned dim = 3; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + constexpr size_t n = 160; + ippl::Vector nr{n/2, n/2, 2*n}; + ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); + ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; std::array isParallel; isParallel.fill(false); isParallel[2] = true; - + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); - //[-1, 1] box - ippl::Vector hx = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])}; - ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)}; - ippl::UniformCartesian mesh(owned, hx, origin); - std::cout << "hx: " << hx << "\n"; - std::cout << "origin: " << origin << "\n"; - std::cout << "extents: " << cfg.extents << std::endl; - if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){ - std::cerr << "Dispersion relation not satisfiable\n"; - abort(); - } - - ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); - - if(ippl::Comm->rank() == 0){ - std::cout << "Init particles: " << std::endl; - size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); - fdtd_state.particles.Q = cfg.charge / actual_pc; - fdtd_state.particles.mass = cfg.mass / actual_pc; - } - else{ - fdtd_state.particles.create(0); - } - { - auto rview = fdtd_state.particles.R.getView(); - auto rm1view = fdtd_state.particles.R_nm1.getView(); - ippl::Vector meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum()); - - Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - rview(i) -= meanpos; - rm1view(i) -= meanpos; - }); - } - fdtd_state.particles.setParticleBC(ippl::NO); - //fdtd_state.scatterBunch(); - //std::cout << cfg.charge << "\n"; - - size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); - uint64_t starttime = nanoTime(); - std::ofstream rad; - FILE* ffmpeg_file = nullptr; - if(ippl::Comm->rank() == 0){ - rad = std::ofstream("radiation.txt"); - const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; - if(cfg.output_rhythm != 0){ - ffmpeg_file = popen(ffmpegCmd, "w"); - } + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; } - - - for(size_t i = 0;i < timesteps_required;i++){ - fdtd_state.J = scalar(0.0); - fdtd_state.playout.update(fdtd_state.particles); - fdtd_state.scatterBunch(); - //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; - fdtd_state.fieldStep(); - fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); - auto ldom = layout.getLocalNDIndex(); - auto nrg = fdtd_state.nr_global; - auto ebv = fdtd_state.EB.getView(); - double radiation = 0.0; - Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ - //uint32_t ig = i + ldom.first()[0]; - //uint32_t jg = j + ldom.first()[1]; - Kokkos::pair, ippl::Vector> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]}; - ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; - ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; - uint32_t kg = k + ldom.first()[2]; - if(kg == nrg[2] - 3){ - ref += Elab.cross(Blab)[2]; - } - - }, radiation); - double radiation_in_watt_on_this_rank = radiation * - double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) * - fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; - double radiation_in_watt_global = 0.0; - MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); - if(ippl::Comm->rank() == 0){ - ippl::Vector pos{0,0,0}; - frame_boost.primedToUnprimed(pos, fdtd_state.dt * i); - rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; - } - //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; - //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; - int rank = ippl::Comm->rank(); - int size = ippl::Comm->size(); - if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ - - - int img_height = 400; - int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); - float* imagedata = new float[img_width * img_height * 3]; - std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); - float* idata_recvbuffer = new float[img_width * img_height * 3]; - int floatcount = img_width * img_height * 3; - uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; - std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); - auto phmirror = fdtd_state.particles.R.getHostMirror(); - Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView()); - for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){ - ippl::Vector ppos = phmirror(hi); - ppos -= mesh.getOrigin(); - ppos /= cfg.extents.cast(); - int x_imgcoord = ppos[2] * img_width; - int y_imgcoord = ppos[0] * img_height; - //printf("%d, %d\n", x_imgcoord, y_imgcoord); - if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){ - const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); - //std::cout << intensity << "\n"; - imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = - std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); - } - }; - auto ebh = fdtd_state.EB.getHostMirror(); - Kokkos::deep_copy(ebh, fdtd_state.EB.getView()); - - //double exp_avg = double(exp_sum) / double(acount); - { - for(int i = 1;i < img_width;i++){ - for(int j = 1;j < img_height;j++){ - int i_remap = (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; - int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; - if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ - if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ - ippl::Vector, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); - - ippl::Vector poynting = acc[0].cross(acc[1]); - Kokkos::pair, ippl::Vector> eblab = frame_boost.inverse_transform_EB( - Kokkos::make_pair, ippl::Vector>(acc[0], acc[1]) - ); - ippl::Vector poyntinglab = eblab.first.cross(eblab.second); - poynting = poyntinglab; - if(poynting.norm() > 0){ - //std::cout << poynting.norm() << "\n"; - } - float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f; - int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); - imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); - imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); - imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); - } - } - } - } - } - int mask = 1; - while (mask < size) { - int partner = rank ^ mask; - //if((rank & (mask - 1)) == 0) - { - if ((rank & mask) == 0) { - // Send data to partner - MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); - // Apply image summation - for(int f = 0;f < floatcount;f++){ - imagedata[f] += idata_recvbuffer[f]; - } - } else { - MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator()); - // Receive data from partner and apply reduction + ippl::Vector origin = {0,0,0}; + ippl::UniformCartesian mesh(owned, hx, origin); + FourField source(mesh, layout); + ThreeField E(mesh, layout); + ThreeField B(mesh, layout); + source = vector4_type(0); + ippl::NonStandardFDTDSolver sfdsolver(source, E, B); + sfdsolver.setPeriodicBoundaryConditions(); + auto aview = sfdsolver.A_n.getView(); + auto am1view = sfdsolver.A_nm1.getView(); + auto lDom = layout.getLocalNDIndex(); + size_t nghost = sfdsolver.A_n.getNghost(); + Kokkos::parallel_for(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + const size_t ig = i + lDom.first()[0] - nghost; + const size_t jg = j + lDom.first()[1] - nghost; + const size_t kg = k + lDom.first()[2] - nghost; + scalar x = scalar(ig) / nr[0]; + scalar y = scalar(jg) / nr[1]; + scalar z = scalar(kg) / nr[2]; + //std::cout << x << ", " << y << ", " << z << "\n"; + (void)x; + (void)y; + (void)z; + const scalar magnitude = gauss(scalar(0.5), scalar(0.01), z); + aview (i,j,k) = vector4_type{scalar(0), x, y, z}; + am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)}; + }); + Kokkos::fence(); + sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n); + Kokkos::fence(); + std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n"; + std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n"; + goto exit; + sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1); + sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1); + sfdsolver.A_n.fillHalo(); + sfdsolver.A_nm1.fillHalo(); + int img_width = 500; + int img_height = 500; + float* imagedata = new float[img_width * img_height * 3]; + std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); + uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; + for(size_t s = 0;s < 2 * n;s++){ + auto ebh = sfdsolver.A_n.getHostMirror(); + Kokkos::deep_copy(ebh, sfdsolver.A_n.getView()); + for(int i = 1;i < img_width;i++){ + for(int j = 1;j < img_height;j++){ + int i_remap = (double(i) / (img_width - 1)) * (nr[2] - 4) + 2; + int j_remap = (double(j) / (img_height - 1)) * (nr[0] - 4) + 2; + if(i_remap >= lDom.first()[2] && i_remap <= lDom.last()[2]){ + if(j_remap >= lDom.first()[0] && j_remap <= lDom.last()[0]){ + ippl::Vector acc = ebh(j_remap + 1 - lDom.first()[0], nr[1] / 2, i_remap + 1 - lDom.first()[2]); + + float normalized_colorscale_value = acc.norm(); + //int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] = normalized_colorscale_value * 255.0f; + imagedata[(j * img_width + i) * 3 + 1] = normalized_colorscale_value * 255.0f; + imagedata[(j * img_width + i) * 3 + 2] = normalized_colorscale_value * 255.0f; } } - mask <<= 1; // Move to next bit position for pairing } - if(rank == 0){ - char output[1024] = {0}; - - snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); - std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); - if(ffmpeg_file != nullptr) - writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); - } - delete[] imagedata; - delete[] idata_recvbuffer; - delete[] imagedata_final; - } - } - uint64_t endtime = nanoTime(); - std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; + } + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); + char output[1024] = {0}; + snprintf(output, 1023, "%soutimage%.05lu.bmp", "data/", s); + if(s % 4 == 0) + stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + sfdsolver.solve(); + } + double sum_error = 0; + Kokkos::parallel_reduce(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k, double& ref){ + const size_t ig = i + lDom.first()[0] - nghost; + const size_t jg = j + lDom.first()[1] - nghost; + const size_t kg = k + lDom.first()[2] - nghost; + scalar x = scalar(ig) / nr[0]; + scalar y = scalar(jg) / nr[1]; + scalar z = scalar(kg) / nr[2]; + (void)x; + (void)y; + (void)z; + ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.01), z) - aview(i,j,k)[2]); + }, sum_error); + std::cout << "Sum: " << sum_error / double(n * n * n) << "\n"; } exit: ippl::finalize(); From 68b3111b3ce4320a631232236c90b9a9cf1da379 Mon Sep 17 00:00:00 2001 From: Manuel Date: Thu, 2 May 2024 20:28:30 +0200 Subject: [PATCH 17/32] Second order ABCs working --- src/MaxwellSolvers/AbsorbingBC.h | 427 +++++++++++++++++++++++++++++++ src/MaxwellSolvers/FDTD.h | 55 ++-- test/solver/TestFDTDSolver.cpp | 24 +- 3 files changed, 478 insertions(+), 28 deletions(-) create mode 100644 src/MaxwellSolvers/AbsorbingBC.h diff --git a/src/MaxwellSolvers/AbsorbingBC.h b/src/MaxwellSolvers/AbsorbingBC.h new file mode 100644 index 000000000..4a87df71c --- /dev/null +++ b/src/MaxwellSolvers/AbsorbingBC.h @@ -0,0 +1,427 @@ +#ifndef ABC_H +#define ABC_H +#include +#include "Types/Vector.h" +#include "Field/Field.h" +template +constexpr KOKKOS_INLINE_FUNCTION auto first(){ + return a; +} +template +constexpr KOKKOS_INLINE_FUNCTION auto second(){ + return b; +} +template +struct second_order_abc_face{ + using scalar = _scalar; + scalar Cweights[5]; + int sign; + constexpr static unsigned main_axis = _main_axis; + KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ + constexpr scalar c = 1; + constexpr unsigned side_axes[2] = {_side_axes...}; + static_assert( + (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || + (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || + (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) + ); + assert(_main_axis != side_axes[0]); + assert(_main_axis != side_axes[1]); + assert(side_axes[0] != side_axes[1]); + constexpr scalar truncation_order = 2.0; + scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); + scalar q = - 1.0 / ( 1 + 1 ); + + scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); + + Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + assert(abs(Cweights[1] + 1) < 1e-6); //Like literally + Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; + Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; + Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + constexpr unsigned side_axes[2] = {_side_axes...}; + ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; + ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; + ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); + return advanceBoundaryS( + A_nm1(i,j,k), A_n(i,j,k), + apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), + apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), + apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), + apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) + ); + } + template + KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , + const value_type& v3 , const value_type& v4 , const value_type& v5 , + const value_type& v6 , const value_type& v7 , const value_type& v8 , + const value_type& v9 , const value_type& v10, const value_type& v11, + const value_type& v12, const value_type& v13)const noexcept + { + + value_type v0 = + Cweights[0] * (v1 + v5) + + (Cweights[1]) * v3 + + (Cweights[2]) * ( v2 + v4 ) + + (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + + (Cweights[4]) * ( v8 + v9 + v12 + v13 ); + return v0; + } +}; +template +struct second_order_abc_edge{ + using scalar = _scalar; + // + scalar Eweights[5]; + + KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ + static_assert(normal_axis1 != normal_axis2); + static_assert(edge_axis != normal_axis2); + static_assert(edge_axis != normal_axis1); + static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); + constexpr scalar c0_ = scalar(1); + scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); + if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else{ + assert(false); + } + + + + + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; + ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); + ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); + ippl::Vector acc0 = {i, j, k}; + ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); + ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); + ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); + //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); + ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; + //return A_n(i, j, k); + return advanceEdgeS( + A_n(i, j, k), A_nm1(i, j, k), + apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), + apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), + apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), + apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), + apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceEdgeS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19)const noexcept{ + value_type v0 = + Eweights[0] * (v3 + v8) + + Eweights[1] * (v5 + v6) + + Eweights[2] * (v2 + v9) + + Eweights[3] * (v1 + v4 + v7 + v10) + + Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; + return v0; + } +}; +template +struct second_order_abc_corner{ + using scalar = _scalar; + scalar Cweights[17]; + KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ + constexpr scalar c0_ = scalar(1); + Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + //First implementation: 0,0,0 corner + constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); + constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); + constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); + using ippl::apply; + const ippl::Vector offsets[8] = { + ippl::Vector{0,0,0}, + ippl::Vector{xoff,0,0}, + ippl::Vector{0,yoff,0}, + ippl::Vector{0,0,zoff}, + ippl::Vector{xoff,yoff,0}, + ippl::Vector{xoff,0,zoff}, + ippl::Vector{0,yoff,zoff}, + ippl::Vector{xoff,yoff,zoff}, + }; + return advanceCornerS( + apply(A_n, c), apply(A_nm1, c), + apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), + apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), + apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), + apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), + apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), + apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), + apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceCornerS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19, value_type v20, + value_type v21, value_type v22, value_type v23)const noexcept{ + return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + + v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + + v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + + v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + + v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + + v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + + v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + + v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; + } +}; + + + + + + +struct second_order_mur_boundary_conditions{ + template + void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ + using scalar = decltype(dt); + //TODO: tbh don't know + //const unsigned nghost = 1; + const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); + //assert_isreal((betaMur[0])); + //assert_isreal((betaMur[1])); + //assert_isreal((betaMur[2])); + auto A_n = FA_n.getView(); + auto A_np1 = FA_np1.getView(); + auto A_nm1 = FA_nm1.getView(); + ippl::Vector local_nr{ + uint32_t(A_n.extent(0)), + uint32_t(A_n.extent(1)), + uint32_t(A_n.extent(2)) + }; + constexpr uint32_t min_abc_boundary = 1; + constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 1){ + if(ig == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(ig == true_nr[0] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 2)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + if(Kokkos::popcount(val) == 2){ //Edge + if(ig == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + //if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 3){ + //printf("Corner: %d, %d, %d\n", i, j, k); + if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + } +}; +#endif \ No newline at end of file diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 827ed7fef..772ff538c 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -4,11 +4,12 @@ using std::size_t; #include "Types/Vector.h" -#include "Field/Field.h" + #include "FieldLayout/FieldLayout.h" #include "Meshes/UniformCartesian.h" #include "MaxwellSolvers/Maxwell.h" +#include "MaxwellSolvers/AbsorbingBC.h" constexpr double sqrt_4pi = 3.54490770181103205459; constexpr double alpha_scaling_factor = 1e30; constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; @@ -102,6 +103,19 @@ namespace ippl { Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); } + void applyBCs(){ + if constexpr(boundary_conditions == periodic){ + A_n.getFieldBC().apply(A_n); + A_nm1.getFieldBC().apply(A_nm1); + A_np1.getFieldBC().apply(A_np1); + } + else{ + Vector true_nr = nr_m; + true_nr += (A_n.getNghost() * 2); + second_order_mur_boundary_conditions bcs{}; + bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); + } + } void step(){ const auto& ldom = layout_mp->getLocalNDIndex(); @@ -118,18 +132,16 @@ namespace ippl { const scalar a8 = sq(dt); Vector true_nr = nr_m; true_nr += (nghost * 2); - A_n.getFieldBC().apply(A_n); - A_np1.getFieldBC().apply(A_np1); - A_nm1.getFieldBC().apply(A_nm1); + constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0; Kokkos::parallel_for( - "Scalar potential update", ippl::getRangePolicy(aview, nghost), + "Four potential update", ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { // global indices const uint32_t ig = i + ldom.first()[0]; const uint32_t jg = j + ldom.first()[1]; const uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) - + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); if(val == 0){ FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k) @@ -144,8 +156,8 @@ namespace ippl { } }); Kokkos::fence(); + applyBCs(); A_np1.fillHalo(); - A_np1.getFieldBC().apply(A_np1); } void evaluate_EB(){ ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; @@ -266,6 +278,20 @@ namespace ippl { Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); } + + void applyBCs(){ + if constexpr(boundary_conditions == periodic){ + A_n.getFieldBC().apply(A_n); + A_nm1.getFieldBC().apply(A_nm1); + A_np1.getFieldBC().apply(A_np1); + } + else{ + Vector true_nr = nr_m; + true_nr += (A_n.getNghost() * 2); + second_order_mur_boundary_conditions bcs{}; + bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); + } + } template struct nondispersive{ scalar a1; @@ -292,17 +318,14 @@ namespace ippl { }; Vector true_nr = nr_m; true_nr += (nghost * 2); - std::cout << hr_m << "\n"; - std::cout << dt << "\n"; - A_n.getFieldBC().apply(A_n); - A_np1.getFieldBC().apply(A_np1); - A_nm1.getFieldBC().apply(A_nm1); + constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0; Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ uint32_t ig = i + ldom.first()[0]; uint32_t jg = j + ldom.first()[1]; uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == 0) + (uint32_t(jg == 0) << 1) + (uint32_t(kg == 0) << 2) - + (uint32_t(ig == true_nr[0] - 1) << 3) + (uint32_t(jg == true_nr[1] - 1) << 4) + (uint32_t(kg == true_nr[2] - 1) << 5); + uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); + if(!val){ anp1view(i, j, k) = -anm1view(i,j,k) + ndisp.a1 * aview(i,j,k) @@ -315,7 +338,7 @@ namespace ippl { }); Kokkos::fence(); A_np1.fillHalo(); - A_np1.getFieldBC().apply(A_np1); + applyBCs(); } void evaluate_EB(){ ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index 6cf9d4d15..b45c96298 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -32,7 +32,7 @@ int main(int argc, char* argv[]){ using vector4_type = ippl::Vector; using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - constexpr size_t n = 160; + constexpr size_t n = 100; ippl::Vector nr{n/2, n/2, 2*n}; ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; @@ -53,7 +53,7 @@ int main(int argc, char* argv[]){ ThreeField E(mesh, layout); ThreeField B(mesh, layout); source = vector4_type(0); - ippl::NonStandardFDTDSolver sfdsolver(source, E, B); + ippl::StandardFDTDSolver sfdsolver(source, E, B); sfdsolver.setPeriodicBoundaryConditions(); auto aview = sfdsolver.A_n.getView(); auto am1view = sfdsolver.A_nm1.getView(); @@ -70,16 +70,16 @@ int main(int argc, char* argv[]){ (void)x; (void)y; (void)z; - const scalar magnitude = gauss(scalar(0.5), scalar(0.01), z); - aview (i,j,k) = vector4_type{scalar(0), x, y, z}; + const scalar magnitude = gauss(scalar(0.5), scalar(0.05), z); + aview (i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)}; am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)}; }); Kokkos::fence(); sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n); - Kokkos::fence(); - std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n"; - std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n"; - goto exit; + //Kokkos::fence(); + //std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n"; + //std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n"; + //goto exit; sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1); sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1); sfdsolver.A_n.fillHalo(); @@ -89,7 +89,7 @@ int main(int argc, char* argv[]){ float* imagedata = new float[img_width * img_height * 3]; std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; - for(size_t s = 0;s < 2 * n;s++){ + for(size_t s = 0;s < 4 * n;s++){ auto ebh = sfdsolver.A_n.getHostMirror(); Kokkos::deep_copy(ebh, sfdsolver.A_n.getView()); for(int i = 1;i < img_width;i++){ @@ -112,7 +112,7 @@ int main(int argc, char* argv[]){ } std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); char output[1024] = {0}; - snprintf(output, 1023, "%soutimage%.05lu.bmp", "data/", s); + snprintf(output, 1023, "%soutimage%.05lu.bmp", "renderdata/", s); if(s % 4 == 0) stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); sfdsolver.solve(); @@ -128,10 +128,10 @@ int main(int argc, char* argv[]){ (void)x; (void)y; (void)z; - ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.01), z) - aview(i,j,k)[2]); + ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]); }, sum_error); std::cout << "Sum: " << sum_error / double(n * n * n) << "\n"; } - exit: + //exit: ippl::finalize(); } \ No newline at end of file From 10495c7db0196996452d02ad0ba3ba5fe3997710 Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 3 May 2024 10:17:22 +0200 Subject: [PATCH 18/32] Add Solver with particles --- alpine/FreeElectronLaser.cpp | 446 +------------------- src/Expression/IpplOperations.h | 3 + src/MaxwellSolvers/FDTD.h | 336 ++++++++++++++- test/solver/CMakeLists.txt | 6 + test/solver/TestFDTDSolver.cpp | 4 +- test/solver/TestFDTDSolverWithParticles.cpp | 59 +++ 6 files changed, 408 insertions(+), 446 deletions(-) create mode 100644 test/solver/TestFDTDSolverWithParticles.cpp diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index d15dd6e76..5c014e96a 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -311,7 +311,6 @@ KOKKOS_INLINE_FUNCTION bool isINF(double x){ return std::isinf(x); #endif } -#define assert_isreal(X) assert(!isNaN(X) && !isINF(X)) template KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, ippl::Vector to, Ts... args){ if constexpr(sizeof...(Ts) == Dim){ @@ -1074,17 +1073,6 @@ KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ } - - -template -constexpr KOKKOS_INLINE_FUNCTION auto first(){ - return a; -} -template -constexpr KOKKOS_INLINE_FUNCTION auto second(){ - return b; -} - template KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, const ippl::Vector& b) { @@ -1250,430 +1238,6 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view // END PREAMBLE -//BEGIN ABSORBING BOUNDARY CONDITION -template -struct second_order_abc_face{ - using scalar = _scalar; - scalar Cweights[5]; - int sign; - constexpr static unsigned main_axis = _main_axis; - KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ - constexpr scalar c = 1; - constexpr unsigned side_axes[2] = {_side_axes...}; - static_assert( - (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || - (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || - (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) - ); - assert(_main_axis != side_axes[0]); - assert(_main_axis != side_axes[1]); - assert(side_axes[0] != side_axes[1]); - constexpr scalar truncation_order = 2.0; - scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); - scalar q = - 1.0 / ( 1 + 1 ); - - scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); - - Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; - Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; - assert(abs(Cweights[1] + 1) < 1e-6); //Like literally - Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; - Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; - Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - uint32_t i = c[0]; - uint32_t j = c[1]; - uint32_t k = c[2]; - using ippl::apply; - constexpr unsigned side_axes[2] = {_side_axes...}; - ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; - ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; - ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); - return advanceBoundaryS( - A_nm1(i,j,k), A_n(i,j,k), - apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), - apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), - apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), - apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) - ); - } - template - KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , - const value_type& v3 , const value_type& v4 , const value_type& v5 , - const value_type& v6 , const value_type& v7 , const value_type& v8 , - const value_type& v9 , const value_type& v10, const value_type& v11, - const value_type& v12, const value_type& v13)const noexcept - { - - value_type v0 = - Cweights[0] * (v1 + v5) + - (Cweights[1]) * v3 + - (Cweights[2]) * ( v2 + v4 ) + - (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + - (Cweights[4]) * ( v8 + v9 + v12 + v13 ); - return v0; - } -}; -template -struct second_order_abc_edge{ - using scalar = _scalar; - // - scalar Eweights[5]; - - KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ - static_assert(normal_axis1 != normal_axis2); - static_assert(edge_axis != normal_axis2); - static_assert(edge_axis != normal_axis1); - static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); - constexpr scalar c0_ = scalar(1); - scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); - if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) - Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; - Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; - Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; - } - else{ - assert(false); - } - - - - - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - uint32_t i = c[0]; - uint32_t j = c[1]; - uint32_t k = c[2]; - using ippl::apply; - //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; - ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); - ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); - ippl::Vector acc0 = {i, j, k}; - ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); - ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); - ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); - //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); - ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; - //return A_n(i, j, k); - return advanceEdgeS( - A_n(i, j, k), A_nm1(i, j, k), - apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), - apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), - apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), - apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), - apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) - ); - } - template - KOKKOS_INLINE_FUNCTION value_type advanceEdgeS - ( value_type v1 , value_type v2 , - value_type v3 , value_type v4 , value_type v5 , - value_type v6 , value_type v7 , value_type v8 , - value_type v9 , value_type v10, value_type v11, - value_type v12, value_type v13, value_type v14, - value_type v15, value_type v16, value_type v17, - value_type v18, value_type v19)const noexcept{ - value_type v0 = - Eweights[0] * (v3 + v8) + - Eweights[1] * (v5 + v6) + - Eweights[2] * (v2 + v9) + - Eweights[3] * (v1 + v4 + v7 + v10) + - Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; - return v0; - } -}; -template -struct second_order_abc_corner{ - using scalar = _scalar; - scalar Cweights[17]; - KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ - constexpr scalar c0_ = scalar(1); - Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); - Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); - } - template - KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ - //First implementation: 0,0,0 corner - constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); - constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); - constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); - using ippl::apply; - const ippl::Vector offsets[8] = { - ippl::Vector{0,0,0}, - ippl::Vector{xoff,0,0}, - ippl::Vector{0,yoff,0}, - ippl::Vector{0,0,zoff}, - ippl::Vector{xoff,yoff,0}, - ippl::Vector{xoff,0,zoff}, - ippl::Vector{0,yoff,zoff}, - ippl::Vector{xoff,yoff,zoff}, - }; - return advanceCornerS( - apply(A_n, c), apply(A_nm1, c), - apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), - apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), - apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), - apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), - apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), - apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), - apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) - ); - } - template - KOKKOS_INLINE_FUNCTION value_type advanceCornerS - ( value_type v1 , value_type v2 , - value_type v3 , value_type v4 , value_type v5 , - value_type v6 , value_type v7 , value_type v8 , - value_type v9 , value_type v10, value_type v11, - value_type v12, value_type v13, value_type v14, - value_type v15, value_type v16, value_type v17, - value_type v18, value_type v19, value_type v20, - value_type v21, value_type v22, value_type v23)const noexcept{ - return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + - v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + - v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + - v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + - v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + - v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + - v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + - v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; - } -}; - - - - - - -struct second_order_mur_boundary_conditions{ - template - void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ - using scalar = decltype(dt); - //TODO: tbh don't know - //const unsigned nghost = 1; - const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); - //assert_isreal((betaMur[0])); - //assert_isreal((betaMur[1])); - //assert_isreal((betaMur[2])); - auto A_n = FA_n.getView(); - auto A_np1 = FA_np1.getView(); - auto A_nm1 = FA_nm1.getView(); - ippl::Vector local_nr{ - uint32_t(A_n.extent(0)), - uint32_t(A_n.extent(1)), - uint32_t(A_n.extent(2)) - }; - constexpr uint32_t min_abc_boundary = 1; - constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - - if(Kokkos::popcount(val) == 1){ - if(ig == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(jg == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(kg == min_abc_boundary){ - second_order_abc_face soa(hr, dt, 1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(ig == true_nr[0] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - if(kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_face soa(hr, dt, -1); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - } - }); - Kokkos::fence(); - //FA_np1.fillHalo(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - if(Kokkos::popcount(lval) > 2)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - if(Kokkos::popcount(val) == 2){ //Edge - if(ig == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - - else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_edge soa(hr, dt); - A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else{ - assert(false); - } - } - }); - Kokkos::fence(); - //FA_np1.fillHalo(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ - uint32_t ig = i + lDom.first()[0]; - uint32_t jg = j + lDom.first()[1]; - uint32_t kg = k + lDom.first()[2]; - - //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) - // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); - - //if(Kokkos::popcount(lval) > 1)return; - uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) - + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); - - if(Kokkos::popcount(val) == 3){ - //printf("Corner: %d, %d, %d\n", i, j, k); - if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ - second_order_abc_corner coa(hr, dt); - A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); - } - else{ - assert(false); - } - } - }); - Kokkos::fence(); - } -}; -//END ABSORBING BOUNDARY CONDITION - - - - - - - - - namespace ippl { template @@ -1700,11 +1264,11 @@ namespace ippl { scalar a8; }; template - struct Bunch : public ippl::ParticleBase { + struct Bunch_eb : public ippl::ParticleBase { using scalar = _scalar; // Constructor for the Bunch class, taking a PLayout reference - Bunch(PLayout& playout) + Bunch_eb(PLayout& playout) : ippl::ParticleBase(playout) { // Add attributes to the particle bunch this->addAttribute(Q); // Charge attribute @@ -1716,7 +1280,7 @@ namespace ippl { } // Destructor for the Bunch class - ~Bunch() {} + ~Bunch_eb() {} // Define container types for various attributes using charge_container_type = ippl::ParticleAttrib; @@ -1770,8 +1334,8 @@ namespace ippl { FieldLayout* layout_mp; using playout_type = ParticleSpatialLayout; playout_type playout; - Bunch> particles; - using bunch_type = Bunch>; + Bunch_eb> particles; + using bunch_type = Bunch_eb>; config m_config; /** diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h index 198f20b44..3aa3177d2 100644 --- a/src/Expression/IpplOperations.h +++ b/src/Expression/IpplOperations.h @@ -276,6 +276,9 @@ namespace ippl { } return res; } + KOKKOS_INLINE_FUNCTION operator typename E1::value_type() const{ + return apply(); + } /* * This is required for BareField::dot diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 772ff538c..6ba94672e 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -9,6 +9,7 @@ using std::size_t; #include "FieldLayout/FieldLayout.h" #include "Meshes/UniformCartesian.h" #include "MaxwellSolvers/Maxwell.h" +#include "Particle/ParticleBase.h" #include "MaxwellSolvers/AbsorbingBC.h" constexpr double sqrt_4pi = 3.54490770181103205459; constexpr double alpha_scaling_factor = 1e30; @@ -51,7 +52,7 @@ constexpr double sq(double x){ constexpr float sq(float x){ return x * x; } - +#define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X)) namespace ippl { enum fdtd_bc{ periodic, absorbing @@ -95,7 +96,8 @@ namespace ippl { FourField A_n; FourField A_np1; FourField A_nm1; - + scalar dt; + private: void timeShift(){ @@ -116,7 +118,7 @@ namespace ippl { bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); } } - + public: void step(){ const auto& ldom = layout_mp->getLocalNDIndex(); const int nghost = A_n.getNghost(); @@ -194,7 +196,8 @@ namespace ippl { FieldLayout* layout_mp; NDIndex domain_m; Vector_t hr_m; - scalar dt; + + Vector nr_m; void initialize() { @@ -300,6 +303,7 @@ namespace ippl { scalar a6; scalar a8; }; + public: void step(){ const auto& ldom = layout_mp->getLocalNDIndex(); const int nghost = A_n.getNghost(); @@ -401,6 +405,330 @@ namespace ippl { A_np1 = 0.0; }; }; + + template + struct Bunch : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(E_gather); // Electric field attribute for particle gathering + this->addAttribute(B_gather); // Magnedit field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step + ippl::ParticleAttrib> E_gather; // Electric field container for particle gathering + ippl::ParticleAttrib> B_gather; // Magnetio field container for particle gathering + + }; + template + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ + //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; + //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + Kokkos::pair, ippl::Vector> ret; + //pos -= spacing * T(0.5); + ippl::Vector relpos = pos - origin; + ippl::Vector gridpos = relpos / hr; + ippl::Vector ipos; + ipos = gridpos.template cast(); + ippl::Vector fracpos; + for(unsigned k = 0;k < 3;k++){ + fracpos[k] = gridpos[k] - (int)ipos[k]; + } + //TODO: NGHOST!!!!!!! + ipos += ippl::Vector(1); + ret.first = ipos; + ret.second = fracpos; + return ret; + } + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ + //std::cout << "Value: " << value << "\n"; + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + if( + ipos[0] < 0 + ||ipos[1] < 0 + ||ipos[2] < 0 + ||size_t(ipos[0]) >= view.extent(0) - 1 + ||size_t(ipos[1]) >= view.extent(1) - 1 + ||size_t(ipos[2]) >= view.extent(2) - 1 + ||fracpos[0] < 0 + ||fracpos[1] < 0 + ||fracpos[2] < 0 + ){ + //std::cout << "out of bounds\n"; + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for(unsigned i = 0;i < 8;i++){ + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for(unsigned d = 0;d < 3;d++){ + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ + + + Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); + //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); + for(int d = 0;d < 3;d++){ + //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ + // std::cout <(nghost); + //to_ipos += ippl::Vector(nghost); + + if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const int nghost = 1; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); + } + template + class NSFDSolverWithParticles{ + public: + constexpr static unsigned dim = 3; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using playout_type = ParticleSpatialLayout; + using bunch_type = Bunch>; + using Mesh_t = ippl::UniformCartesian; + FieldLayout* layout_mp; + Mesh_t* mesh_mp; + playout_type playout; + Bunch> particles; + FourField J; + ThreeField E; + ThreeField B; + NonStandardFDTDSolver field_solver; + + ippl::Vector nr_global; + ippl::Vector hr_m; + size_t steps_taken; + NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : layout_mp(&layout), mesh_mp(&mesch), playout(layout, mesch), particles(playout), J(mesch, layout), E(mesch, layout), B(mesch, layout), field_solver(J, E, B){ + particles.create(nparticles); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) + }; + hr_m = mesh_mp->getMeshSpacing(); + steps_taken = 0; + //field_solver.setEMFields(E, B); + } + template + void scatterBunch(){ + //ippl::Vector* gammaBeta = this->gammaBeta; + auto hr_m = mesh_mp->getMeshSpacing(); + const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; + assert_isreal(volume); + assert_isreal((scalar(1) / volume)); + J = typename decltype(J)::value_type(0); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = field_solver.dt; + bool sc = space_charge; + ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + vector_type pos = rview(i); + vector_type to = rview(i); + vector_type from = rm1view(i); + if(sc){ + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); + } + scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); + }); + Kokkos::fence(); + J.accumulateHalo(); + } + template + void updateBunch(scalar /*time*/){ + + Kokkos::fence(); + auto gbview = particles.gamma_beta.getView(); + auto eview = particles.E_gather.getView(); + auto bview = particles.B_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); + scalar bunch_dt = field_solver.dt / 3; + Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + E.fillHalo(); + B.fillHalo(); + Kokkos::fence(); + for(int bts = 0;bts < 3;bts++){ + + particles.E_gather.gather(E, particles.R); + particles.B_gather.gather(B, particles.R); + Kokkos::fence(); + Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector E = eview(i); + ippl::Vector B = bview(i); + ippl::Vector labpos = rview(i); + ippl::Vector, 2> EB{E, B}; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); + Kokkos::fence(); + } + Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); + size_t invalid_count = 0; + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz;// + extenz[0] = nr_global[0] * hr_m[0]; + extenz[1] = nr_global[1] * hr_m[1]; + extenz[2] = nr_global[2] * hr_m[2]; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t& ref){ + bool out_of_bounds = false; + ippl::Vector ppos = rview(i); + for(size_t d = 0;d < dim;d++){ + out_of_bounds |= (ppos[d] <= origo[d]); + out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain + } + invalid(i) = out_of_bounds; + ref += out_of_bounds; + }, + invalid_count); + particles.destroy(invalid, invalid_count); + Kokkos::fence(); + + } + void solve(){ + scatterBunch(); + field_solver.solve(); + updateBunch(field_solver.dt * steps_taken); + + } + }; } #endif \ No newline at end of file diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index dda292fff..7bf9f853b 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -31,6 +31,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp) +target_link_libraries ( + TestFDTDSolverWithParticles + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) if (ENABLE_FFT) add_executable (TestGaussian_convergence TestGaussian_convergence.cpp) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp index b45c96298..62088acc9 100644 --- a/test/solver/TestFDTDSolver.cpp +++ b/test/solver/TestFDTDSolver.cpp @@ -33,7 +33,7 @@ int main(int argc, char* argv[]){ using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; constexpr size_t n = 100; - ippl::Vector nr{n/2, n/2, 2*n}; + ippl::Vector nr{n / 2, n / 2, 2 * n}; ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; std::array isParallel; @@ -131,6 +131,8 @@ int main(int argc, char* argv[]){ ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]); }, sum_error); std::cout << "Sum: " << sum_error / double(n * n * n) << "\n"; + + } //exit: ippl::finalize(); diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp new file mode 100644 index 000000000..f7b6dbcfd --- /dev/null +++ b/test/solver/TestFDTDSolverWithParticles.cpp @@ -0,0 +1,59 @@ +#include +using std::size_t; +#include +#include "Ippl.h" +#include "Types/Vector.h" +#include "Field/Field.h" +#include "MaxwellSolvers/FDTD.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + scalar1 vecsum(0); + for(unsigned d = 0;d < dim;d++){ + vecsum += vec[d] * vec[d]; + + } + #ifndef __CUDA_ARCH__ + using std::exp; + #endif + return exp(-(vecsum) / (stddev * stddev)); +} + +int main(int argc, char* argv[]){ + ippl::initialize(argc, argv); + { + using scalar = float; + //const unsigned dim = 3; + //using vector_type = ippl::Vector; + //using vector4_type = ippl::Vector; + //using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + //using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + constexpr size_t n = 100; + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); + ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; + } + ippl::Vector origin = {0,0,0}; + ippl::UniformCartesian mesh(owned, hx, origin); + + ippl::NSFDSolverWithParticles solver(layout, mesh, 1 << 17); + for(int i = 0;i < 10;i++) + solver.solve(); + } + //exit: + ippl::finalize(); +} \ No newline at end of file From ae9472c9df5fe9171424a0e919e2fb81e7453b12 Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 3 May 2024 10:34:26 +0200 Subject: [PATCH 19/32] Support for external fields --- src/MaxwellSolvers/FDTD.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 6ba94672e..5ad333171 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -657,7 +657,7 @@ namespace ippl { J.accumulateHalo(); } template - void updateBunch(scalar /*time*/){ + void updateBunch(scalar time, callable external_field){ Kokkos::fence(); auto gbview = particles.gamma_beta.getView(); @@ -680,10 +680,16 @@ namespace ippl { Kokkos::fence(); Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ const ippl::Vector pgammabeta = gbview(i); - ippl::Vector E = eview(i); - ippl::Vector B = bview(i); - ippl::Vector labpos = rview(i); - ippl::Vector, 2> EB{E, B}; + ippl::Vector E_grid = eview(i); + ippl::Vector B_grid = bview(i); + + ippl::Vector bunchpos = rview(i); + Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time); + + ippl::Vector, 2> EB{ + ippl::Vector(E_grid + external_eb.first), + ippl::Vector(B_grid + external_eb.second) + }; const scalar charge = qview(i); const scalar mass = mview(i); @@ -725,8 +731,16 @@ namespace ippl { void solve(){ scatterBunch(); field_solver.solve(); - updateBunch(field_solver.dt * steps_taken); - + updateBunch(field_solver.dt * steps_taken, /*no external field*/[]KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/){return Kokkos::pair{ + vector_type(0), + vector_type(0) + };}); + } + template + void solve(callable external_field){ + scatterBunch(); + field_solver.solve(); + updateBunch(field_solver.dt * steps_taken, external_field); } }; } From a72e92ffd2ccaa803cbb7ee0b0b509fc3df6fb78 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 4 May 2024 01:04:56 +0200 Subject: [PATCH 20/32] Particle solwer --- alpine/FreeElectronLaser.cpp | 4 ++-- src/MaxwellSolvers/FDTD.h | 3 ++- src/Particle/ParticleAttrib.hpp | 1 + test/solver/TestFDTDSolverWithParticles.cpp | 26 +++++++++++++++++++-- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index 5c014e96a..fc26b93b8 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -1774,7 +1774,7 @@ int main(int argc, char* argv[]) { test_gauss_law(64); test_amperes_law(64); - goto exit; + //goto exit; config cfg = read_config("../config.json"); const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); cfg.extents[2] *= frame_gamma; @@ -1873,7 +1873,7 @@ int main(int argc, char* argv[]) { fdtd_state.J = scalar(0.0); fdtd_state.playout.update(fdtd_state.particles); fdtd_state.scatterBunch(); - //std::cout << fdtd_state.J.getVolumeIntegral() << "\n"; + std::cout << i << "\n"; fdtd_state.fieldStep(); fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); auto ldom = layout.getLocalNDIndex(); diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 5ad333171..c3187579c 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -682,7 +682,8 @@ namespace ippl { const ippl::Vector pgammabeta = gbview(i); ippl::Vector E_grid = eview(i); ippl::Vector B_grid = bview(i); - + //std::cout << "E_grid: " << E_grid << "\n"; + //std::cout << "B_grid: " << B_grid << "\n"; ippl::Vector bunchpos = rview(i); Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 3b37f920a..e7ee0b12c 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -194,6 +194,7 @@ namespace ippl { Vector args = index - lDom.first() + nghost; // gather + //std::cout << index << std::endl; dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, view, wlo, whi, args); }); diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp index f7b6dbcfd..4b58a1f6c 100644 --- a/test/solver/TestFDTDSolverWithParticles.cpp +++ b/test/solver/TestFDTDSolverWithParticles.cpp @@ -7,6 +7,8 @@ using std::size_t; #include "MaxwellSolvers/FDTD.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include + +#include template requires((std::is_floating_point_v)) KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ @@ -35,7 +37,9 @@ int main(int argc, char* argv[]){ constexpr size_t n = 100; ippl::Vector nr{n / 2, n / 2, 2 * n}; ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); - ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; + ippl::Vector extents{meter_in_unit_lengths, + meter_in_unit_lengths, + meter_in_unit_lengths}; std::array isParallel; isParallel.fill(false); isParallel[2] = true; @@ -50,7 +54,25 @@ int main(int argc, char* argv[]){ ippl::Vector origin = {0,0,0}; ippl::UniformCartesian mesh(owned, hx, origin); - ippl::NSFDSolverWithParticles solver(layout, mesh, 1 << 17); + ippl::NSFDSolverWithParticles solver(layout, mesh, 1); + + auto pview = solver.particles.R.getView(); + auto p1view = solver.particles.R_nm1.getView(); + auto gbview = solver.particles.gamma_beta.getView(); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.01 * extents[0]); + pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.01 * extents[1]); + pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.01 * extents[2]); + p1view(i) = pview(i); + gbview(i) = 0; + random_pool.free_state(state); + }); + solver.playout.update(solver.particles); + solver.particles.Q = electron_charge_in_unit_charges; + solver.particles.mass = electron_mass_in_unit_masses; for(int i = 0;i < 10;i++) solver.solve(); } From c4ff2762930e05c0ba7fe056586e9677a11e9a7b Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 4 May 2024 09:44:04 +0200 Subject: [PATCH 21/32] ParticleAttribute: add custom reduction loop --- src/Particle/ParticleAttrib.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 70168a79b..093e2b519 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -130,6 +130,17 @@ namespace ippl { T max(); T min(); T prod(); + template + requires (std::is_invocable_v) + returnType customReduction(reductionLambda lambda, returnType identity){ + auto dview = this->dview_m; + Kokkos::parallel_reduce(*(this->localNum_mp), KOKKOS_LAMBDA(size_t i, returnType& ref){ + lambda(dview(i), ref); + }, identity); + returnType globaltemp; + Comm->allreduce(identity, globaltemp, 1, std::plus()); + return globaltemp; + } private: view_type dview_m; From fd86bdc12dcd018a1a06fc7dc667a1c83dcbe4a5 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 4 May 2024 09:54:18 +0200 Subject: [PATCH 22/32] fdtd particle test --- src/MaxwellSolvers/FDTD.h | 1 + test/solver/TestFDTDSolverWithParticles.cpp | 28 +++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index c3187579c..26ebfbbb6 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -727,6 +727,7 @@ namespace ippl { invalid_count); particles.destroy(invalid, invalid_count); Kokkos::fence(); + playout.update(particles); } void solve(){ diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp index 4b58a1f6c..a48a829e1 100644 --- a/test/solver/TestFDTDSolverWithParticles.cpp +++ b/test/solver/TestFDTDSolverWithParticles.cpp @@ -54,7 +54,7 @@ int main(int argc, char* argv[]){ ippl::Vector origin = {0,0,0}; ippl::UniformCartesian mesh(owned, hx, origin); - ippl::NSFDSolverWithParticles solver(layout, mesh, 1); + ippl::NSFDSolverWithParticles solver(layout, mesh, 1 << 15); auto pview = solver.particles.R.getView(); auto p1view = solver.particles.R_nm1.getView(); @@ -63,18 +63,36 @@ int main(int argc, char* argv[]){ Kokkos::Random_XorShift64_Pool<> random_pool(12345); Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ auto state = random_pool.get_state(); - pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.01 * extents[0]); - pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.01 * extents[1]); - pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.01 * extents[2]); + pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.04 * extents[0]); + pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.04 * extents[1]); + pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.04 * extents[2]); p1view(i) = pview(i); gbview(i) = 0; random_pool.free_state(state); }); + { + double var = 0; + Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){ + ippl::Vector pd(pview(i) - (origin + extents * 0.5)); + ref += pd.dot(pd); + }, var); + std::cout << ippl::Vector(var * (1.0 / solver.particles.getLocalNum())) << "\n"; + } solver.playout.update(solver.particles); solver.particles.Q = electron_charge_in_unit_charges; solver.particles.mass = electron_mass_in_unit_masses; - for(int i = 0;i < 10;i++) + for(int i = 0;i < 200;i++){ solver.solve(); + } + { + double var = 0; + Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){ + ippl::Vector pd(pview(i) - (origin + extents * 0.5)); + ref += pd.dot(pd); + }, var); + std::cout << ippl::Vector(var * (1.0 / solver.particles.getLocalNum())) << "\n"; + } + } //exit: ippl::finalize(); From 5731b7f5042f73c98744bd124dcf62e53ed491af Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 4 May 2024 16:03:17 +0200 Subject: [PATCH 23/32] Hack gather routine --- src/MaxwellSolvers/FDTD.h | 4 +++- src/Particle/ParticleAttrib.hpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 26ebfbbb6..32bbdec3f 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -345,6 +345,8 @@ namespace ippl { applyBCs(); } void evaluate_EB(){ + *(Maxwell::En_mp) = typename EMField::value_type(0); + *(Maxwell::Bn_mp) = typename EMField::value_type(0); ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; const scalar idt = scalar(1.0) / dt; auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView(); @@ -611,7 +613,7 @@ namespace ippl { FourField J; ThreeField E; ThreeField B; - NonStandardFDTDSolver field_solver; + NonStandardFDTDSolver field_solver; ippl::Vector nr_global; ippl::Vector hr_m; diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index e7ee0b12c..4a8f0010a 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -186,7 +186,7 @@ namespace ippl { "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; + vector_type l = (pp(idx) - origin) * invdx + 1.0; Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; From 427b5f78263655c0d2ae907cb625d3978324232f Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 5 May 2024 10:40:33 +0200 Subject: [PATCH 24/32] helper structs --- alpine/FreeElectronLaser.cpp | 79 +++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index fc26b93b8..e4d679b4e 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -661,10 +661,30 @@ struct UniaxialLorentzframe{ scalar beta_m; scalar gammaBeta_m; scalar gamma_m; + KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma){ + + UniaxialLorentzframe ret; + ret.gamma_m = gamma; + scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); + scalar gammabeta = gamma * beta; + ret.beta_m = beta; + ret.gammaBeta_m = gammabeta; + return ret; + } + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative()const noexcept{ + UniaxialLorentzframe ret; + ret.beta_m = -beta_m; + ret.gammaBeta_m = -gammaBeta_m; + ret.gamma_m = gamma_m; + return ret; + } + + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){ + using Kokkos::sqrt; gammaBeta_m = gammaBeta; - beta_m = gammaBeta / sqrt(1 + gammaBeta * (gammaBeta)); - gamma_m = sqrt(1 + gammaBeta * (gammaBeta)); + beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); + gamma_m = sqrt(1 + gammaBeta * gammaBeta); } KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{ arg[axis] = gamma_m * (arg[axis] + beta_m * time); @@ -680,7 +700,7 @@ struct UniaxialLorentzframe{ return ret; } KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> inverse_transform_EB(const Kokkos::pair, ippl::Vector>& primedEB)const noexcept{ - return UniaxialLorentzframe(-gammaBeta_m).transform_EB(primedEB); + return negative().transform_EB(primedEB); } }; template @@ -748,7 +768,7 @@ void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; FieldVector r (0.0); FieldVector t (0.0); - Double t0, g; + Double t0;//, g; Double zmin = 1e100; Double Ne, bF, bFi; unsigned int bmi; @@ -1297,11 +1317,40 @@ namespace ippl { ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering }; + template + struct Undulator{ + undulator_parameters uparams; + scalar distance_to_entry; + scalar k_u; + KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){} + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> operator()(const ippl::Vector& position_in_lab_frame)const noexcept{ + Kokkos::pair, ippl::Vector> ret; + ret.first.fill(0); + ret.second.fill(0); + if(position_in_lab_frame[2] < distance_to_entry){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; + } + else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); + ret.second[0] = 0; + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); + } + return ret; + }; + }; + template // clang-format off - struct FDTDFieldState{ + struct FELSimulationState{ //Sorry, can't do more than 3d @@ -1337,16 +1386,18 @@ namespace ippl { Bunch_eb> particles; using bunch_type = Bunch_eb>; config m_config; - + UniaxialLorentzframe ulb; + undulator_parameters uparams; + Undulator undulator; /** * @brief Construct a new FDTDFieldState object * Mesh and resolution parameter are technically redundant - * + * @details ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5) is the frame's gamma factor * @param resolution * @param layout * @param mesch */ - FDTDFieldState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg){ + FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg), ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){ FA_np1.initialize(mesch, layout, 1); FA_n.initialize(mesch, layout, 1); FA_nm1.initialize(mesch, layout, 1); @@ -1658,7 +1709,7 @@ scalar test_gauss_law(uint32_t n){ uint32_t pcount = 1 << 20; config cfg{}; cfg.space_charge = true; - ippl::FDTDFieldState field_state(layout, mesh, pcount, cfg); + ippl::FELSimulationState field_state(layout, mesh, pcount, cfg); field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant auto pview = field_state.particles.R.getView(); @@ -1688,7 +1739,7 @@ scalar test_gauss_law(uint32_t n){ auto lDom = field_state.EB.getLayout().getLocalNDIndex(); std::ofstream line("gauss_line.txt"); - typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + typename ippl::FELSimulationState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); for(unsigned i = 1;i < nr[2];i++){ vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; @@ -1723,7 +1774,7 @@ scalar test_amperes_law(uint32_t n){ uint32_t pcount = 1 << 20; config cfg{};cfg.space_charge = true; - ippl::FDTDFieldState field_state(layout, mesh, pcount, cfg); + ippl::FELSimulationState field_state(layout, mesh, pcount, cfg); field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant auto pview = field_state.particles.R.getView(); @@ -1756,7 +1807,7 @@ scalar test_amperes_law(uint32_t n){ std::ofstream line("ampere_line.txt"); - typename ippl::FDTDFieldState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + typename ippl::FELSimulationState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); for(unsigned i = 1;i < nr[2];i++){ vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; @@ -1831,7 +1882,7 @@ int main(int argc, char* argv[]) { abort(); } - ippl::FDTDFieldState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); + ippl::FELSimulationState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); if(ippl::Comm->rank() == 0){ std::cout << "Init particles: " << std::endl; @@ -2001,6 +2052,6 @@ int main(int argc, char* argv[]) { uint64_t endtime = nanoTime(); std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; } - exit: + //exit: ippl::finalize(); } \ No newline at end of file From 059807ac100669d5910effa6c78b12affeaab6ce Mon Sep 17 00:00:00 2001 From: Manuel Date: Sun, 5 May 2024 13:37:53 +0200 Subject: [PATCH 25/32] FEL-IR working --- alpine/FreeElectronLaser.cpp | 363 ++++++++--------------------------- config.json | 5 +- src/MaxwellSolvers/FDTD.h | 5 +- 3 files changed, 85 insertions(+), 288 deletions(-) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index e4d679b4e..10b66f94f 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -1324,6 +1324,11 @@ namespace ippl { scalar k_u; KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){} KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> operator()(const ippl::Vector& position_in_lab_frame)const noexcept{ + using Kokkos::sin; + using Kokkos::sinh; + using Kokkos::cos; + using Kokkos::cosh; + using Kokkos::exp; Kokkos::pair, ippl::Vector> ret; ret.first.fill(0); ret.second.fill(0); @@ -1365,26 +1370,18 @@ namespace ippl { using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - using view_type = VField_t::view_type; - using ev_view_type = EBField_t::view_type; + using view_type = typename VField_t::view_type; + using ev_view_type = typename EBField_t::view_type; + using e_view_type = typename ThreeField::view_type; + using b_view_type = typename ThreeField::view_type; //Fields - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_np1; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_n; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> FA_nm1; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> J; - ippl::Field, typename ippl::UniformCartesian::DefaultCentering> EB; + ippl::NSFDSolverWithParticles fieldsAndParticles; //Discretization options Vector_t hr_m; ippl::Vector nr_global; ippl::Vector nr_local; - scalar dt; - Mesh_t* mesh_mp; - FieldLayout* layout_mp; - using playout_type = ParticleSpatialLayout; - playout_type playout; - Bunch_eb> particles; - using bunch_type = Bunch_eb>; + //scalar dt; config m_config; UniaxialLorentzframe ulb; undulator_parameters uparams; @@ -1397,18 +1394,8 @@ namespace ippl { * @param layout * @param mesch */ - FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : mesh_mp(&mesch), layout_mp(&layout), playout(layout, mesch), particles(playout), m_config(cfg), ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){ - FA_np1.initialize(mesch, layout, 1); - FA_n.initialize(mesch, layout, 1); - FA_nm1.initialize(mesch, layout, 1); - J.initialize(mesch, layout, 1); - EB.initialize(mesch, layout, 1); - ThreeField Edummy; - Edummy.initialize(mesch, layout, 1); - FA_n = value_type(0); - FA_np1 = value_type(0); - FA_nm1 = value_type(0); - ippl::StandardFDTDSolver solver(J, Edummy, Edummy); + FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : fieldsAndParticles(layout, mesch, nparticles), m_config(cfg), ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){ + hr_m = mesch.getMeshSpacing(); nr_global = ippl::Vector{ uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), @@ -1422,223 +1409,22 @@ namespace ippl { }; //std::cout << "NR_M_g: " << nr_global << "\n"; //std::cout << "NR_M_l: " << nr_local << "\n"; - dt = hr_m[2];//0.5 * std::min(hr_m[0], std::min(hr_m[1], hr_m[2])); - particles.create(nparticles); - setNoBoundaryConditions(); - - } - void setNoBoundaryConditions() { - periodic_bc = false; - typename VField_t::BConds_t vector_bcs; - auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { - vector_bcs[Idx] = std::make_shared>(Idx); - return 0; - }; - auto bcsetter = [bcsetter_single](const std::index_sequence&) { - int x = (bcsetter_single(std::index_sequence{}) ^ ...); - (void)x; - }; - bcsetter(std::make_index_sequence{}); - FA_n .setFieldBC(vector_bcs); - FA_np1.setFieldBC(vector_bcs); - FA_nm1.setFieldBC(vector_bcs); } - void setPeriodicBoundaryConditions() { - periodic_bc = true; - typename VField_t::BConds_t vector_bcs; - auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { - vector_bcs[Idx] = std::make_shared>(Idx); - return 0; - }; - auto bcsetter = [bcsetter_single](const std::index_sequence&) { - int x = (bcsetter_single(std::index_sequence{}) ^ ...); - (void)x; - }; - bcsetter(std::make_index_sequence{}); - FA_n .setFieldBC(vector_bcs); - FA_np1.setFieldBC(vector_bcs); - FA_nm1.setFieldBC(vector_bcs); + scalar dt()const noexcept{ + return fieldsAndParticles.field_solver.dt; } - - void scatterBunch(){ - //ippl::Vector* gammaBeta = this->gammaBeta; - const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; - assert_isreal(volume); - assert_isreal((scalar(1) / volume)); - auto Jview = J.getView(); - auto qview = particles.Q.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto orig = mesh_mp->getOrigin(); - auto hr = mesh_mp->getMeshSpacing(); - auto dt = this->dt; - bool space_charge = m_config.space_charge; - ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - Vector_t pos = rview(i); - Vector_t to = rview(i); - Vector_t from = rm1view(i); - if(space_charge){ - scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); - } - scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); + void step(){ + //scalar time = fieldsAndParticles.steps_taken * fieldsAndParticles.field_solver.dt; + auto und = this->undulator; + auto lb = this->ulb; + fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector pos, scalar time){ + lb.primedToUnprimed(pos, time); + auto eb = und(pos); + return lb.transform_EB(eb); }); - Kokkos::fence(); - J.accumulateHalo(); } + void computeRadiation(){ - void fieldStep(){ - const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); - nondispersive ndisp{ - .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), - .a2 = sq(dt / hr_m[0]), - .a4 = sq(dt / hr_m[1]), - .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), - .a8 = sq(dt) - }; - //if(periodic_bc){ - // FA_n.getFieldBC().apply(FA_n); - //} - auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); - auto source = this->J.getView(); - //FA_nm1.fillHalo(); - FA_n.fillHalo(); - ippl::Vector true_nr{this->nr_global[0] + 2, this->nr_global[1] + 2, this->nr_global[2] + 2}; - const auto& ldom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - uint32_t ig = i + ldom.first()[0]; - uint32_t jg = j + ldom.first()[1]; - uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == 1) + (uint32_t(jg == 1) << 1) + (uint32_t(kg == 1) << 2) - + (uint32_t(ig == true_nr[0] - 2) << 3) + (uint32_t(jg == true_nr[1] - 2) << 4) + (uint32_t(kg == true_nr[2] - 2) << 5); - if(!val){ - A_np1(i, j, k) = -A_nm1(i,j,k) - + ndisp.a1 * A_n (i,j,k) - + ndisp.a2 * (calA * A_n(i + 1, j, k - 1) + (1 - 2 * calA) * A_n(i + 1, j, k) + calA * A_n(i + 1, j, k + 1)) - + ndisp.a2 * (calA * A_n(i - 1, j, k - 1) + (1 - 2 * calA) * A_n(i - 1, j, k) + calA * A_n(i - 1, j, k + 1)) - + ndisp.a4 * (calA * A_n(i, j + 1, k - 1) + (1 - 2 * calA) * A_n(i, j + 1, k) + calA * A_n(i, j + 1, k + 1)) - + ndisp.a4 * (calA * A_n(i, j - 1, k - 1) + (1 - 2 * calA) * A_n(i, j - 1, k) + calA * A_n(i, j - 1, k + 1)) - + ndisp.a6 * A_n(i, j, k + 1) + ndisp.a6 * A_n(i, j, k - 1) + ndisp.a8 * source(i, j, k); - } - }); - Kokkos::fence(); - - if(!periodic_bc){ - FA_np1.fillHalo(); - second_order_mur_boundary_conditions bc; - - - bc.apply(this->FA_n, this->FA_nm1, this->FA_np1, dt, true_nr, ldom); - } - Kokkos::deep_copy(this->FA_nm1.getView(), this->FA_n.getView()); - Kokkos::deep_copy(this->FA_n.getView(), this->FA_np1.getView()); - //std::swap(this->A_n, this->A_nm1); - //std::swap(this->A_np1, this->A_n); - - evaluate_EB(); - } - void evaluate_EB(){ - FA_n.fillHalo();//FA_nm1.fillHalo(); - ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; - const scalar idt = scalar(1.0) / dt; - auto A_np1 = this->FA_np1.getView(), A_n = this->FA_n.getView(), A_nm1 = this->FA_nm1.getView(); - auto source = this->J.getView(); - auto EBv = this->EB.getView(); - Kokkos::parallel_for(this->FA_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; - ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; - ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; - ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; - - ippl::Vector grad_phi{ - dAdx[0], dAdy[0], dAdz[0] - }; - ippl::Vector curlA{ - dAdy[3] - dAdz[2], - dAdz[1] - dAdx[3], - dAdx[2] - dAdy[1], - }; - EBv(i,j,k)[0] = -dAdt - grad_phi; - EBv(i,j,k)[1] = curlA; - }); - Kokkos::fence(); - } - template - void updateBunch(scalar time, UniaxialLorentzframe ulb, callable undulator_field){ - - Kokkos::fence(); - auto gbview = particles.gamma_beta.getView(); - auto ebview = particles.EB_gather.getView(); - auto qview = particles.Q.getView(); - auto mview = particles.mass.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto rp1view = particles.R_np1.getView(); - scalar bunch_dt = dt / 3; - Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); - EB.fillHalo(); - Kokkos::fence(); - for(int bts = 0;bts < 3;bts++){ - - particles.EB_gather.gather(EB, particles.R); - Kokkos::fence(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - const ippl::Vector pgammabeta = gbview(i); - ippl::Vector, 2> EB = ebview(i); - ippl::Vector labpos = rview(i); - - ulb.primedToUnprimed(labpos, time); - - Kokkos::pair, ippl::Vector> EB_undulator_frame = undulator_field(labpos); - Kokkos::pair, ippl::Vector> EB_undulator_bunch = ulb.transform_EB(EB_undulator_frame); - assert_isreal((EB_undulator_bunch.first[0])); - assert_isreal((EB_undulator_bunch.first[1])); - assert_isreal((EB_undulator_bunch.first[2])); - assert_isreal((EB_undulator_bunch.second[0])); - assert_isreal((EB_undulator_bunch.second[1])); - assert_isreal((EB_undulator_bunch.second[2])); - EB[0] += EB_undulator_bunch.first; - EB[1] += EB_undulator_bunch.second; - - const scalar charge = qview(i); - const scalar mass = mview(i); - const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); - const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); - const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); - const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); - const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); - - assert_isreal((ngammabeta[0])); - assert_isreal((ngammabeta[1])); - assert_isreal((ngammabeta[2])); - rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); - gbview(i) = ngammabeta; - }); - Kokkos::fence(); - } - Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); - size_t invalid_count = 0; - auto origo = mesh_mp->getOrigin(); - ippl::Vector extenz;// - extenz[0] = nr_global[0] * hr_m[0]; - extenz[1] = nr_global[1] * hr_m[1]; - extenz[2] = nr_global[2] * hr_m[2]; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, particles.getLocalNum()), - KOKKOS_LAMBDA(size_t i, size_t& ref){ - bool out_of_bounds = false; - ippl::Vector ppos = rview(i); - for(size_t d = 0;d < dim;d++){ - out_of_bounds |= (ppos[d] <= origo[d]); - out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain - } - invalid(i) = out_of_bounds; - ref += out_of_bounds; - }, - invalid_count); - particles.destroy(invalid, invalid_count); - Kokkos::fence(); - } }; // clang-format on @@ -1707,9 +1493,9 @@ scalar test_gauss_law(uint32_t n){ ippl::UniformCartesian mesh(owned, hx, origin); uint32_t pcount = 1 << 20; - config cfg{}; - cfg.space_charge = true; - ippl::FELSimulationState field_state(layout, mesh, pcount, cfg); + //config cfg{}; + //cfg.space_charge = true; + ippl::NSFDSolverWithParticles field_state(layout, mesh, pcount); field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant auto pview = field_state.particles.R.getView(); @@ -1732,21 +1518,21 @@ scalar test_gauss_law(uint32_t n){ field_state.J = scalar(0.0); field_state.scatterBunch(); for(size_t i = 0;i < 8*n;i++){ - field_state.fieldStep(); + field_state.field_solver.solve(); } - field_state.evaluate_EB(); + Kokkos::fence(); - auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + auto lDom = field_state.E.getLayout().getLocalNDIndex(); std::ofstream line("gauss_line.txt"); - typename ippl::FELSimulationState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + typename ippl::FELSimulationState::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView()); //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); for(unsigned i = 1;i < nr[2];i++){ vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; pos[2] += hx[2] * scalar(i); - ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; - line << pos.norm() * unit_length_in_meters << " " << ebg[0].norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; } return 0.0f; } @@ -1773,14 +1559,13 @@ scalar test_amperes_law(uint32_t n){ ippl::UniformCartesian mesh(owned, hx, origin); uint32_t pcount = 1 << 20; - config cfg{};cfg.space_charge = true; - ippl::FELSimulationState field_state(layout, mesh, pcount, cfg); + ippl::NSFDSolverWithParticles field_state(layout, mesh, pcount); field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant auto pview = field_state.particles.R.getView(); auto p1view = field_state.particles.R_nm1.getView(); constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; - scalar timestep = field_state.dt; + scalar timestep = field_state.field_solver.dt; //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); @@ -1799,22 +1584,22 @@ scalar test_amperes_law(uint32_t n){ field_state.J = scalar(0.0); field_state.scatterBunch(); for(size_t i = 0;i < 8*n;i++){ - field_state.fieldStep(); + field_state.field_solver.solve(); } - field_state.evaluate_EB(); + field_state.field_solver.evaluate_EB(); Kokkos::fence(); - auto lDom = field_state.EB.getLayout().getLocalNDIndex(); + auto lDom = field_state.B.getLayout().getLocalNDIndex(); std::ofstream line("ampere_line.txt"); - typename ippl::FELSimulationState::ev_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.EB.getView()); + typename ippl::FELSimulationState::b_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.B.getView()); //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); for(unsigned i = 1;i < nr[2];i++){ vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; pos[2] += hx[2] * scalar(i); - ippl::Vector, 2> ebg = gather_helper(view, pos, origin, hx, lDom); + ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; - line << pos.norm() * unit_length_in_meters << " " << ebg[1][0] * unit_magnetic_fluxdensity_in_tesla << "\n"; + line << pos.norm() * unit_length_in_meters << " " << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n"; } return 0.0f; } @@ -1835,7 +1620,7 @@ int main(int argc, char* argv[]) { const scalar frame_gammabeta = frame_gamma * frame_beta; UniaxialLorentzframe frame_boost(frame_gammabeta); ippl::undulator_parameters uparams(cfg); - const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; + /*const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; const scalar distance_to_entry = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma); auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector& position_in_lab_frame){ Kokkos::pair, ippl::Vector> ret; @@ -1859,7 +1644,7 @@ int main(int argc, char* argv[]) { } return ret; - }; + };*/ BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); @@ -1886,28 +1671,28 @@ int main(int argc, char* argv[]) { if(ippl::Comm->rank() == 0){ std::cout << "Init particles: " << std::endl; - size_t actual_pc = initialize_bunch_mithra(fdtd_state.particles, mithra_config, frame_gamma); - fdtd_state.particles.Q = cfg.charge / actual_pc; - fdtd_state.particles.mass = cfg.mass / actual_pc; + size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, mithra_config, frame_gamma); + fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc; + fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc; } else{ - fdtd_state.particles.create(0); + fdtd_state.fieldsAndParticles.particles.create(0); } { - auto rview = fdtd_state.particles.R.getView(); - auto rm1view = fdtd_state.particles.R_nm1.getView(); - ippl::Vector meanpos = fdtd_state.particles.R.sum() * (1.0 / fdtd_state.particles.getTotalNum()); + auto rview = fdtd_state.fieldsAndParticles.particles.R.getView(); + auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView(); + ippl::Vector meanpos = fdtd_state.fieldsAndParticles.particles.R.sum() * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum()); - Kokkos::parallel_for(fdtd_state.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + Kokkos::parallel_for(fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ rview(i) -= meanpos; rm1view(i) -= meanpos; }); } - fdtd_state.particles.setParticleBC(ippl::NO); + fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO); //fdtd_state.scatterBunch(); //std::cout << cfg.charge << "\n"; - size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt); + size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt()); uint64_t starttime = nanoTime(); std::ofstream rad; FILE* ffmpeg_file = nullptr; @@ -1921,20 +1706,24 @@ int main(int argc, char* argv[]) { for(size_t i = 0;i < timesteps_required;i++){ - fdtd_state.J = scalar(0.0); - fdtd_state.playout.update(fdtd_state.particles); - fdtd_state.scatterBunch(); + + //fdtd_state.J = scalar(0.0); + //fdtd_state.playout.update(fdtd_state.particles); + //fdtd_state.scatterBunch(); std::cout << i << "\n"; - fdtd_state.fieldStep(); - fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); + fdtd_state.step(); + //fdtd_state.fieldStep(); + //fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); auto ldom = layout.getLocalNDIndex(); auto nrg = fdtd_state.nr_global; - auto ebv = fdtd_state.EB.getView(); + auto eview = fdtd_state.fieldsAndParticles.E.getView(); + auto bview = fdtd_state.fieldsAndParticles.B.getView(); + //auto ebv = fdtd_state.EB.getView(); double radiation = 0.0; - Kokkos::parallel_reduce(ippl::getRangePolicy(fdtd_state.EB.getView(), 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ + Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ //uint32_t ig = i + ldom.first()[0]; //uint32_t jg = j + ldom.first()[1]; - Kokkos::pair, ippl::Vector> buncheb{ebv(i,j,k)[0], ebv(i,j,k)[1]}; + Kokkos::pair, ippl::Vector> buncheb{eview(i,j,k), bview(i,j,k)}; ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; uint32_t kg = k + ldom.first()[2]; @@ -1950,7 +1739,7 @@ int main(int argc, char* argv[]) { MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); if(ippl::Comm->rank() == 0){ ippl::Vector pos{0,0,0}; - frame_boost.primedToUnprimed(pos, fdtd_state.dt * i); + frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i); rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; } //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; @@ -1968,9 +1757,9 @@ int main(int argc, char* argv[]) { int floatcount = img_width * img_height * 3; uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); - auto phmirror = fdtd_state.particles.R.getHostMirror(); - Kokkos::deep_copy(phmirror, fdtd_state.particles.R.getView()); - for(size_t hi = 0;hi < fdtd_state.particles.getLocalNum();hi++){ + auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror(); + Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView()); + for(size_t hi = 0;hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();hi++){ ippl::Vector ppos = phmirror(hi); ppos -= mesh.getOrigin(); ppos /= cfg.extents.cast(); @@ -1984,8 +1773,11 @@ int main(int argc, char* argv[]) { std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); } }; - auto ebh = fdtd_state.EB.getHostMirror(); - Kokkos::deep_copy(ebh, fdtd_state.EB.getView()); + + auto eh = fdtd_state.fieldsAndParticles.E.getHostMirror(); + auto bh = fdtd_state.fieldsAndParticles.B.getHostMirror(); + Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView()); + Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView()); //double exp_avg = double(exp_sum) / double(acount); { @@ -1995,11 +1787,12 @@ int main(int argc, char* argv[]) { int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ - ippl::Vector, 2> acc = ebh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); + ippl::Vector E = eh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); + ippl::Vector B = bh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); - ippl::Vector poynting = acc[0].cross(acc[1]); + ippl::Vector poynting = E.cross(B); Kokkos::pair, ippl::Vector> eblab = frame_boost.inverse_transform_EB( - Kokkos::make_pair, ippl::Vector>(acc[0], acc[1]) + Kokkos::make_pair, ippl::Vector>(E, B) ); ippl::Vector poyntinglab = eblab.first.cross(eblab.second); poynting = poyntinglab; diff --git a/config.json b/config.json index 0948650ee..f5cd3b99f 100644 --- a/config.json +++ b/config.json @@ -4,7 +4,7 @@ "length-scale" : "micrometers", "time-scale" : "picoseconds", "extents": [3400.0, 3400.0, 280.0], - "resolution": [32, 32, 800], + "resolution": [64, 64, 2800], "mesh-center": [0.0, 0.0, 0.0], "total-time": 30000.0, "bunch-time-step": 1.6, @@ -19,7 +19,7 @@ "_mass": 1, "charge": 1.846e8, "mass": 1.846e8, - "number-of-particles": 50000, + "number-of-particles": 1000000, "gamma": 100.41, "_gamma": 1.0, "direction (ignored)": [0.0, 0.0, 1.0], @@ -61,6 +61,7 @@ "output":{ "rhythm" : 30, "count" : 20, + "path": "../renderdata", "type" : "E", "track" : { "radiation" : "boundary_radiation.txt", diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 32bbdec3f..574026d73 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -687,7 +687,7 @@ namespace ippl { //std::cout << "E_grid: " << E_grid << "\n"; //std::cout << "B_grid: " << B_grid << "\n"; ippl::Vector bunchpos = rview(i); - Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time); + Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time + bunch_dt * bts); ippl::Vector, 2> EB{ ippl::Vector(E_grid + external_eb.first), @@ -739,12 +739,15 @@ namespace ippl { vector_type(0), vector_type(0) };}); + ++steps_taken; } template void solve(callable external_field){ scatterBunch(); field_solver.solve(); + //std::cout << field_solver.dt * steps_taken << "\n"; updateBunch(field_solver.dt * steps_taken, external_field); + ++steps_taken; } }; } From b1ed73b19bc7c9354751d182707f9e0df00056bc Mon Sep 17 00:00:00 2001 From: Manuel Date: Tue, 7 May 2024 15:03:52 +0200 Subject: [PATCH 26/32] Fix FDTD visibility --- src/MaxwellSolvers/FDTD.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 574026d73..da6c727e2 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -295,6 +295,8 @@ namespace ippl { bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); } } + + public: template struct nondispersive{ scalar a1; @@ -303,7 +305,6 @@ namespace ippl { scalar a6; scalar a8; }; - public: void step(){ const auto& ldom = layout_mp->getLocalNDIndex(); const int nghost = A_n.getNghost(); From e164a96930d204d4e97911cc0fa41761ecc25b16 Mon Sep 17 00:00:00 2001 From: Manuel Date: Tue, 7 May 2024 16:20:07 +0200 Subject: [PATCH 27/32] hack and fix --- alpine/FreeElectronLaser.cpp | 11 ++++++----- src/MaxwellSolvers/FDTD.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index 10b66f94f..aaafd3b65 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -1604,12 +1604,12 @@ scalar test_amperes_law(uint32_t n){ return 0.0f; } int main(int argc, char* argv[]) { - using scalar = double; + using scalar = float; ippl::initialize(argc, argv); { - test_gauss_law(64); - test_amperes_law(64); + //test_gauss_law(64); + //test_amperes_law(64); //goto exit; config cfg = read_config("../config.json"); const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); @@ -1738,7 +1738,7 @@ int main(int argc, char* argv[]) { double radiation_in_watt_global = 0.0; MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); if(ippl::Comm->rank() == 0){ - ippl::Vector pos{0,0,0}; + ippl::Vector pos{0,0,(float)cfg.extents[2]}; frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i); rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; } @@ -1843,7 +1843,8 @@ int main(int argc, char* argv[]) { } } uint64_t endtime = nanoTime(); - std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; + if(ippl::Comm->rank() == 0) + std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; } //exit: ippl::finalize(); diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index da6c727e2..a1cecb131 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -630,7 +630,7 @@ namespace ippl { steps_taken = 0; //field_solver.setEMFields(E, B); } - template + template void scatterBunch(){ //ippl::Vector* gammaBeta = this->gammaBeta; auto hr_m = mesh_mp->getMeshSpacing(); From 7f1f3963b438b4e5a60530359ca98b0c45156379 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 18 May 2024 13:35:02 +0200 Subject: [PATCH 28/32] Add comments, remove dead lines --- alpine/FreeElectronLaser.cpp | 397 ++++++++++++++++---------------- src/MaxwellSolvers/FDTD.h | 6 +- src/Particle/ParticleAttrib.h | 2 +- src/Particle/ParticleAttrib.hpp | 7 +- 4 files changed, 210 insertions(+), 202 deletions(-) diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index aaafd3b65..f03190583 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -329,39 +329,39 @@ KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector f struct config { using scalar = double; - - //using length_unit = funits::length; - //using duration_unit = funits::time; + // GRID PARAMETERS - ippl::Vector resolution; - - ippl::Vector extents; - scalar total_time; - scalar timestep_ratio; - - scalar length_scale_in_jobfile, temporal_scale_in_jobfile; - - // All in unit_charge, or unit_mass - scalar charge, mass; - - uint64_t num_particles; - bool space_charge; - - // BUNCH PARAMETERS - ippl::Vector mean_position; - ippl::Vector sigma_position; - ippl::Vector position_truncations; - ippl::Vector sigma_momentum; - scalar bunch_gamma; - - scalar undulator_K; - scalar undulator_period; - scalar undulator_length; - - uint32_t output_rhythm; - std::string output_path; - std::unordered_map experiment_options; + ippl::Vector resolution; // Grid resolution in 3D + ippl::Vector extents; // Physical extents of the grid in each dimension + scalar total_time; // Total simulation time + scalar timestep_ratio; // Ratio of timestep to some reference value + + scalar length_scale_in_jobfile; // Length scale defined in the jobfile + scalar temporal_scale_in_jobfile; // Temporal scale defined in the jobfile + + // PARTICLE PARAMETERS + scalar charge; // Particle charge in unit_charge + scalar mass; // Particle mass in unit_mass + uint64_t num_particles; // Number of particles in the simulation + bool space_charge; // Flag for considering space charge effects + + // BUNCH PARAMETERS + ippl::Vector mean_position; // Mean initial position of the particle bunch + ippl::Vector sigma_position; // Standard deviation of the initial position distribution + ippl::Vector position_truncations; // Truncations of the position distribution + ippl::Vector sigma_momentum; // Standard deviation of the initial momentum distribution + scalar bunch_gamma; // Relativistic gamma factor of the bunch + + // UNDULATOR PARAMETERS + scalar undulator_K; // Undulator parameter K + scalar undulator_period; // Period of the undulator + scalar undulator_length; // Length of the undulator + + uint32_t output_rhythm; // Frequency of output in timesteps + std::string output_path; // Path to output files + std::unordered_map experiment_options; // Additional experimental options }; + template ippl::Vector getVector(const nlohmann::json& j){ if(j.is_array()){ @@ -646,12 +646,6 @@ struct BunchInitialize { // BunchInitialize (); }; - - - - -//END CONFIG - //LORENTZ FRAME AND UNDULATOR template struct UniaxialLorentzframe{ @@ -849,7 +843,7 @@ void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector bunchInit, ChargeVector bunchInit, ChargeVector bunchInit, ChargeVector bunchInit, ChargeVector bunchInit, ChargeVector bunchInit, ChargeVector cross_prod(const ippl::Vector& a return ret; } template -KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ - //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; - //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); +KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, int nghost = 1){ Kokkos::pair, ippl::Vector> ret; - //pos -= spacing * T(0.5); ippl::Vector relpos = pos - origin; ippl::Vector gridpos = relpos / hr; ippl::Vector ipos; @@ -1116,17 +1096,15 @@ KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gr for(unsigned k = 0;k < 3;k++){ fracpos[k] = gridpos[k] - (int)ipos[k]; } - //TODO: NGHOST!!!!!!! - ipos += ippl::Vector(1); + ipos += ippl::Vector(nghost); ret.first = ipos; ret.second = fracpos; return ret; } template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value, int nghost = 1){ + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); ipos -= ldom.first(); - //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; if( ipos[0] < 0 ||ipos[1] < 0 @@ -1170,9 +1148,9 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(abs(accum - 1.0f) < 1e-6f); } template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value, int nghost = 1){ //std::cout << "Value: " << value << "\n"; - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); ipos -= ldom.first(); if( ipos[0] < 0 @@ -1220,21 +1198,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view } template -KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor, int nghost = 1){ Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); - //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); - for(int d = 0;d < 3;d++){ - //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ - // std::cout <(nghost); - //to_ipos += ippl::Vector(nghost); if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); @@ -1242,7 +1210,6 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view return; } ippl::Vector relay; - const int nghost = 1; const ippl::Vector orig = origin; using Kokkos::max; using Kokkos::min; @@ -1255,23 +1222,21 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); } -// END PREAMBLE namespace ippl { template struct undulator_parameters{ - scalar lambda; //MITHRA: lambda_u + scalar lambda_u; //MITHRA: lambda_u scalar K; //Undulator parameter scalar length; scalar B_magnitude; - undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda(lambda_u), K(K_undulator_parameter), length(_length){ + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda_u(lambda_u), K(K_undulator_parameter), length(_length){ B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); - //std::cout << "Setting bmag: " << B_magnitude << "\n"; } - undulator_parameters(const config& cfg): lambda(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda); + undulator_parameters(const config& cfg): lambda_u(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); } }; @@ -1317,115 +1282,179 @@ namespace ippl { ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering }; + + + /** + * @brief Struct representing an undulator. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ template - struct Undulator{ - undulator_parameters uparams; - scalar distance_to_entry; - scalar k_u; - KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda){} - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> operator()(const ippl::Vector& position_in_lab_frame)const noexcept{ + struct Undulator { + undulator_parameters uparams; ///< Parameters of the undulator. + scalar distance_to_entry; ///< Distance to the entry of the undulator. + scalar k_u; ///< Wavenumber of the undulator. + + /** + * @brief Constructor to initialize undulator parameters and calculate k_u. + * + * @param p Parameters of the undulator. + * @param dte Distance to the entry of the undulator. + */ + KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) + : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda_u) {} + + /** + * @brief Overloaded operator() to compute magnetic field components. + * + * @param position_in_lab_frame Position vector in the lab frame. + * @return Kokkos::pair, ippl::Vector> + * Pair containing magnetic field and its derivative. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + operator()(const ippl::Vector& position_in_lab_frame) const noexcept { using Kokkos::sin; using Kokkos::sinh; using Kokkos::cos; using Kokkos::cosh; using Kokkos::exp; - Kokkos::pair, ippl::Vector> ret; - ret.first.fill(0); - ret.second.fill(0); - if(position_in_lab_frame[2] < distance_to_entry){ + + Kokkos::pair, ippl::Vector> ret; // Return pair containing magnetic field and its derivative. + ret.first.fill(0); // Initialize magnetic field vector. + ret.second.fill(0); // Initialize derivative vector. + + // If the position is before the undulator entry. + if (position_in_lab_frame[2] < distance_to_entry) { scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator < 0); - scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; + assert(z_in_undulator < 0); // Ensure we are in the correct region. + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); // Gaussian decay factor. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; // z-component. } - else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ + // If the position is within the undulator. + else if (position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length) { scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator >= 0); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); + assert(z_in_undulator >= 0); // Ensure we are in the correct region. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); // z-component. } return ret; - }; + } }; - + /** + * @brief Struct representing the state of an FEL (Free Electron Laser) simulation. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ template - // clang-format off - struct FELSimulationState{ - - //Sorry, can't do more than 3d + struct FELSimulationState { + // clang-format off - constexpr static unsigned int dim = 3; - using Vector_t = ippl::Vector; - using value_type = ippl::Vector; - using EB_type = ippl::Vector, 2>; - using Mesh_t = ippl::UniformCartesian; + constexpr static unsigned int dim = 3; ///< Dimensionality of the simulation (3D). + using Vector_t = ippl::Vector; ///< Type alias for a 3D vector. + using value_type = ippl::Vector; ///< Type alias for a 4D vector. + using EB_type = ippl::Vector, 2>; ///< Type alias for an electric and magnetic field vector pair. + using Mesh_t = ippl::UniformCartesian; ///< Type alias for a uniform Cartesian mesh. - bool periodic_bc; + bool periodic_bc; ///< Flag indicating if periodic boundary conditions are used. + + /// Type alias for a field with 4D vector values. using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with 3D vector values. using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with 4D vector values (repeated for consistency). using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with electric and magnetic field vector pairs. + using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for the view type of a field with 4D vector values. using view_type = typename VField_t::view_type; + + /// Type alias for the view type of a field with electric and magnetic field vector pairs. using ev_view_type = typename EBField_t::view_type; + + /// Type alias for the view type of a field with 3D vector values (electric field). using e_view_type = typename ThreeField::view_type; + + /// Type alias for the view type of a field with 3D vector values (magnetic field). using b_view_type = typename ThreeField::view_type; - //Fields + + /// Solver and particle handler. ippl::NSFDSolverWithParticles fieldsAndParticles; - - //Discretization options - Vector_t hr_m; - ippl::Vector nr_global; - ippl::Vector nr_local; - //scalar dt; - config m_config; - UniaxialLorentzframe ulb; - undulator_parameters uparams; - Undulator undulator; + + Vector_t hr_m; ///< Mesh spacing vector. + ippl::Vector nr_global; ///< Global number of cells in each dimension. + ippl::Vector nr_local; ///< Local number of cells in each dimension. + config m_config; ///< Configuration object. + UniaxialLorentzframe ulb; ///< Uniaxial Lorentz frame transformation along z-axis. + undulator_parameters uparams; ///< Undulator parameters. + Undulator undulator; ///< Undulator object. + /** - * @brief Construct a new FDTDFieldState object - * Mesh and resolution parameter are technically redundant - * @details ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5) is the frame's gamma factor - * @param resolution - * @param layout - * @param mesch + * @brief Construct a new FELSimulationState object. + * + * Initializes the simulation state with the given field layout, mesh, number of particles, and configuration. + * + * @details + * - `ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5)` is the frame's gamma factor. + * + * @param layout Field layout. + * @param mesch Mesh object. + * @param nparticles Number of particles. + * @param cfg Configuration object. */ - FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) : fieldsAndParticles(layout, mesch, nparticles), m_config(cfg), ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), uparams(cfg), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m){ + FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) + : fieldsAndParticles(layout, mesch, nparticles), + m_config(cfg), + ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), + uparams(cfg), + undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) { hr_m = mesch.getMeshSpacing(); - nr_global = ippl::Vector{ + nr_global = ippl::Vector{ uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) }; - nr_local = ippl::Vector{ + nr_local = ippl::Vector{ uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1), uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) }; - //std::cout << "NR_M_g: " << nr_global << "\n"; - //std::cout << "NR_M_l: " << nr_local << "\n"; } - scalar dt()const noexcept{ + + /** + * @brief Get the time step size. + * + * @return scalar Time step size. + */ + scalar dt() const noexcept { return fieldsAndParticles.field_solver.dt; } - void step(){ - //scalar time = fieldsAndParticles.steps_taken * fieldsAndParticles.field_solver.dt; + + /** + * @brief Perform a simulation step. + * + * Solves the fields and updates the state using the undulator and Lorentz frame transformations. + */ + void step() { auto und = this->undulator; auto lb = this->ulb; - fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector pos, scalar time){ + fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector pos, scalar time) { lb.primedToUnprimed(pos, time); auto eb = und(pos); return lb.transform_EB(eb); }); } - void computeRadiation(){ - - } }; // clang-format on } // namespace ippl @@ -1446,22 +1475,41 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { return true; } +/** + * @brief Helper function to gather data from a field using interpolation. + * + * This function computes the value at a given position by interpolating the values in the field. + * The position is transformed from physical coordinates to grid coordinates and then used to + * gather data from the field. The interpolation weights are computed based on the relative + * positions within the grid cells. + * + * @tparam View Type of the field view. + * @tparam T Scalar type for coordinates and grid spacing. + * @tparam Dim Dimensionality of the position vector. + * + * @param v The field view from which data is to be gathered. + * @param pos The position vector in physical coordinates where the data is to be gathered. + * @param origin The origin of the grid in physical coordinates. + * @param hr The grid spacing vector. + * @param lDom The local domain indices of the grid. + * + * @return typename View::value_type The interpolated value at the specified position. + */ template KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, const ippl::NDIndex<3>& lDom){ using vector_type = ippl::Vector; vector_type l; - //vector_type origin = v.get_mesh().getOrigin(); - //auto lDom = v.getLayout().getLocalNDIndex(); - //vector_type hr = v.get_mesh().getMeshSpacing(); + for(unsigned k = 0;k < 3;k++){ - l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented wrong + l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented in a way such that this 1 is necessary here } ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; ippl::Vector whi = l - index; ippl::Vector wlo(1.0); wlo -= whi; + //TODO: nghost ippl::Vector args = index - lDom.first() + 1; for(unsigned k = 0;k < 3;k++){ @@ -1469,7 +1517,6 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, co return typename View::value_type(0); } } - //std::cout << pos << " 's Gather args (will have 1 subtracted): " << args << "\n"; return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/; } @@ -1501,12 +1548,9 @@ scalar test_gauss_law(uint32_t n){ auto pview = field_state.particles.R.getView(); auto p1view = field_state.particles.R_nm1.getView(); - //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ - //bunch.gammaBeta[i].fill(scalar(0)); auto state = random_pool.get_state(); pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); @@ -1526,12 +1570,10 @@ scalar test_gauss_law(uint32_t n){ std::ofstream line("gauss_line.txt"); typename ippl::FELSimulationState::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView()); - //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); for(unsigned i = 1;i < nr[2];i++){ vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; pos[2] += hx[2] * scalar(i); ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); - //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; } return 0.0f; @@ -1546,10 +1588,10 @@ scalar test_amperes_law(uint32_t n){ isParallel.fill(false); isParallel[2] = true; - // all parallel layout, standard domain, normal axis order + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); - //[-1, 1] box + //[-1, 1] box ippl::Vector hx; for(unsigned d = 0;d < 3;d++){ hx[d] = extents[d] / (scalar)nr[d]; @@ -1566,9 +1608,7 @@ scalar test_amperes_law(uint32_t n){ auto p1view = field_state.particles.R_nm1.getView(); constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; scalar timestep = field_state.field_solver.dt; - //constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - //scalar dt = 0.5 ** std::min_element(hx.begin(), hx.end()); Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ //bunch.gammaBeta[i].fill(scalar(0)); @@ -1620,31 +1660,7 @@ int main(int argc, char* argv[]) { const scalar frame_gammabeta = frame_gamma * frame_beta; UniaxialLorentzframe frame_boost(frame_gammabeta); ippl::undulator_parameters uparams(cfg); - /*const scalar k_u = scalar(2.0 * M_PI) / uparams.lambda; - const scalar distance_to_entry = std::max(0.0 * uparams.lambda, 2.0 * cfg.sigma_position[2] * frame_gamma * frame_gamma); - auto undulator_field = KOKKOS_LAMBDA(const ippl::Vector& position_in_lab_frame){ - Kokkos::pair, ippl::Vector> ret; - ret.first.fill(0); - ret.second.fill(0); - - if(position_in_lab_frame[2] < distance_to_entry){ - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator < 0); - scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; - } - else if(position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length){ - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator >= 0); - ret.second[0] = 0; - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); - } - return ret; - };*/ BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); @@ -1689,8 +1705,6 @@ int main(int argc, char* argv[]) { }); } fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO); - //fdtd_state.scatterBunch(); - //std::cout << cfg.charge << "\n"; size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt()); uint64_t starttime = nanoTime(); @@ -1706,23 +1720,16 @@ int main(int argc, char* argv[]) { for(size_t i = 0;i < timesteps_required;i++){ - - //fdtd_state.J = scalar(0.0); - //fdtd_state.playout.update(fdtd_state.particles); - //fdtd_state.scatterBunch(); - std::cout << i << "\n"; + if(ippl::Comm->rank() == 0){ + std::cout << "Doing step: " << i << std::endl; + } fdtd_state.step(); - //fdtd_state.fieldStep(); - //fdtd_state.updateBunch(i * fdtd_state.dt, frame_boost, undulator_field); auto ldom = layout.getLocalNDIndex(); auto nrg = fdtd_state.nr_global; auto eview = fdtd_state.fieldsAndParticles.E.getView(); auto bview = fdtd_state.fieldsAndParticles.B.getView(); - //auto ebv = fdtd_state.EB.getView(); double radiation = 0.0; Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ - //uint32_t ig = i + ldom.first()[0]; - //uint32_t jg = j + ldom.first()[1]; Kokkos::pair, ippl::Vector> buncheb{eview(i,j,k), bview(i,j,k)}; ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; @@ -1742,8 +1749,7 @@ int main(int argc, char* argv[]) { frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i); rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; } - //std::cout << "A: " << fdtd_state.FA_n.getVolumeIntegral() << "\n"; - //std::cout << "J: " << fdtd_state.J.getVolumeIntegral() << "\n"; + int rank = ippl::Comm->rank(); int size = ippl::Comm->size(); if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ @@ -1779,7 +1785,6 @@ int main(int argc, char* argv[]) { Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView()); Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView()); - //double exp_avg = double(exp_sum) / double(acount); { for(int i = 1;i < img_width;i++){ for(int j = 1;j < img_height;j++){ @@ -1809,10 +1814,12 @@ int main(int argc, char* argv[]) { } } } + /* + * This code should be replaced by the Image Gathering anyway, but that's a different PR + */ int mask = 1; while (mask < size) { int partner = rank ^ mask; - //if((rank & (mask - 1)) == 0) { if ((rank & mask) == 0) { // Send data to partner diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index a1cecb131..52ec14529 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -57,7 +57,7 @@ namespace ippl { enum fdtd_bc{ periodic, absorbing }; - + template class StandardFDTDSolver : Maxwell { public: @@ -678,8 +678,8 @@ namespace ippl { Kokkos::fence(); for(int bts = 0;bts < 3;bts++){ - particles.E_gather.gather(E, particles.R); - particles.B_gather.gather(B, particles.R); + particles.E_gather.gather(E, particles.R, /*offset = */0.0); + particles.B_gather.gather(B, particles.R, /*offset = */0.0); Kokkos::fence(); Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ const ippl::Vector pgammabeta = gbview(i); diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 093e2b519..0702489f3 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -124,7 +124,7 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) const; template - void gather(Field& f, const ParticleAttrib, Properties...>& pp); + void gather(Field& f, const ParticleAttrib, Properties...>& pp, const typename Field::Mesh_t::vector_type::value_type offset = 0.5); T sum(); T max(); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 4a8f0010a..7e0a58fbf 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -155,7 +155,8 @@ namespace ippl { template template void ParticleAttrib::gather( - Field& f, const ParticleAttrib, Properties...>& pp) { + Field& f, const ParticleAttrib, Properties...>& pp, + const typename Field::Mesh_t::vector_type::value_type offset) { constexpr unsigned Dim = Field::dim; using PositionType = typename Field::Mesh_t::value_type; @@ -186,7 +187,7 @@ namespace ippl { "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 1.0; + vector_type l = (pp(idx) - origin) * invdx + (1.0 - offset); Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; @@ -194,7 +195,7 @@ namespace ippl { Vector args = index - lDom.first() + nghost; // gather - //std::cout << index << std::endl; + // std::cout << index << std::endl; dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, view, wlo, whi, args); }); From 5d6c9743095c0643f548532e17389288e358f026 Mon Sep 17 00:00:00 2001 From: Manuel Date: Sat, 18 May 2024 14:49:50 +0200 Subject: [PATCH 29/32] File Split and clang-format --- alpine/FreeElectronLaser.cpp | 1924 +++++++++---------- alpine/Undulator.h | 238 +++ alpine/units.h | 37 + src/MaxwellSolvers/FDTD.h | 784 ++++---- test/solver/TestFDTDSolverWithParticles.cpp | 2 +- 5 files changed, 1551 insertions(+), 1434 deletions(-) create mode 100644 alpine/Undulator.h create mode 100644 alpine/units.h diff --git a/alpine/FreeElectronLaser.cpp b/alpine/FreeElectronLaser.cpp index f03190583..e2a0c8551 100644 --- a/alpine/FreeElectronLaser.cpp +++ b/alpine/FreeElectronLaser.cpp @@ -1,412 +1,238 @@ #include "Ippl.h" +#include +#include // For popen + #include "Types/Vector.h" #include "Field/Field.h" + #include "MaxwellSolvers/FDTD.h" -#include -#include // For popen -#define JSON_HAS_RANGES 0 //Merlin compilation -#include +#define JSON_HAS_RANGES 0 // Merlin compilation +#include #include +#include #include -#include #define STB_IMAGE_WRITE_IMPLEMENTATION #include +#include "Undulator.h" +#include "units.h" constexpr float turbo_cm[256][3] = { - {0.18995,0.07176,0.23217}, - {0.19483,0.08339,0.26149}, - {0.19956,0.09498,0.29024}, - {0.20415,0.10652,0.31844}, - {0.20860,0.11802,0.34607}, - {0.21291,0.12947,0.37314}, - {0.21708,0.14087,0.39964}, - {0.22111,0.15223,0.42558}, - {0.22500,0.16354,0.45096}, - {0.22875,0.17481,0.47578}, - {0.23236,0.18603,0.50004}, - {0.23582,0.19720,0.52373}, - {0.23915,0.20833,0.54686}, - {0.24234,0.21941,0.56942}, - {0.24539,0.23044,0.59142}, - {0.24830,0.24143,0.61286}, - {0.25107,0.25237,0.63374}, - {0.25369,0.26327,0.65406}, - {0.25618,0.27412,0.67381}, - {0.25853,0.28492,0.69300}, - {0.26074,0.29568,0.71162}, - {0.26280,0.30639,0.72968}, - {0.26473,0.31706,0.74718}, - {0.26652,0.32768,0.76412}, - {0.26816,0.33825,0.78050}, - {0.26967,0.34878,0.79631}, - {0.27103,0.35926,0.81156}, - {0.27226,0.36970,0.82624}, - {0.27334,0.38008,0.84037}, - {0.27429,0.39043,0.85393}, - {0.27509,0.40072,0.86692}, - {0.27576,0.41097,0.87936}, - {0.27628,0.42118,0.89123}, - {0.27667,0.43134,0.90254}, - {0.27691,0.44145,0.91328}, - {0.27701,0.45152,0.92347}, - {0.27698,0.46153,0.93309}, - {0.27680,0.47151,0.94214}, - {0.27648,0.48144,0.95064}, - {0.27603,0.49132,0.95857}, - {0.27543,0.50115,0.96594}, - {0.27469,0.51094,0.97275}, - {0.27381,0.52069,0.97899}, - {0.27273,0.53040,0.98461}, - {0.27106,0.54015,0.98930}, - {0.26878,0.54995,0.99303}, - {0.26592,0.55979,0.99583}, - {0.26252,0.56967,0.99773}, - {0.25862,0.57958,0.99876}, - {0.25425,0.58950,0.99896}, - {0.24946,0.59943,0.99835}, - {0.24427,0.60937,0.99697}, - {0.23874,0.61931,0.99485}, - {0.23288,0.62923,0.99202}, - {0.22676,0.63913,0.98851}, - {0.22039,0.64901,0.98436}, - {0.21382,0.65886,0.97959}, - {0.20708,0.66866,0.97423}, - {0.20021,0.67842,0.96833}, - {0.19326,0.68812,0.96190}, - {0.18625,0.69775,0.95498}, - {0.17923,0.70732,0.94761}, - {0.17223,0.71680,0.93981}, - {0.16529,0.72620,0.93161}, - {0.15844,0.73551,0.92305}, - {0.15173,0.74472,0.91416}, - {0.14519,0.75381,0.90496}, - {0.13886,0.76279,0.89550}, - {0.13278,0.77165,0.88580}, - {0.12698,0.78037,0.87590}, - {0.12151,0.78896,0.86581}, - {0.11639,0.79740,0.85559}, - {0.11167,0.80569,0.84525}, - {0.10738,0.81381,0.83484}, - {0.10357,0.82177,0.82437}, - {0.10026,0.82955,0.81389}, - {0.09750,0.83714,0.80342}, - {0.09532,0.84455,0.79299}, - {0.09377,0.85175,0.78264}, - {0.09287,0.85875,0.77240}, - {0.09267,0.86554,0.76230}, - {0.09320,0.87211,0.75237}, - {0.09451,0.87844,0.74265}, - {0.09662,0.88454,0.73316}, - {0.09958,0.89040,0.72393}, - {0.10342,0.89600,0.71500}, - {0.10815,0.90142,0.70599}, - {0.11374,0.90673,0.69651}, - {0.12014,0.91193,0.68660}, - {0.12733,0.91701,0.67627}, - {0.13526,0.92197,0.66556}, - {0.14391,0.92680,0.65448}, - {0.15323,0.93151,0.64308}, - {0.16319,0.93609,0.63137}, - {0.17377,0.94053,0.61938}, - {0.18491,0.94484,0.60713}, - {0.19659,0.94901,0.59466}, - {0.20877,0.95304,0.58199}, - {0.22142,0.95692,0.56914}, - {0.23449,0.96065,0.55614}, - {0.24797,0.96423,0.54303}, - {0.26180,0.96765,0.52981}, - {0.27597,0.97092,0.51653}, - {0.29042,0.97403,0.50321}, - {0.30513,0.97697,0.48987}, - {0.32006,0.97974,0.47654}, - {0.33517,0.98234,0.46325}, - {0.35043,0.98477,0.45002}, - {0.36581,0.98702,0.43688}, - {0.38127,0.98909,0.42386}, - {0.39678,0.99098,0.41098}, - {0.41229,0.99268,0.39826}, - {0.42778,0.99419,0.38575}, - {0.44321,0.99551,0.37345}, - {0.45854,0.99663,0.36140}, - {0.47375,0.99755,0.34963}, - {0.48879,0.99828,0.33816}, - {0.50362,0.99879,0.32701}, - {0.51822,0.99910,0.31622}, - {0.53255,0.99919,0.30581}, - {0.54658,0.99907,0.29581}, - {0.56026,0.99873,0.28623}, - {0.57357,0.99817,0.27712}, - {0.58646,0.99739,0.26849}, - {0.59891,0.99638,0.26038}, - {0.61088,0.99514,0.25280}, - {0.62233,0.99366,0.24579}, - {0.63323,0.99195,0.23937}, - {0.64362,0.98999,0.23356}, - {0.65394,0.98775,0.22835}, - {0.66428,0.98524,0.22370}, - {0.67462,0.98246,0.21960}, - {0.68494,0.97941,0.21602}, - {0.69525,0.97610,0.21294}, - {0.70553,0.97255,0.21032}, - {0.71577,0.96875,0.20815}, - {0.72596,0.96470,0.20640}, - {0.73610,0.96043,0.20504}, - {0.74617,0.95593,0.20406}, - {0.75617,0.95121,0.20343}, - {0.76608,0.94627,0.20311}, - {0.77591,0.94113,0.20310}, - {0.78563,0.93579,0.20336}, - {0.79524,0.93025,0.20386}, - {0.80473,0.92452,0.20459}, - {0.81410,0.91861,0.20552}, - {0.82333,0.91253,0.20663}, - {0.83241,0.90627,0.20788}, - {0.84133,0.89986,0.20926}, - {0.85010,0.89328,0.21074}, - {0.85868,0.88655,0.21230}, - {0.86709,0.87968,0.21391}, - {0.87530,0.87267,0.21555}, - {0.88331,0.86553,0.21719}, - {0.89112,0.85826,0.21880}, - {0.89870,0.85087,0.22038}, - {0.90605,0.84337,0.22188}, - {0.91317,0.83576,0.22328}, - {0.92004,0.82806,0.22456}, - {0.92666,0.82025,0.22570}, - {0.93301,0.81236,0.22667}, - {0.93909,0.80439,0.22744}, - {0.94489,0.79634,0.22800}, - {0.95039,0.78823,0.22831}, - {0.95560,0.78005,0.22836}, - {0.96049,0.77181,0.22811}, - {0.96507,0.76352,0.22754}, - {0.96931,0.75519,0.22663}, - {0.97323,0.74682,0.22536}, - {0.97679,0.73842,0.22369}, - {0.98000,0.73000,0.22161}, - {0.98289,0.72140,0.21918}, - {0.98549,0.71250,0.21650}, - {0.98781,0.70330,0.21358}, - {0.98986,0.69382,0.21043}, - {0.99163,0.68408,0.20706}, - {0.99314,0.67408,0.20348}, - {0.99438,0.66386,0.19971}, - {0.99535,0.65341,0.19577}, - {0.99607,0.64277,0.19165}, - {0.99654,0.63193,0.18738}, - {0.99675,0.62093,0.18297}, - {0.99672,0.60977,0.17842}, - {0.99644,0.59846,0.17376}, - {0.99593,0.58703,0.16899}, - {0.99517,0.57549,0.16412}, - {0.99419,0.56386,0.15918}, - {0.99297,0.55214,0.15417}, - {0.99153,0.54036,0.14910}, - {0.98987,0.52854,0.14398}, - {0.98799,0.51667,0.13883}, - {0.98590,0.50479,0.13367}, - {0.98360,0.49291,0.12849}, - {0.98108,0.48104,0.12332}, - {0.97837,0.46920,0.11817}, - {0.97545,0.45740,0.11305}, - {0.97234,0.44565,0.10797}, - {0.96904,0.43399,0.10294}, - {0.96555,0.42241,0.09798}, - {0.96187,0.41093,0.09310}, - {0.95801,0.39958,0.08831}, - {0.95398,0.38836,0.08362}, - {0.94977,0.37729,0.07905}, - {0.94538,0.36638,0.07461}, - {0.94084,0.35566,0.07031}, - {0.93612,0.34513,0.06616}, - {0.93125,0.33482,0.06218}, - {0.92623,0.32473,0.05837}, - {0.92105,0.31489,0.05475}, - {0.91572,0.30530,0.05134}, - {0.91024,0.29599,0.04814}, - {0.90463,0.28696,0.04516}, - {0.89888,0.27824,0.04243}, - {0.89298,0.26981,0.03993}, - {0.88691,0.26152,0.03753}, - {0.88066,0.25334,0.03521}, - {0.87422,0.24526,0.03297}, - {0.86760,0.23730,0.03082}, - {0.86079,0.22945,0.02875}, - {0.85380,0.22170,0.02677}, - {0.84662,0.21407,0.02487}, - {0.83926,0.20654,0.02305}, - {0.83172,0.19912,0.02131}, - {0.82399,0.19182,0.01966}, - {0.81608,0.18462,0.01809}, - {0.80799,0.17753,0.01660}, - {0.79971,0.17055,0.01520}, - {0.79125,0.16368,0.01387}, - {0.78260,0.15693,0.01264}, - {0.77377,0.15028,0.01148}, - {0.76476,0.14374,0.01041}, - {0.75556,0.13731,0.00942}, - {0.74617,0.13098,0.00851}, - {0.73661,0.12477,0.00769}, - {0.72686,0.11867,0.00695}, - {0.71692,0.11268,0.00629}, - {0.70680,0.10680,0.00571}, - {0.69650,0.10102,0.00522}, - {0.68602,0.09536,0.00481}, - {0.67535,0.08980,0.00449}, - {0.66449,0.08436,0.00424}, - {0.65345,0.07902,0.00408}, - {0.64223,0.07380,0.00401}, - {0.63082,0.06868,0.00401}, - {0.61923,0.06367,0.00410}, - {0.60746,0.05878,0.00427}, - {0.59550,0.05399,0.00453}, - {0.58336,0.04931,0.00486}, - {0.57103,0.04474,0.00529}, - {0.55852,0.04028,0.00579}, - {0.54583,0.03593,0.00638}, - {0.53295,0.03169,0.00705}, - {0.51989,0.02756,0.00780}, - {0.50664,0.02354,0.00863}, - {0.49321,0.01963,0.00955}, - {0.47960,0.01583,0.01055} -}; - - - - -uint64_t nanoTime(){ + {0.18995, 0.07176, 0.23217}, {0.19483, 0.08339, 0.26149}, {0.19956, 0.09498, 0.29024}, + {0.20415, 0.10652, 0.31844}, {0.20860, 0.11802, 0.34607}, {0.21291, 0.12947, 0.37314}, + {0.21708, 0.14087, 0.39964}, {0.22111, 0.15223, 0.42558}, {0.22500, 0.16354, 0.45096}, + {0.22875, 0.17481, 0.47578}, {0.23236, 0.18603, 0.50004}, {0.23582, 0.19720, 0.52373}, + {0.23915, 0.20833, 0.54686}, {0.24234, 0.21941, 0.56942}, {0.24539, 0.23044, 0.59142}, + {0.24830, 0.24143, 0.61286}, {0.25107, 0.25237, 0.63374}, {0.25369, 0.26327, 0.65406}, + {0.25618, 0.27412, 0.67381}, {0.25853, 0.28492, 0.69300}, {0.26074, 0.29568, 0.71162}, + {0.26280, 0.30639, 0.72968}, {0.26473, 0.31706, 0.74718}, {0.26652, 0.32768, 0.76412}, + {0.26816, 0.33825, 0.78050}, {0.26967, 0.34878, 0.79631}, {0.27103, 0.35926, 0.81156}, + {0.27226, 0.36970, 0.82624}, {0.27334, 0.38008, 0.84037}, {0.27429, 0.39043, 0.85393}, + {0.27509, 0.40072, 0.86692}, {0.27576, 0.41097, 0.87936}, {0.27628, 0.42118, 0.89123}, + {0.27667, 0.43134, 0.90254}, {0.27691, 0.44145, 0.91328}, {0.27701, 0.45152, 0.92347}, + {0.27698, 0.46153, 0.93309}, {0.27680, 0.47151, 0.94214}, {0.27648, 0.48144, 0.95064}, + {0.27603, 0.49132, 0.95857}, {0.27543, 0.50115, 0.96594}, {0.27469, 0.51094, 0.97275}, + {0.27381, 0.52069, 0.97899}, {0.27273, 0.53040, 0.98461}, {0.27106, 0.54015, 0.98930}, + {0.26878, 0.54995, 0.99303}, {0.26592, 0.55979, 0.99583}, {0.26252, 0.56967, 0.99773}, + {0.25862, 0.57958, 0.99876}, {0.25425, 0.58950, 0.99896}, {0.24946, 0.59943, 0.99835}, + {0.24427, 0.60937, 0.99697}, {0.23874, 0.61931, 0.99485}, {0.23288, 0.62923, 0.99202}, + {0.22676, 0.63913, 0.98851}, {0.22039, 0.64901, 0.98436}, {0.21382, 0.65886, 0.97959}, + {0.20708, 0.66866, 0.97423}, {0.20021, 0.67842, 0.96833}, {0.19326, 0.68812, 0.96190}, + {0.18625, 0.69775, 0.95498}, {0.17923, 0.70732, 0.94761}, {0.17223, 0.71680, 0.93981}, + {0.16529, 0.72620, 0.93161}, {0.15844, 0.73551, 0.92305}, {0.15173, 0.74472, 0.91416}, + {0.14519, 0.75381, 0.90496}, {0.13886, 0.76279, 0.89550}, {0.13278, 0.77165, 0.88580}, + {0.12698, 0.78037, 0.87590}, {0.12151, 0.78896, 0.86581}, {0.11639, 0.79740, 0.85559}, + {0.11167, 0.80569, 0.84525}, {0.10738, 0.81381, 0.83484}, {0.10357, 0.82177, 0.82437}, + {0.10026, 0.82955, 0.81389}, {0.09750, 0.83714, 0.80342}, {0.09532, 0.84455, 0.79299}, + {0.09377, 0.85175, 0.78264}, {0.09287, 0.85875, 0.77240}, {0.09267, 0.86554, 0.76230}, + {0.09320, 0.87211, 0.75237}, {0.09451, 0.87844, 0.74265}, {0.09662, 0.88454, 0.73316}, + {0.09958, 0.89040, 0.72393}, {0.10342, 0.89600, 0.71500}, {0.10815, 0.90142, 0.70599}, + {0.11374, 0.90673, 0.69651}, {0.12014, 0.91193, 0.68660}, {0.12733, 0.91701, 0.67627}, + {0.13526, 0.92197, 0.66556}, {0.14391, 0.92680, 0.65448}, {0.15323, 0.93151, 0.64308}, + {0.16319, 0.93609, 0.63137}, {0.17377, 0.94053, 0.61938}, {0.18491, 0.94484, 0.60713}, + {0.19659, 0.94901, 0.59466}, {0.20877, 0.95304, 0.58199}, {0.22142, 0.95692, 0.56914}, + {0.23449, 0.96065, 0.55614}, {0.24797, 0.96423, 0.54303}, {0.26180, 0.96765, 0.52981}, + {0.27597, 0.97092, 0.51653}, {0.29042, 0.97403, 0.50321}, {0.30513, 0.97697, 0.48987}, + {0.32006, 0.97974, 0.47654}, {0.33517, 0.98234, 0.46325}, {0.35043, 0.98477, 0.45002}, + {0.36581, 0.98702, 0.43688}, {0.38127, 0.98909, 0.42386}, {0.39678, 0.99098, 0.41098}, + {0.41229, 0.99268, 0.39826}, {0.42778, 0.99419, 0.38575}, {0.44321, 0.99551, 0.37345}, + {0.45854, 0.99663, 0.36140}, {0.47375, 0.99755, 0.34963}, {0.48879, 0.99828, 0.33816}, + {0.50362, 0.99879, 0.32701}, {0.51822, 0.99910, 0.31622}, {0.53255, 0.99919, 0.30581}, + {0.54658, 0.99907, 0.29581}, {0.56026, 0.99873, 0.28623}, {0.57357, 0.99817, 0.27712}, + {0.58646, 0.99739, 0.26849}, {0.59891, 0.99638, 0.26038}, {0.61088, 0.99514, 0.25280}, + {0.62233, 0.99366, 0.24579}, {0.63323, 0.99195, 0.23937}, {0.64362, 0.98999, 0.23356}, + {0.65394, 0.98775, 0.22835}, {0.66428, 0.98524, 0.22370}, {0.67462, 0.98246, 0.21960}, + {0.68494, 0.97941, 0.21602}, {0.69525, 0.97610, 0.21294}, {0.70553, 0.97255, 0.21032}, + {0.71577, 0.96875, 0.20815}, {0.72596, 0.96470, 0.20640}, {0.73610, 0.96043, 0.20504}, + {0.74617, 0.95593, 0.20406}, {0.75617, 0.95121, 0.20343}, {0.76608, 0.94627, 0.20311}, + {0.77591, 0.94113, 0.20310}, {0.78563, 0.93579, 0.20336}, {0.79524, 0.93025, 0.20386}, + {0.80473, 0.92452, 0.20459}, {0.81410, 0.91861, 0.20552}, {0.82333, 0.91253, 0.20663}, + {0.83241, 0.90627, 0.20788}, {0.84133, 0.89986, 0.20926}, {0.85010, 0.89328, 0.21074}, + {0.85868, 0.88655, 0.21230}, {0.86709, 0.87968, 0.21391}, {0.87530, 0.87267, 0.21555}, + {0.88331, 0.86553, 0.21719}, {0.89112, 0.85826, 0.21880}, {0.89870, 0.85087, 0.22038}, + {0.90605, 0.84337, 0.22188}, {0.91317, 0.83576, 0.22328}, {0.92004, 0.82806, 0.22456}, + {0.92666, 0.82025, 0.22570}, {0.93301, 0.81236, 0.22667}, {0.93909, 0.80439, 0.22744}, + {0.94489, 0.79634, 0.22800}, {0.95039, 0.78823, 0.22831}, {0.95560, 0.78005, 0.22836}, + {0.96049, 0.77181, 0.22811}, {0.96507, 0.76352, 0.22754}, {0.96931, 0.75519, 0.22663}, + {0.97323, 0.74682, 0.22536}, {0.97679, 0.73842, 0.22369}, {0.98000, 0.73000, 0.22161}, + {0.98289, 0.72140, 0.21918}, {0.98549, 0.71250, 0.21650}, {0.98781, 0.70330, 0.21358}, + {0.98986, 0.69382, 0.21043}, {0.99163, 0.68408, 0.20706}, {0.99314, 0.67408, 0.20348}, + {0.99438, 0.66386, 0.19971}, {0.99535, 0.65341, 0.19577}, {0.99607, 0.64277, 0.19165}, + {0.99654, 0.63193, 0.18738}, {0.99675, 0.62093, 0.18297}, {0.99672, 0.60977, 0.17842}, + {0.99644, 0.59846, 0.17376}, {0.99593, 0.58703, 0.16899}, {0.99517, 0.57549, 0.16412}, + {0.99419, 0.56386, 0.15918}, {0.99297, 0.55214, 0.15417}, {0.99153, 0.54036, 0.14910}, + {0.98987, 0.52854, 0.14398}, {0.98799, 0.51667, 0.13883}, {0.98590, 0.50479, 0.13367}, + {0.98360, 0.49291, 0.12849}, {0.98108, 0.48104, 0.12332}, {0.97837, 0.46920, 0.11817}, + {0.97545, 0.45740, 0.11305}, {0.97234, 0.44565, 0.10797}, {0.96904, 0.43399, 0.10294}, + {0.96555, 0.42241, 0.09798}, {0.96187, 0.41093, 0.09310}, {0.95801, 0.39958, 0.08831}, + {0.95398, 0.38836, 0.08362}, {0.94977, 0.37729, 0.07905}, {0.94538, 0.36638, 0.07461}, + {0.94084, 0.35566, 0.07031}, {0.93612, 0.34513, 0.06616}, {0.93125, 0.33482, 0.06218}, + {0.92623, 0.32473, 0.05837}, {0.92105, 0.31489, 0.05475}, {0.91572, 0.30530, 0.05134}, + {0.91024, 0.29599, 0.04814}, {0.90463, 0.28696, 0.04516}, {0.89888, 0.27824, 0.04243}, + {0.89298, 0.26981, 0.03993}, {0.88691, 0.26152, 0.03753}, {0.88066, 0.25334, 0.03521}, + {0.87422, 0.24526, 0.03297}, {0.86760, 0.23730, 0.03082}, {0.86079, 0.22945, 0.02875}, + {0.85380, 0.22170, 0.02677}, {0.84662, 0.21407, 0.02487}, {0.83926, 0.20654, 0.02305}, + {0.83172, 0.19912, 0.02131}, {0.82399, 0.19182, 0.01966}, {0.81608, 0.18462, 0.01809}, + {0.80799, 0.17753, 0.01660}, {0.79971, 0.17055, 0.01520}, {0.79125, 0.16368, 0.01387}, + {0.78260, 0.15693, 0.01264}, {0.77377, 0.15028, 0.01148}, {0.76476, 0.14374, 0.01041}, + {0.75556, 0.13731, 0.00942}, {0.74617, 0.13098, 0.00851}, {0.73661, 0.12477, 0.00769}, + {0.72686, 0.11867, 0.00695}, {0.71692, 0.11268, 0.00629}, {0.70680, 0.10680, 0.00571}, + {0.69650, 0.10102, 0.00522}, {0.68602, 0.09536, 0.00481}, {0.67535, 0.08980, 0.00449}, + {0.66449, 0.08436, 0.00424}, {0.65345, 0.07902, 0.00408}, {0.64223, 0.07380, 0.00401}, + {0.63082, 0.06868, 0.00401}, {0.61923, 0.06367, 0.00410}, {0.60746, 0.05878, 0.00427}, + {0.59550, 0.05399, 0.00453}, {0.58336, 0.04931, 0.00486}, {0.57103, 0.04474, 0.00529}, + {0.55852, 0.04028, 0.00579}, {0.54583, 0.03593, 0.00638}, {0.53295, 0.03169, 0.00705}, + {0.51989, 0.02756, 0.00780}, {0.50664, 0.02354, 0.00863}, {0.49321, 0.01963, 0.00955}, + {0.47960, 0.01583, 0.01055}}; + +uint64_t nanoTime() { using namespace std; using namespace chrono; return duration_cast(high_resolution_clock::now().time_since_epoch()).count(); } -//CONFIG -KOKKOS_INLINE_FUNCTION bool isNaN(float x){ - #ifdef __CUDA_ARCH__ +// CONFIG +KOKKOS_INLINE_FUNCTION bool isNaN(float x) { +#ifdef __CUDA_ARCH__ return isnan(x); - #else +#else return std::isnan(x); - #endif +#endif } -KOKKOS_INLINE_FUNCTION bool isINF(float x){ - #ifdef __CUDA_ARCH__ +KOKKOS_INLINE_FUNCTION bool isINF(float x) { +#ifdef __CUDA_ARCH__ return isinf(x); - #else +#else return std::isinf(x); - #endif +#endif } -KOKKOS_INLINE_FUNCTION bool isNaN(double x){ - #ifdef __CUDA_ARCH__ +KOKKOS_INLINE_FUNCTION bool isNaN(double x) { +#ifdef __CUDA_ARCH__ return isnan(x); - #else +#else return std::isnan(x); - #endif +#endif } -KOKKOS_INLINE_FUNCTION bool isINF(double x){ - #ifdef __CUDA_ARCH__ +KOKKOS_INLINE_FUNCTION bool isINF(double x) { +#ifdef __CUDA_ARCH__ return isinf(x); - #else +#else return std::isinf(x); - #endif +#endif } -template -KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, ippl::Vector to, Ts... args){ - if constexpr(sizeof...(Ts) == Dim){ +template +KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, + ippl::Vector to, Ts... args) { + if constexpr (sizeof...(Ts) == Dim) { c(args...); - } - else{ - for(uint32_t i = from[sizeof...(Ts)];i < to[sizeof...(Ts)];i++){ + } else { + for (uint32_t i = from[sizeof...(Ts)]; i < to[sizeof...(Ts)]; i++) { serial_for(c, from, to, args..., i); } } } - - - - struct config { using scalar = double; - + // GRID PARAMETERS - ippl::Vector resolution; // Grid resolution in 3D - ippl::Vector extents; // Physical extents of the grid in each dimension - scalar total_time; // Total simulation time - scalar timestep_ratio; // Ratio of timestep to some reference value + ippl::Vector resolution; // Grid resolution in 3D + ippl::Vector extents; // Physical extents of the grid in each dimension + scalar total_time; // Total simulation time + scalar timestep_ratio; // Ratio of timestep to some reference value - scalar length_scale_in_jobfile; // Length scale defined in the jobfile - scalar temporal_scale_in_jobfile; // Temporal scale defined in the jobfile + scalar length_scale_in_jobfile; // Length scale defined in the jobfile + scalar temporal_scale_in_jobfile; // Temporal scale defined in the jobfile // PARTICLE PARAMETERS - scalar charge; // Particle charge in unit_charge - scalar mass; // Particle mass in unit_mass - uint64_t num_particles; // Number of particles in the simulation - bool space_charge; // Flag for considering space charge effects + scalar charge; // Particle charge in unit_charge + scalar mass; // Particle mass in unit_mass + uint64_t num_particles; // Number of particles in the simulation + bool space_charge; // Flag for considering space charge effects // BUNCH PARAMETERS - ippl::Vector mean_position; // Mean initial position of the particle bunch - ippl::Vector sigma_position; // Standard deviation of the initial position distribution - ippl::Vector position_truncations; // Truncations of the position distribution - ippl::Vector sigma_momentum; // Standard deviation of the initial momentum distribution - scalar bunch_gamma; // Relativistic gamma factor of the bunch + ippl::Vector mean_position; // Mean initial position of the particle bunch + ippl::Vector + sigma_position; // Standard deviation of the initial position distribution + ippl::Vector position_truncations; // Truncations of the position distribution + ippl::Vector + sigma_momentum; // Standard deviation of the initial momentum distribution + scalar bunch_gamma; // Relativistic gamma factor of the bunch // UNDULATOR PARAMETERS - scalar undulator_K; // Undulator parameter K - scalar undulator_period; // Period of the undulator - scalar undulator_length; // Length of the undulator + scalar undulator_K; // Undulator parameter K + scalar undulator_period; // Period of the undulator + scalar undulator_length; // Length of the undulator - uint32_t output_rhythm; // Frequency of output in timesteps - std::string output_path; // Path to output files - std::unordered_map experiment_options; // Additional experimental options + uint32_t output_rhythm; // Frequency of output in timesteps + std::string output_path; // Path to output files + std::unordered_map experiment_options; // Additional experimental options }; -template -ippl::Vector getVector(const nlohmann::json& j){ - if(j.is_array()){ +template +ippl::Vector getVector(const nlohmann::json& j) { + if (j.is_array()) { assert(j.size() == Dim); ippl::Vector ret; - for(unsigned i = 0;i < Dim;i++) + for (unsigned i = 0; i < Dim; i++) ret[i] = (scalar)j[i]; return ret; - } - else{ + } else { std::cerr << "Warning: Obtaining Vector from scalar json\n"; ippl::Vector ret; ret.fill((scalar)j); return ret; } } -template +template struct DefaultedStringLiteral { - constexpr DefaultedStringLiteral(const char (&str)[N], const T val) : value(val) { + constexpr DefaultedStringLiteral(const char (&str)[N], const T val) + : value(val) { std::copy_n(str, N, key); } - + T value; char key[N]; }; -template +template struct StringLiteral { - constexpr StringLiteral(const char (&str)[N]) { - std::copy_n(str, N, value); - } - + constexpr StringLiteral(const char (&str)[N]) { std::copy_n(str, N, value); } + char value[N]; - constexpr DefaultedStringLiteral operator>>(int t)const noexcept{ + constexpr DefaultedStringLiteral operator>>(int t) const noexcept { return DefaultedStringLiteral(value, t); } - constexpr size_t size()const noexcept{return N - 1;} + constexpr size_t size() const noexcept { return N - 1; } }; -template -constexpr size_t chash(){ +template +constexpr size_t chash() { size_t hash = 5381; int c; - for(size_t i = 0;i < lit.size();i++){ - c = lit.value[i]; - hash = ((hash << 5) + hash) + c; // hash * 33 + c + for (size_t i = 0; i < lit.size(); i++) { + c = lit.value[i]; + hash = ((hash << 5) + hash) + c; // hash * 33 + c } return hash; @@ -416,18 +242,18 @@ size_t chash(const char* val) { int c; while ((c = *val++)) { - hash = ((hash << 5) + hash) + c; // hash * 33 + c + hash = ((hash << 5) + hash) + c; // hash * 33 + c } return hash; } size_t chash(const std::string& _val) { - size_t hash = 5381; + size_t hash = 5381; const char* val = _val.c_str(); int c; while ((c = *val++)) { - hash = ((hash << 5) + hash) + c; // hash * 33 + c + hash = ((hash << 5) + hash) + c; // hash * 33 + c } return hash; @@ -443,39 +269,41 @@ std::string lowercase_singular(std::string str) { return str; } -double get_time_multiplier(const nlohmann::json& j){ +double get_time_multiplier(const nlohmann::json& j) { std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]); - double time_factor = 1.0; + double time_factor = 1.0; switch (chash(length_scale_string)) { case chash<"planck-time">(): case chash<"plancktime">(): case chash<"pt">(): case chash<"natural">(): time_factor = unit_time_in_seconds; - break; + break; case chash<"picosecond">(): time_factor = 1e-12; - break; + break; case chash<"nanosecond">(): time_factor = 1e-9; - break; + break; case chash<"microsecond">(): time_factor = 1e-6; - break; + break; case chash<"millisecond">(): time_factor = 1e-3; - break; + break; case chash<"second">(): time_factor = 1.0; - break; + break; default: - std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] << "\n"; - break; + std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] + << "\n"; + break; } return time_factor; } -double get_length_multiplier(const nlohmann::json& options){ - std::string length_scale_string = lowercase_singular((std::string)options["mesh"]["length-scale"]); +double get_length_multiplier(const nlohmann::json& options) { + std::string length_scale_string = + lowercase_singular((std::string)options["mesh"]["length-scale"]); double length_factor = 1.0; switch (chash(length_scale_string)) { case chash<"planck-length">(): @@ -483,29 +311,30 @@ double get_length_multiplier(const nlohmann::json& options){ case chash<"pl">(): case chash<"natural">(): length_factor = unit_length_in_meters; - break; + break; case chash<"picometer">(): length_factor = 1e-12; - break; + break; case chash<"nanometer">(): length_factor = 1e-9; - break; + break; case chash<"micrometer">(): length_factor = 1e-6; - break; + break; case chash<"millimeter">(): length_factor = 1e-3; - break; + break; case chash<"meter">(): length_factor = 1.0; - break; + break; default: - std::cerr << "Unrecognized length scale: " << (std::string)options["mesh"]["length-scale"] << "\n"; - break; + std::cerr << "Unrecognized length scale: " + << (std::string)options["mesh"]["length-scale"] << "\n"; + break; } return length_factor; } -config read_config(const char *filepath){ +config read_config(const char* filepath) { std::ifstream cfile(filepath); nlohmann::json j; cfile >> j; @@ -518,493 +347,492 @@ config read_config(const char *filepath){ ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters; ret.resolution = getVector(j["mesh"]["resolution"]); - //std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; - //std::cerr << "Tmult: " << tmult << "\n"; - if(j.contains("timestep-ratio")){ + // std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; + // std::cerr << "Tmult: " << tmult << "\n"; + if (j.contains("timestep-ratio")) { ret.timestep_ratio = (config::scalar)j["timestep-ratio"]; } - else{ + else { ret.timestep_ratio = 1; } - ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; + ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; ret.space_charge = (bool)(j["mesh"]["space-charge"]); - ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); - if(ret.bunch_gamma < config::scalar(1)){ + ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); + if (ret.bunch_gamma < config::scalar(1)) { std::cerr << "Gamma must be >= 1\n"; exit(1); } assert(j.contains("undulator")); assert(j["undulator"].contains("static-undulator")); - ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; - ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) / unit_length_in_meters; - ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) / unit_length_in_meters; + ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; + ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) + / unit_length_in_meters; + ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) + / unit_length_in_meters; assert(!std::isnan(ret.undulator_length)); assert(!std::isnan(ret.undulator_period)); assert(!std::isnan(ret.extents[0])); assert(!std::isnan(ret.extents[1])); assert(!std::isnan(ret.extents[2])); assert(!std::isnan(ret.total_time)); - ret.length_scale_in_jobfile = get_length_multiplier(j); + ret.length_scale_in_jobfile = get_length_multiplier(j); ret.temporal_scale_in_jobfile = get_time_multiplier(j); - ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; - ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; + ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; + ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"]; - ret.mean_position = getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; - ret.sigma_position = getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; - ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) * lmult / unit_length_in_meters; + ret.mean_position = + getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; + ret.sigma_position = + getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; + ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) + * lmult / unit_length_in_meters; ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); - ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; - ret.output_path = "../data/"; - if(j["output"].contains("path")){ + ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; + ret.output_path = "../data/"; + if (j["output"].contains("path")) { ret.output_path = j["output"]["path"]; - if(!ret.output_path.ends_with('/')){ + if (!ret.output_path.ends_with('/')) { ret.output_path.push_back('/'); } } - if(j.contains("experimentation")){ + if (j.contains("experimentation")) { nlohmann::json je = j["experimentation"]; - for(auto it = je.begin(); it!= je.end();it++){ + for (auto it = je.begin(); it != je.end(); it++) { ret.experiment_options[it.key()] = double(it.value()); } } return ret; } -template +template using FieldVector = ippl::Vector; -template +template struct BunchInitialize { - - /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If it is - * manual the charge at points of the position vector will be produced. */ + /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If + * it is manual the charge at points of the position vector will be produced. + */ // std::string bunchType_; - /* Type of the distributions (transverse or longitudinal) in the bunch. */ + /* Type of the distributions (transverse or longitudinal) in the bunch. + */ std::string distribution_; - /* Type of the generator for creating the bunch distribution. */ + /* Type of the generator for creating the bunch distribution. + */ std::string generator_; - /* Total number of macroparticles in the bunch. */ + /* Total number of macroparticles in the bunch. */ unsigned int numberOfParticles_; - /* Total charge of the bunch in pC. */ + /* Total charge of the bunch in pC. */ scalar cloudCharge_; - /* Initial energy of the bunch in MeV. */ + /* Initial energy of the bunch in MeV. */ scalar initialGamma_; - /* Initial normalized speed of the bunch. */ + /* Initial normalized speed of the bunch. */ scalar initialBeta_; - /* Initial movement direction of the bunch, which is a unit vector. */ + /* Initial movement direction of the bunch, which is a unit vector. */ FieldVector initialDirection_; - /* Position of the center of the bunch in the unit of length scale. */ + /* Position of the center of the bunch in the unit of length scale. */ // std::vector > position_; FieldVector position_; - /* Number of macroparticles in each direction for 3Dcrystal type. */ + /* Number of macroparticles in each direction for 3Dcrystal type. */ FieldVector numbers_; - /* Lattice constant in x, y, and z directions for 3D crystal type. */ + /* Lattice constant in x, y, and z directions for 3D crystal type. */ FieldVector latticeConstants_; /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal - * type, it will be the spread in position for each micro-bunch of the crystal. */ + * type, it will be the spread in position for each micro-bunch of the crystal. + */ FieldVector sigmaPosition_; - /* Spread in energy in each direction. */ + /* Spread in energy in each direction. */ FieldVector sigmaGammaBeta_; - /* Store the truncation transverse distance for the electron generation. */ + /* Store the truncation transverse distance for the electron generation. + */ scalar tranTrun_; - /* Store the truncation longitudinal distance for the electron generation. */ + /* Store the truncation longitudinal distance for the electron generation. + */ scalar longTrun_; - /* Name of the file for reading the electrons distribution from. */ + /* Name of the file for reading the electrons distribution from. + */ std::string fileName_; - /* The radiation wavelength corresponding to the bunch length outside the undulator */ + /* The radiation wavelength corresponding to the bunch length outside the undulator + */ scalar lambda_; - /* Bunching factor for the initialization of the bunch. */ + /* Bunching factor for the initialization of the bunch. + */ scalar bF_; - /* Phase of the bunching factor for the initialization of the bunch. */ + /* Phase of the bunching factor for the initialization of the bunch. + */ scalar bFP_; - /* Boolean flag determining the activation of shot-noise. */ + /* Boolean flag determining the activation of shot-noise. + */ bool shotNoise_; - /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. */ + /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. + */ FieldVector betaVector_; - /* Initialize the parameters for the bunch initialization to some first values. */ + /* Initialize the parameters for the bunch initialization to some first values. */ // BunchInitialize (); }; -//LORENTZ FRAME AND UNDULATOR -template -struct UniaxialLorentzframe{ - constexpr static T c = 1.0; - using scalar = T; - using Vector3 = ippl::Vector; - scalar beta_m; - scalar gammaBeta_m; - scalar gamma_m; - KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma){ - - UniaxialLorentzframe ret; - ret.gamma_m = gamma; - scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); - scalar gammabeta = gamma * beta; - ret.beta_m = beta; - ret.gammaBeta_m = gammabeta; - return ret; - } - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative()const noexcept{ - UniaxialLorentzframe ret; - ret.beta_m = -beta_m; - ret.gammaBeta_m = -gammaBeta_m; - ret.gamma_m = gamma_m; - return ret; - } +// LORENTZ FRAME AND UNDULATOR - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta){ - using Kokkos::sqrt; - gammaBeta_m = gammaBeta; - beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); - gamma_m = sqrt(1 + gammaBeta * gammaBeta); - } - KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time)const noexcept{ - arg[axis] = gamma_m * (arg[axis] + beta_m * time); - } - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB(const Kokkos::pair, ippl::Vector>& unprimedEB)const noexcept{ - - Kokkos::pair, ippl::Vector> ret; - ippl::Vector betavec{0, 0, beta_m}; - ret.first = ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); - ret.second = ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first )) * gamma_m;// - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); - ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; - ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; - return ret; - } - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> inverse_transform_EB(const Kokkos::pair, ippl::Vector>& primedEB)const noexcept{ - return negative().transform_EB(primedEB); - } -}; -template -BunchInitialize generate_mithra_config(const config& cfg, const UniaxialLorentzframe& /*frame_boost unused*/) { - using vec3 = ippl::Vector; +template +BunchInitialize generate_mithra_config( + const config& cfg, const ippl::UniaxialLorentzframe& /*frame_boost unused*/) { + using vec3 = ippl::Vector; scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K); BunchInitialize init; - init.generator_ = "random"; - init.distribution_ = "uniform"; + init.generator_ = "random"; + init.distribution_ = "uniform"; init.initialDirection_ = vec3{0, 0, 1}; - init.initialGamma_ = cfg.bunch_gamma; - init.initialBeta_ = cfg.bunch_gamma == scalar(1) ? 0 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); - init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); - init.sigmaPosition_ = cfg.sigma_position.template cast(); + init.initialGamma_ = cfg.bunch_gamma; + init.initialBeta_ = cfg.bunch_gamma == scalar(1) + ? 0 + : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); + init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); + init.sigmaPosition_ = cfg.sigma_position.template cast(); // TODO: Initial bunching factor huh - init.bF_ = 0.01; - init.bFP_ = 0; - init.shotNoise_ = false; - init.cloudCharge_ = cfg.charge; - init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); - init.longTrun_ = cfg.position_truncations[2]; - init.tranTrun_ = cfg.position_truncations[0]; - init.position_ = cfg.mean_position.cast(); - init.betaVector_ = ippl::Vector{0,0,init.initialBeta_}; + init.bF_ = 0.01; + init.bFP_ = 0; + init.shotNoise_ = false; + init.cloudCharge_ = cfg.charge; + init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); + init.longTrun_ = cfg.position_truncations[2]; + init.tranTrun_ = cfg.position_truncations[0]; + init.position_ = cfg.mean_position.cast(); + init.betaVector_ = ippl::Vector{0, 0, init.initialBeta_}; init.numberOfParticles_ = cfg.num_particles; - init.numbers_ = 0; // UNUSED - init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED + init.numbers_ = 0; // UNUSED + init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED return init; } template struct Charge { - Double q; /* Charge of the point in the unit of electron charge. */ FieldVector rnp, rnm; /* Position vector of the charge. */ FieldVector gb; /* Normalized velocity vector of the charge. */ - /* Double flag determining if the particle is passing the entrance point of the undulator. This flag - * can be used for better boosting the bunch to the moving frame. We need to consider it to be double, - * because this flag needs to be communicated during bunch update. */ + /* Double flag determining if the particle is passing the entrance point of the undulator. This + * flag can be used for better boosting the bunch to the moving frame. We need to consider it to + * be double, because this flag needs to be communicated during bunch update. + */ Double e; // Charge(); }; template using ChargeVector = std::list>; -template -void initializeBunchEllipsoid (BunchInitialize bunchInit, ChargeVector & chargeVector, int rank, int size, int ia){ - /* Correct the number of particles if it is not a multiple of four. */ - if ( bunchInit.numberOfParticles_ % 4 != 0 ) - { +template +void initializeBunchEllipsoid(BunchInitialize bunchInit, ChargeVector& chargeVector, + int rank, int size, int ia) { + /* Correct the number of particles if it is not a multiple of four. + */ + if (bunchInit.numberOfParticles_ % 4 != 0) { unsigned int n = bunchInit.numberOfParticles_ % 4; bunchInit.numberOfParticles_ += 4 - n; - //printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of particles in the bunch is not a multiple of four. ") + - // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); + // printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of + // particles in the bunch is not a multiple of four. ") + + // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); } - /* Save the initially given number of particles. */ - unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); + /* Save the initially given number of particles. */ + unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); - /* Declare the required parameters for the initialization of charge vectors. */ - Charge charge; charge.q = bunchInit.cloudCharge_ / Np; + /* Declare the required parameters for the initialization of charge vectors. */ + Charge charge; + charge.q = bunchInit.cloudCharge_ / Np; FieldVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; - FieldVector r (0.0); - FieldVector t (0.0); - Double t0;//, g; - Double zmin = 1e100; - Double Ne, bF, bFi; - unsigned int bmi; - std::vector randomNumbers; - - /* The initialization in group of four particles should only be done if there exists an undulator in - * the interaction. */ - unsigned int ng = ( bunchInit.lambda_ == 0.0 ) ? 1 : 4; - - /* Check the bunching factor. */ - if ( bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0 ) - { - //printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be larger than one or a negative value !!!") ); - //exit(1); - } - - /* If the generator is random we should make sure that different processors do not produce the same - * random numbers. */ - if ( bunchInit.generator_ == "random" ) - { - /* Initialize the random number generator. */ - srand ( time(NULL) ); - /* Np / ng * 20 is the maximum number of particles. */ - randomNumbers.resize( Np / ng * 20, 0.0); - for ( unsigned int ri = 0; ri < Np / ng * 20; ri++) - randomNumbers[ri] = (float)std::min(1 - 1e-7, std::max(1e-7, ((double) rand() ) / RAND_MAX)); - } - - /* Declare the generator function depending on the input. */ - auto generate = [&] (unsigned int n, unsigned int m) { - //if ( bunchInit.generator_ == "random" ) - return ( randomNumbers.at( n * 2 * Np/ng + m ) ); - //else - // return ( randomNumbers[ n * 2 * Np/ng + m ] ); - //TODO: Return halton properly - //return ( halton(n,m) ); - }; + FieldVector r(0.0); + FieldVector t(0.0); + Double t0; //, g; + Double zmin = 1e100; + Double Ne, bF, bFi; + unsigned int bmi; + std::vector randomNumbers; + + /* The initialization in group of four particles should only be done if there exists an + * undulator in the interaction. + */ + unsigned int ng = (bunchInit.lambda_ == 0.0) ? 1 : 4; - /* Declare the function for injecting the shot noise. */ - auto insertCharge = [&] (Charge q) { + /* Check the bunching factor. */ + if (bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0) { + // printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be + // larger than one or a negative value !!!") ); exit(1); + } - for ( unsigned int ii = 0; ii < ng; ii++ ) - { - /* The random modulation is introduced depending on the shot-noise being activated. */ - if ( bunchInit.shotNoise_ ) - { - /* Obtain the number of beamlet. */ - bmi = int( ( charge.rnp[2] - zmin ) / bunchInit.lambda_ ); + /* If the generator is random we should make sure that different processors do not produce the + * same random numbers. + */ + if (bunchInit.generator_ == "random") { + /* Initialize the random number generator. + */ + srand(time(NULL)); + /* Np / ng * 20 is the maximum number of particles. + */ + randomNumbers.resize(Np / ng * 20, 0.0); + for (unsigned int ri = 0; ri < Np / ng * 20; ri++) + randomNumbers[ri] = + (float)std::min(1 - 1e-7, std::max(1e-7, ((double)rand()) / RAND_MAX)); + } - /* Obtain the phase and amplitude of the modulation. */ - bFi = bF * sqrt( - 2.0 * log( generate( 8 , bmi ) ) ); + /* Declare the generator function depending on the input. + */ + auto generate = [&](unsigned int n, unsigned int m) { + // if ( bunchInit.generator_ == "random" ) + return (randomNumbers.at(n * 2 * Np / ng + m)); + // else + // return ( randomNumbers[ n * 2 * Np/ng + m ] ); + // TODO: Return halton properly + // return ( halton(n,m) ); + }; - q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + /* Declare the function for injecting the shot noise. + */ + auto insertCharge = [&](Charge q) { + for (unsigned int ii = 0; ii < ng; ii++) { + /* The random modulation is introduced depending on the shot-noise being activated. + */ + if (bunchInit.shotNoise_) { + /* Obtain the number of beamlet. + */ + bmi = int((charge.rnp[2] - zmin) / bunchInit.lambda_); - q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + 2.0 * M_PI * generate( 9 , bmi ) ); - } - else if ( bunchInit.lambda_ != 0.0) - { - q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + /* Obtain the phase and amplitude of the modulation. + */ + bFi = bF * sqrt(-2.0 * log(generate(8, bmi))); + + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi + * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + + 2.0 * M_PI * generate(9, bmi)); + } else if (bunchInit.lambda_ != 0.0) { + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; - q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ * sin( 2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + bunchInit.bFP_ * M_PI / 180.0 ); + q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ + * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + + bunchInit.bFP_ * M_PI / 180.0); } - /* Set this charge into the charge vector. */ - chargeVector.push_back(q); + /* Set this charge into the charge vector. */ + chargeVector.push_back(q); } }; /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to - * calculate the FEL bucket number. */ - if ( bunchInit.shotNoise_ ) - { - for (i = 0; i < Np / ng; i++) - { - if ( bunchInit.distribution_ == "uniform" ) - zmin = std::min( Double( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2] , zmin ); - else if ( bunchInit.distribution_ == "gaussian" ) - zmin = std::min( (Double) (bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ) ), zmin ); - else - { - std::cout << std::string("The longitudinal type is not correctly given to the code !!!\n"); + * calculate the FEL bucket number. */ + if (bunchInit.shotNoise_) { + for (i = 0; i < Np / ng; i++) { + if (bunchInit.distribution_ == "uniform") + zmin = std::min( + Double(2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2], zmin); + else if (bunchInit.distribution_ == "gaussian") + zmin = std::min( + (Double)(bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0))), + zmin); + else { + std::cout << std::string( + "The longitudinal type is not correctly given to the code !!!\n"); exit(1); - } - } - - if ( bunchInit.distribution_ == "uniform" ) - for ( ; i < unsigned( Np / ng * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i++) - { - t0 = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate( 2, i + Np0 ) ) ) * sin( 2.0 * M_PI * generate( 3, i + Np0 ) ); - t0 += ( t0 < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); + } + } - zmin = std::min( t0 , zmin ); + if (bunchInit.distribution_ == "uniform") + for (; i < unsigned(Np / ng + * (1.0 + + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI) + / (2.0 * bunchInit.sigmaPosition_[2]))); + i++) { + t0 = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + t0 += (t0 < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]); + + zmin = std::min(t0, zmin); } zmin = zmin + bunchInit.position_[2]; - /* Obtain the average number of electrons per FEL beamlet. */ - Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / ( 2.0 * bunchInit.sigmaPosition_[2] ); - - /* Set the bunching factor level for the shot noise depending on the given values. */ - bF = ( bunchInit.bF_ == 0.0 ) ? 1.0 / sqrt(Ne) : bunchInit.bF_; - - } + /* Obtain the average number of electrons per FEL beamlet. + */ + Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / (2.0 * bunchInit.sigmaPosition_[2]); - /* Determine the properties of each charge point and add them to the charge vector. */ - for (i = rank; i < Np / ng; i += size) - { - /* Determine the transverse coordinate. */ - r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); - r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); + /* Set the bunching factor level for the shot noise depending on the given values. + */ + bF = (bunchInit.bF_ == 0.0) ? 1.0 / sqrt(Ne) : bunchInit.bF_; + } - /* Determine the longitudinal coordinate. */ - if ( bunchInit.distribution_ == "uniform" ) - r[2] = ( 2.0 * generate(2, i + Np0) - 1.0 ) * bunchInit.sigmaPosition_[2]; - else if ( bunchInit.distribution_ == "gaussian" ) - r[2] = bunchInit.sigmaPosition_[2] * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); - else - { + /* Determine the properties of each charge point and add them to the charge vector. */ + for (i = rank; i < Np / ng; i += size) { + /* Determine the transverse coordinate. */ + r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0))) + * cos(2.0 * M_PI * generate(1, i + Np0)); + r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0))) + * sin(2.0 * M_PI * generate(1, i + Np0)); + + /* Determine the longitudinal coordinate. */ + if (bunchInit.distribution_ == "uniform") + r[2] = (2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2]; + else if (bunchInit.distribution_ == "gaussian") + r[2] = bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + else { exit(1); - } - - /* Determine the transverse momentum. */ - t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); - t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); - t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); - - if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) - { - /* Shift the generated charge to the center position and momentum space. */ - charge.rnp = bunchInit.position_; - charge.rnp += r; - - charge.gb = gb; - charge.gb += t; - if(std::isinf(gb[2])){ + } + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0))) + * cos(2.0 * M_PI * generate(5, i + Np0)); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0))) + * sin(2.0 * M_PI * generate(5, i + Np0)); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0))) + * cos(2.0 * M_PI * generate(7, i + Np0)); + + if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ + && fabs(r[2]) < bunchInit.longTrun_) { + /* Shift the generated charge to the center position and momentum space. + */ + charge.rnp = bunchInit.position_; + charge.rnp += r; + + charge.gb = gb; + charge.gb += t; + if (std::isinf(gb[2])) { std::cerr << "[Warning] Gammabeta obtained an klonked here\n"; } - /* Insert this charge and the mirrored ones into the charge vector. */ + /* Insert this charge and the mirrored ones into the charge vector. + */ insertCharge(charge); - } - } + } + } - /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove the - * CSE from the tail of the bunch. */ - if ( bunchInit.distribution_ == "uniform" ){ - for ( ; i < unsigned( uint32_t(Np / ng) * ( 1.0 + 2.0 * bunchInit.lambda_ * sqrt( 2.0 * M_PI ) / ( 2.0 * bunchInit.sigmaPosition_[2] ) ) ); i += size) - { - - r[0] = bunchInit.sigmaPosition_[0] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * cos( 2.0 * M_PI * generate(1, i + Np0) ); - r[1] = bunchInit.sigmaPosition_[1] * sqrt( - 2.0 * log( generate(0, i + Np0) ) ) * sin( 2.0 * M_PI * generate(1, i + Np0) ); - - /* Determine the longitudinal coordinate. */ - r[2] = 2.0 * bunchInit.lambda_ * sqrt( - 2.0 * log( generate(2, i + Np0) ) ) * sin( 2.0 * M_PI * generate(3, i + Np0) ); - r[2] += ( r[2] < 0.0 ) ? ( - bunchInit.sigmaPosition_[2] ) : ( bunchInit.sigmaPosition_[2] ); - - /* Determine the transverse momentum. */ - t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * cos( 2.0 * M_PI * generate(5, i + Np0) ); - t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt( - 2.0 * log( generate(4, i + Np0) ) ) * sin( 2.0 * M_PI * generate(5, i + Np0) ); - t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt( - 2.0 * log( generate(6, i + Np0) ) ) * cos( 2.0 * M_PI * generate(7, i + Np0) ); - if ( fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ && fabs(r[2]) < bunchInit.longTrun_) - { - /* Shift the generated charge to the center position and momentum space. */ - charge.rnp = bunchInit.position_[ia]; - charge.rnp += r; - - charge.gb = gb; - - charge.gb += t; - /* Insert this charge and the mirrored ones into the charge vector. */ - insertCharge(charge); + /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove + * the CSE from the tail of the bunch. + */ + if (bunchInit.distribution_ == "uniform") { + for (; i < unsigned(uint32_t(Np / ng) + * (1.0 + + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI) + / (2.0 * bunchInit.sigmaPosition_[2]))); + i += size) { + r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0))) + * cos(2.0 * M_PI * generate(1, i + Np0)); + r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0))) + * sin(2.0 * M_PI * generate(1, i + Np0)); + + /* Determine the longitudinal coordinate. */ + r[2] = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + r[2] += (r[2] < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]); + + /* Determine the transverse momentum. + */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0))) + * cos(2.0 * M_PI * generate(5, i + Np0)); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0))) + * sin(2.0 * M_PI * generate(5, i + Np0)); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0))) + * cos(2.0 * M_PI * generate(7, i + Np0)); + if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ + && fabs(r[2]) < bunchInit.longTrun_) { + /* Shift the generated charge to the center position and momentum space. + */ + charge.rnp = bunchInit.position_[ia]; + charge.rnp += r; + + charge.gb = gb; + + charge.gb += t; + /* Insert this charge and the mirrored ones into the charge vector. + */ + insertCharge(charge); } } } /* Reset the value for the number of particle variable according to the installed number of - * macro-particles and perform the corresponding changes. */ + * macro-particles and perform the corresponding changes. */ bunchInit.numberOfParticles_ = chargeVector.size(); } -template -void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma){ +template +void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma) { Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); - Double zmaxL = -1.0e100, zmaxG; - for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) - { - Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); - if(std::isinf(g)){ - std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g << "\n"; + Double zmaxL = -1.0e100, zmaxG; + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + if (std::isinf(g)) { + std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g + << "\n"; abort(); } - Double bz = iterQ->gb[2] / g; - iterQ->rnp[2] *= frame_gamma; - - iterQ->gb[2] = frame_gamma * g * ( bz - frame_beta ); - - zmaxL = std::max( zmaxL , iterQ->rnp[2] ); - } + Double bz = iterQ->gb[2] / g; + iterQ->rnp[2] *= frame_gamma; + + iterQ->gb[2] = frame_gamma * g * (bz - frame_beta); + + zmaxL = std::max(zmaxL, iterQ->rnp[2]); + } zmaxG = zmaxL; struct { - Double zu_; - Double beta_; + Double zu_; + Double beta_; } bunch_; - bunch_.zu_ = zmaxG; - bunch_.beta_ = frame_beta; + bunch_.zu_ = zmaxG; + bunch_.beta_ = frame_beta; /****************************************************************************************************/ - for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++ ) - { - Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); - iterQ->rnp[0] += iterQ->gb[0] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - iterQ->rnp[1] += iterQ->gb[1] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - iterQ->rnp[2] += iterQ->gb[2] / g * ( iterQ->rnp[2] - bunch_.zu_ ) * frame_beta; - if(std::isnan(iterQ->rnp[2])){ - std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ << ", " << frame_beta << "\n"; - std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + iterQ->rnp[0] += iterQ->gb[0] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + iterQ->rnp[1] += iterQ->gb[1] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + iterQ->rnp[2] += iterQ->gb[2] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + if (std::isnan(iterQ->rnp[2])) { + std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ + << ", " << frame_beta << "\n"; + std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; abort(); } - } + } } - - - -template -size_t initialize_bunch_mithra( - bunch_type& bunch, - const BunchInitialize& bunchInit, - scalar frame_gamma){ - +template +size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize& bunchInit, + scalar frame_gamma) { ChargeVector oof; initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0); - for(auto& c : oof){ - if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) + for (auto& c : oof) { + if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) std::cout << "Pos before boost: " << c.rnp << "\n"; - if(std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) + if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) std::cout << "Pos before boost: " << c.rnp << "\n"; } boost_bunch(oof, frame_gamma); - for(auto& c : oof){ - if(std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])){ + for (auto& c : oof) { + if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) { std::cout << "Pos after boost: " << c.rnp << "\n"; break; } @@ -1022,60 +850,52 @@ size_t initialize_bunch_mithra( scalar bz = iterQ->gb[2] / g; assert_isreal(bz); (void)bz; - positions(i) = iterQ->rnp; + positions(i) = iterQ->rnp; gammabetas(i) = iterQ->gb; ++iterQ; } - if(oof.size() > bunch.getLocalNum()){ + if (oof.size() > bunch.getLocalNum()) { bunch.create(oof.size() - bunch.getLocalNum()); } - Kokkos::View*> dpositions ("", oof.size()); + Kokkos::View*> dpositions("", oof.size()); Kokkos::View*> dgammabetas("", oof.size()); - + Kokkos::deep_copy(dpositions, positions); Kokkos::deep_copy(dgammabetas, gammabetas); Kokkos::deep_copy(bunch.R_nm1.getView(), positions); Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas); - auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), gbview = bunch.gamma_beta.getView();; - Kokkos::parallel_for(oof.size(), KOKKOS_LAMBDA(size_t i){ - rview(i) = dpositions(i); - rm1view(i) = dpositions(i); - gbview(i) = dgammabetas(i); - }); + auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), + gbview = bunch.gamma_beta.getView(); + ; + Kokkos::parallel_for( + oof.size(), KOKKOS_LAMBDA(size_t i) { + rview(i) = dpositions(i); + rm1view(i) = dpositions(i); + gbview(i) = dgammabetas(i); + }); Kokkos::fence(); - + return oof.size(); } +// END LORENTZ FRAME AND UNDULATOR AND BUNCH +// PREAMBLE - - - - -//END LORENTZ FRAME AND UNDULATOR AND BUNCH - - - - - -//PREAMBLE - -template +template requires((std::is_floating_point_v)) -KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x) { uint32_t dim = sizeof...(scalar); ippl::Vector vec{scalar1(x - mean)...}; - for(unsigned d = 0;d < dim;d++){ + for (unsigned d = 0; d < dim; d++) { vec[d] = vec[d] * vec[d]; } - #ifndef __CUDA_ARCH__ +#ifndef __CUDA_ARCH__ using std::exp; - #endif - return exp(-(vec.sum()) / (stddev * stddev)); +#endif + return exp(-(vec.sum()) / (stddev * stddev)); } - template KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, const ippl::Vector& b) { @@ -1085,37 +905,34 @@ KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a ret[2] = a[0] * b[1] - a[1] * b[0]; return ret; } -template -KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, int nghost = 1){ +template +KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, + int nghost = 1) { Kokkos::pair, ippl::Vector> ret; - ippl::Vector relpos = pos - origin; + ippl::Vector relpos = pos - origin; ippl::Vector gridpos = relpos / hr; ippl::Vector ipos; ipos = gridpos.template cast(); ippl::Vector fracpos; - for(unsigned k = 0;k < 3;k++){ + for (unsigned k = 0; k < 3; k++) { fracpos[k] = gridpos[k] - (int)ipos[k]; } ipos += ippl::Vector(nghost); - ret.first = ipos; + ret.first = ipos; ret.second = fracpos; return ret; } -template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value, int nghost = 1){ +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, const scalar value, + int nghost = 1) { auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); ipos -= ldom.first(); - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { return; } assert(fracpos[0] >= 0.0f); @@ -1132,11 +949,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ + for (unsigned d = 0; d < 3; d++) { weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); ipos_l[d] += !!(i & (1 << d)); } @@ -1147,23 +964,18 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view } assert(abs(accum - 1.0f) < 1e-6f); } -template -KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value, int nghost = 1){ - //std::cout << "Value: " << value << "\n"; +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, + const ippl::Vector& value, int nghost = 1) { + // std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); ipos -= ldom.first(); - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ - //std::cout << "out of bounds\n"; + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + // std::cout << "out of bounds\n"; return; } assert(fracpos[0] >= 0.0f); @@ -1180,11 +992,11 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(one_minus_fracpos[2] >= 0.0f); assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ + for (unsigned d = 0; d < 3; d++) { weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); ipos_l[d] += !!(i & (1 << d)); } @@ -1197,16 +1009,23 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view assert(abs(accum - 1.0f) < 1e-6f); } -template -KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor, int nghost = 1){ +template +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, + ippl::Vector hr, + ippl::Vector origin, + const ippl::Vector& from, + const ippl::Vector& to, + const scalar factor, int nghost = 1) { + Kokkos::pair, ippl::Vector> from_grid = + gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = + gridCoordinatesOf(hr, origin, to); + + if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] + && from_grid.first[2] == to_grid.first[2]) { + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), + ippl::Vector((to - from) * factor)); - - Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); - Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); - - if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); - return; } ippl::Vector relay; @@ -1214,34 +1033,22 @@ KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view using Kokkos::max; using Kokkos::min; for (unsigned int i = 0; i < 3; i++) { - relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], - max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(0.0) * hr[i] + orig[i], scalar(0.5) * (to[i] + from[i]))); } - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), + ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), + ippl::Vector((to - relay) * factor)); } - - namespace ippl { - - template - struct undulator_parameters{ - scalar lambda_u; //MITHRA: lambda_u - scalar K; //Undulator parameter - scalar length; - scalar B_magnitude; - undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) : lambda_u(lambda_u), K(K_undulator_parameter), length(_length){ - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); - } - undulator_parameters(const config& cfg): lambda_u(cfg.undulator_period), K(cfg.undulator_K), length(cfg.undulator_length){ - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) / (electron_charge_in_unit_charges * lambda_u); - } - }; - template - struct nondispersive{ + template + struct nondispersive { scalar a1; scalar a2; scalar a4; @@ -1249,19 +1056,20 @@ namespace ippl { scalar a8; }; template - struct Bunch_eb : public ippl::ParticleBase { + struct Bunch_eb : public ippl::ParticleBase { using scalar = _scalar; // Constructor for the Bunch class, taking a PLayout reference Bunch_eb(PLayout& playout) : ippl::ParticleBase(playout) { // Add attributes to the particle bunch - this->addAttribute(Q); // Charge attribute - this->addAttribute(mass); // Mass attribute - this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute( + gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) this->addAttribute(R_np1); // Position attribute for the next time step this->addAttribute(R_nm1); // Position attribute for the next time step - this->addAttribute(EB_gather); // Electric field attribute for particle gathering + this->addAttribute(EB_gather); // Electric field attribute for particle gathering } // Destructor for the Bunch class @@ -1271,85 +1079,23 @@ namespace ippl { using charge_container_type = ippl::ParticleAttrib; using velocity_container_type = ippl::ParticleAttrib>; using vector_container_type = ippl::ParticleAttrib>; - using vector4_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; // Declare instances of the attribute containers - charge_container_type Q; // Charge container - charge_container_type mass; // Mass container - velocity_container_type gamma_beta; // Gamma-beta container - typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step - typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step - ippl::ParticleAttrib, 2>> EB_gather; // Electric field container for particle gathering - - }; - - - /** - * @brief Struct representing an undulator. - * - * @tparam scalar Type of the scalar values (e.g., float, double). - */ - template - struct Undulator { - undulator_parameters uparams; ///< Parameters of the undulator. - scalar distance_to_entry; ///< Distance to the entry of the undulator. - scalar k_u; ///< Wavenumber of the undulator. - - /** - * @brief Constructor to initialize undulator parameters and calculate k_u. - * - * @param p Parameters of the undulator. - * @param dte Distance to the entry of the undulator. - */ - KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) - : uparams(p), distance_to_entry(dte), k_u(2 * M_PI / p.lambda_u) {} - - /** - * @brief Overloaded operator() to compute magnetic field components. - * - * @param position_in_lab_frame Position vector in the lab frame. - * @return Kokkos::pair, ippl::Vector> - * Pair containing magnetic field and its derivative. - */ - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> - operator()(const ippl::Vector& position_in_lab_frame) const noexcept { - using Kokkos::sin; - using Kokkos::sinh; - using Kokkos::cos; - using Kokkos::cosh; - using Kokkos::exp; - - Kokkos::pair, ippl::Vector> ret; // Return pair containing magnetic field and its derivative. - ret.first.fill(0); // Initialize magnetic field vector. - ret.second.fill(0); // Initialize derivative vector. - - // If the position is before the undulator entry. - if (position_in_lab_frame[2] < distance_to_entry) { - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator < 0); // Ensure we are in the correct region. - scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) * 0.5)); // Gaussian decay factor. - - ret.second[0] = 0; // No x-component. - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * z_in_undulator * k_u * scal; // y-component. - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * scal; // z-component. - } - // If the position is within the undulator. - else if (position_in_lab_frame[2] > distance_to_entry && position_in_lab_frame[2] < distance_to_entry + uparams.length) { - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator >= 0); // Ensure we are in the correct region. - - ret.second[0] = 0; // No x-component. - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) * sin(k_u * z_in_undulator); // y-component. - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) * cos(k_u * z_in_undulator); // z-component. - } - return ret; - } + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type + R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type + R_nm1; // Position container for the previous time step + ippl::ParticleAttrib, 2>> + EB_gather; // Electric field container for particle gathering }; - /** * @brief Struct representing the state of an FEL (Free Electron Laser) simulation. - * + * * @tparam scalar Type of the scalar values (e.g., float, double). */ template @@ -1416,7 +1162,7 @@ namespace ippl { : fieldsAndParticles(layout, mesch, nparticles), m_config(cfg), ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), - uparams(cfg), + uparams(cfg.undulator_K, cfg.undulator_period, cfg.undulator_length), undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) { hr_m = mesch.getMeshSpacing(); @@ -1459,8 +1205,8 @@ namespace ippl { // clang-format on } // namespace ippl bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { - const int channels = 3; // RGB - const int stride = width * channels; + const int channels = 3; // RGB + const int stride = width * channels; std::vector flippedData(data, data + stride * height); // Use stb_image_write to write the BMP image to the file descriptor @@ -1477,169 +1223,188 @@ bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { } /** * @brief Helper function to gather data from a field using interpolation. - * + * * This function computes the value at a given position by interpolating the values in the field. - * The position is transformed from physical coordinates to grid coordinates and then used to - * gather data from the field. The interpolation weights are computed based on the relative + * The position is transformed from physical coordinates to grid coordinates and then used to + * gather data from the field. The interpolation weights are computed based on the relative * positions within the grid cells. - * + * * @tparam View Type of the field view. * @tparam T Scalar type for coordinates and grid spacing. * @tparam Dim Dimensionality of the position vector. - * + * * @param v The field view from which data is to be gathered. * @param pos The position vector in physical coordinates where the data is to be gathered. * @param origin The origin of the grid in physical coordinates. * @param hr The grid spacing vector. * @param lDom The local domain indices of the grid. - * + * * @return typename View::value_type The interpolated value at the specified position. */ -template -KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, const ippl::NDIndex<3>& lDom){ +template +KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, + const ippl::Vector& pos, + const ippl::Vector& origin, + const ippl::Vector& hr, + const ippl::NDIndex<3>& lDom) { using vector_type = ippl::Vector; vector_type l; - for(unsigned k = 0;k < 3;k++){ - l[k] = (pos[k] - origin[k]) / hr[k] + 1.0; //gather is implemented in a way such that this 1 is necessary here - } + for (unsigned k = 0; k < 3; k++) { + l[k] = (pos[k] - origin[k]) / hr[k] + + 1.0; // gather is implemented in a way such that this 1 is necessary here + } ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; ippl::Vector whi = l - index; ippl::Vector wlo(1.0); wlo -= whi; - //TODO: nghost + // TODO: nghost ippl::Vector args = index - lDom.first() + 1; - for(unsigned k = 0;k < 3;k++){ - if(args[k] >= v.extent(k) || args[k] == 0){ + for (unsigned k = 0; k < 3; k++) { + if (args[k] >= v.extent(k) || args[k] == 0) { return typename View::value_type(0); } } - return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, whi, args)/*}*/; - + return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, + whi, args) /*}*/; } -template -scalar test_gauss_law(uint32_t n){ - +template +scalar test_gauss_law(uint32_t n) { ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); ippl::Vector nr{n / 2, n / 2, 2 * n}; - ippl::Vector extents{meter_in_unit_lengths,meter_in_unit_lengths,meter_in_unit_lengths}; + ippl::Vector extents{meter_in_unit_lengths, meter_in_unit_lengths, + meter_in_unit_lengths}; std::array isParallel; isParallel.fill(false); isParallel[2] = true; - - // all parallel layout, standard domain, normal axis order + + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); - //[-1, 1] box + //[-1, 1] box ippl::Vector hx = extents / nr.cast(); - using vector_type = ippl::Vector; - ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths)}; ippl::UniformCartesian mesh(owned, hx, origin); uint32_t pcount = 1 << 20; - //config cfg{}; - //cfg.space_charge = true; - ippl::NSFDSolverWithParticles field_state(layout, mesh, pcount); - field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; - field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant - auto pview = field_state.particles.R.getView(); - auto p1view = field_state.particles.R_nm1.getView(); + // config cfg{}; + // cfg.space_charge = true; + ippl::NSFDSolverWithParticles field_state(layout, mesh, + pcount); + field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; // Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - - Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ - auto state = random_pool.get_state(); - pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - p1view(i) = pview(i); - random_pool.free_state(state); - }); + + Kokkos::parallel_for( + pcount, KOKKOS_LAMBDA(size_t i) { + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + p1view(i) = pview(i); + random_pool.free_state(state); + }); Kokkos::fence(); field_state.J = scalar(0.0); field_state.scatterBunch(); - for(size_t i = 0;i < 8*n;i++){ + for (size_t i = 0; i < 8 * n; i++) { field_state.field_solver.solve(); } Kokkos::fence(); auto lDom = field_state.E.getLayout().getLocalNDIndex(); - + std::ofstream line("gauss_line.txt"); - typename ippl::FELSimulationState::e_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.E.getView()); - for(unsigned i = 1;i < nr[2];i++){ + typename ippl::FELSimulationState::e_view_type::host_mirror_type view = + Kokkos::create_mirror_view(field_state.E.getView()); + for (unsigned i = 1; i < nr[2]; i++) { vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; pos[2] += hx[2] * scalar(i); ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); - line << pos.norm() * unit_length_in_meters << " " << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " + << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; } return 0.0f; } -template -scalar test_amperes_law(uint32_t n){ - +template +scalar test_amperes_law(uint32_t n) { ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); ippl::Vector nr{n / 2, n / 2, 2 * n}; - ippl::Vector extents{meter_in_unit_lengths,(scalar)(4 * meter_in_unit_lengths),meter_in_unit_lengths}; + ippl::Vector extents{meter_in_unit_lengths, (scalar)(4 * meter_in_unit_lengths), + meter_in_unit_lengths}; std::array isParallel; isParallel.fill(false); isParallel[2] = true; - + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); //[-1, 1] box ippl::Vector hx; - for(unsigned d = 0;d < 3;d++){ + for (unsigned d = 0; d < 3; d++) { hx[d] = extents[d] / (scalar)nr[d]; } - using vector_type = ippl::Vector; - ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), scalar(-2.0 * meter_in_unit_lengths), scalar(-0.5 * meter_in_unit_lengths)}; + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), + scalar(-2.0 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths)}; ippl::UniformCartesian mesh(owned, hx, origin); uint32_t pcount = 1 << 20; - ippl::NSFDSolverWithParticles field_state(layout, mesh, pcount); - field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; - field_state.particles.mass = scalar(1.0) / pcount; //Irrelefant - auto pview = field_state.particles.R.getView(); - auto p1view = field_state.particles.R_nm1.getView(); - constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; - scalar timestep = field_state.field_solver.dt; + ippl::NSFDSolverWithParticles field_state(layout, mesh, + pcount); + field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; // Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + scalar timestep = field_state.field_solver.dt; Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); - - Kokkos::parallel_for(pcount, KOKKOS_LAMBDA(size_t i){ - //bunch.gammaBeta[i].fill(scalar(0)); - auto state = random_pool.get_state(); - pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); - pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); - p1view(i) = pview(i); - p1view(i)[1] -= vy * timestep; - random_pool.free_state(state); - }); + + Kokkos::parallel_for( + pcount, KOKKOS_LAMBDA(size_t i) { + // bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); + p1view(i) = pview(i); + p1view(i)[1] -= vy * timestep; + random_pool.free_state(state); + }); Kokkos::fence(); field_state.J = scalar(0.0); field_state.scatterBunch(); - for(size_t i = 0;i < 8*n;i++){ + for (size_t i = 0; i < 8 * n; i++) { field_state.field_solver.solve(); } field_state.field_solver.evaluate_EB(); Kokkos::fence(); auto lDom = field_state.B.getLayout().getLocalNDIndex(); - + std::ofstream line("ampere_line.txt"); - - typename ippl::FELSimulationState::b_view_type::host_mirror_type view = Kokkos::create_mirror_view(field_state.B.getView()); - //ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); - for(unsigned i = 1;i < nr[2];i++){ + + typename ippl::FELSimulationState::b_view_type::host_mirror_type view = + Kokkos::create_mirror_view(field_state.B.getView()); + // ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for (unsigned i = 1; i < nr[2]; i++) { vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; pos[2] += hx[2] * scalar(i); ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); - //line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * unit_electric_fieldstrength_in_voltpermeters << "\n"; - line << pos.norm() * unit_length_in_meters << " " << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n"; + // line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * + // unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " + << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n"; } return 0.0f; } @@ -1647,19 +1412,22 @@ int main(int argc, char* argv[]) { using scalar = float; ippl::initialize(argc, argv); { - - //test_gauss_law(64); - //test_amperes_law(64); - //goto exit; - config cfg = read_config("../config.json"); - const scalar frame_gamma = std::max(decltype(cfg)::scalar(1), cfg.bunch_gamma / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); + // test_gauss_law(64); + // test_amperes_law(64); + // goto exit; + config cfg = read_config("../config.json"); + const scalar frame_gamma = std::max( + decltype(cfg)::scalar(1), + cfg.bunch_gamma + / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); cfg.extents[2] *= frame_gamma; cfg.total_time /= frame_gamma; - - const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); + + const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); const scalar frame_gammabeta = frame_gamma * frame_beta; - UniaxialLorentzframe frame_boost(frame_gammabeta); - ippl::undulator_parameters uparams(cfg); + ippl::UniaxialLorentzframe frame_boost(frame_gammabeta); + ippl::undulator_parameters uparams(cfg.undulator_K, cfg.undulator_period, + cfg.undulator_length); BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); @@ -1667,116 +1435,134 @@ int main(int argc, char* argv[]) { std::array isParallel; isParallel.fill(false); isParallel[2] = true; - + // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); //[-1, 1] box - ippl::Vector hx = {scalar( cfg.extents[0] / cfg.resolution[0]), scalar(cfg.extents[1] / cfg.resolution[1]), scalar(cfg.extents[2] / cfg.resolution[2])}; - ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), scalar(-cfg.extents[1] * 0.5), scalar(-cfg.extents[2] * 0.5)}; + ippl::Vector hx = {scalar(cfg.extents[0] / cfg.resolution[0]), + scalar(cfg.extents[1] / cfg.resolution[1]), + scalar(cfg.extents[2] / cfg.resolution[2])}; + ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), + scalar(-cfg.extents[1] * 0.5), + scalar(-cfg.extents[2] * 0.5)}; ippl::UniformCartesian mesh(owned, hx, origin); std::cout << "hx: " << hx << "\n"; std::cout << "origin: " << origin << "\n"; std::cout << "extents: " << cfg.extents << std::endl; - if(sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1){ + if (sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1) { std::cerr << "Dispersion relation not satisfiable\n"; abort(); } - ippl::FELSimulationState fdtd_state(layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); - - if(ippl::Comm->rank() == 0){ + ippl::FELSimulationState fdtd_state( + layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); + + if (ippl::Comm->rank() == 0) { std::cout << "Init particles: " << std::endl; - size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, mithra_config, frame_gamma); - fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc; + size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, + mithra_config, frame_gamma); + fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc; fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc; - } - else{ + } else { fdtd_state.fieldsAndParticles.particles.create(0); } { - auto rview = fdtd_state.fieldsAndParticles.particles.R.getView(); + auto rview = fdtd_state.fieldsAndParticles.particles.R.getView(); auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView(); - ippl::Vector meanpos = fdtd_state.fieldsAndParticles.particles.R.sum() * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum()); - - Kokkos::parallel_for(fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - rview(i) -= meanpos; - rm1view(i) -= meanpos; - }); + ippl::Vector meanpos = + fdtd_state.fieldsAndParticles.particles.R.sum() + * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum()); + + Kokkos::parallel_for( + fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + rview(i) -= meanpos; + rm1view(i) -= meanpos; + }); } fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO); - + size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt()); - uint64_t starttime = nanoTime(); + uint64_t starttime = nanoTime(); std::ofstream rad; FILE* ffmpeg_file = nullptr; - if(ippl::Comm->rank() == 0){ + if (ippl::Comm->rank() == 0) { rad = std::ofstream("radiation.txt"); - const char* ffmpegCmd = "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p -c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; - if(cfg.output_rhythm != 0){ + const char* ffmpegCmd = + "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf " + "scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p " + "-c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; + if (cfg.output_rhythm != 0) { ffmpeg_file = popen(ffmpegCmd, "w"); } } - - for(size_t i = 0;i < timesteps_required;i++){ - if(ippl::Comm->rank() == 0){ + for (size_t i = 0; i < timesteps_required; i++) { + if (ippl::Comm->rank() == 0) { std::cout << "Doing step: " << i << std::endl; } fdtd_state.step(); - auto ldom = layout.getLocalNDIndex(); - auto nrg = fdtd_state.nr_global; - auto eview = fdtd_state.fieldsAndParticles.E.getView(); - auto bview = fdtd_state.fieldsAndParticles.B.getView(); + auto ldom = layout.getLocalNDIndex(); + auto nrg = fdtd_state.nr_global; + auto eview = fdtd_state.fieldsAndParticles.E.getView(); + auto bview = fdtd_state.fieldsAndParticles.B.getView(); double radiation = 0.0; - Kokkos::parallel_reduce(ippl::getRangePolicy(eview, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref){ - Kokkos::pair, ippl::Vector> buncheb{eview(i,j,k), bview(i,j,k)}; - ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; - ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; - uint32_t kg = k + ldom.first()[2]; - if(kg == nrg[2] - 3){ - ref += Elab.cross(Blab)[2]; - } - - }, radiation); - double radiation_in_watt_on_this_rank = radiation * - double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters * unit_length_in_meters) * - fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; + Kokkos::parallel_reduce( + ippl::getRangePolicy(eview, 1), + KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref) { + Kokkos::pair, ippl::Vector> buncheb{ + eview(i, j, k), bview(i, j, k)}; + ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; + ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; + uint32_t kg = k + ldom.first()[2]; + if (kg == nrg[2] - 3) { + ref += Elab.cross(Blab)[2]; + } + }, + radiation); + double radiation_in_watt_on_this_rank = + radiation + * double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters + * unit_length_in_meters) + * fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; double radiation_in_watt_global = 0.0; - MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); - if(ippl::Comm->rank() == 0){ - ippl::Vector pos{0,0,(float)cfg.extents[2]}; + MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, + MPI_SUM, 0, ippl::Comm->getCommunicator()); + if (ippl::Comm->rank() == 0) { + ippl::Vector pos{0, 0, (float)cfg.extents[2]}; frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i); rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; } - + int rank = ippl::Comm->rank(); int size = ippl::Comm->size(); - if((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)){ - - - int img_height = 400; - int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); + if ((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)) { + int img_height = 400; + int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); float* imagedata = new float[img_width * img_height * 3]; std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); - float* idata_recvbuffer = new float[img_width * img_height * 3]; - int floatcount = img_width * img_height * 3; + float* idata_recvbuffer = new float[img_width * img_height * 3]; + int floatcount = img_width * img_height * 3; uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror(); Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView()); - for(size_t hi = 0;hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();hi++){ + for (size_t hi = 0; hi < fdtd_state.fieldsAndParticles.particles.getLocalNum(); + hi++) { ippl::Vector ppos = phmirror(hi); ppos -= mesh.getOrigin(); ppos /= cfg.extents.cast(); int x_imgcoord = ppos[2] * img_width; int y_imgcoord = ppos[0] * img_height; - //printf("%d, %d\n", x_imgcoord, y_imgcoord); - if(y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width && y_imgcoord < img_height){ - const float intensity = std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); - //std::cout << intensity << "\n"; - imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = - std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + intensity); + // printf("%d, %d\n", x_imgcoord, y_imgcoord); + if (y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width + && y_imgcoord < img_height) { + const float intensity = + std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); + // std::cout << intensity << "\n"; + imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = + std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + + intensity); } }; @@ -1786,36 +1572,63 @@ int main(int argc, char* argv[]) { Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView()); { - for(int i = 1;i < img_width;i++){ - for(int j = 1;j < img_height;j++){ - int i_remap = (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; - int j_remap = (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; - if(i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]){ - if(j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]){ - ippl::Vector E = eh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); - ippl::Vector B = bh(j_remap + 1 - ldom.first()[0], fdtd_state.nr_global[1] / 2, i_remap + 1 - ldom.first()[2]); - + for (int i = 1; i < img_width; i++) { + for (int j = 1; j < img_height; j++) { + int i_remap = + (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; + int j_remap = + (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; + if (i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]) { + if (j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]) { + ippl::Vector E = eh(j_remap + 1 - ldom.first()[0], + fdtd_state.nr_global[1] / 2, + i_remap + 1 - ldom.first()[2]); + ippl::Vector B = bh(j_remap + 1 - ldom.first()[0], + fdtd_state.nr_global[1] / 2, + i_remap + 1 - ldom.first()[2]); + ippl::Vector poynting = E.cross(B); - Kokkos::pair, ippl::Vector> eblab = frame_boost.inverse_transform_EB( - Kokkos::make_pair, ippl::Vector>(E, B) - ); - ippl::Vector poyntinglab = eblab.first.cross(eblab.second); + Kokkos::pair, ippl::Vector> + eblab = frame_boost.inverse_transform_EB( + Kokkos::make_pair, + ippl::Vector>(E, B)); + ippl::Vector poyntinglab = + eblab.first.cross(eblab.second); poynting = poyntinglab; - if(poynting.norm() > 0){ - //std::cout << poynting.norm() << "\n"; + if (poynting.norm() > 0) { + // std::cout << poynting.norm() << "\n"; } - float normalized_colorscale_value = std::sqrt(poynting.norm()) * 0.00001f; - int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); - imagedata[(j * img_width + i) * 3 + 0] += turbo_cm[index][0] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); - imagedata[(j * img_width + i) * 3 + 1] += turbo_cm[index][1] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); - imagedata[(j * img_width + i) * 3 + 2] += turbo_cm[index][2] * 255.0f;// * std::min(normalized_colorscale_value * 100.0f + 50.0f, 150.0f);//(unsigned char)(std::min(255u, (unsigned int)(0.5f * std::sqrt(std::sqrt(poynting.squaredNorm()))))); + float normalized_colorscale_value = + std::sqrt(poynting.norm()) * 0.00001f; + int index = (int)std::max( + 0.0f, + std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] += + turbo_cm[index][0] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 1] += + turbo_cm[index][1] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 2] += + turbo_cm[index][2] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); } } } } } /* - * This code should be replaced by the Image Gathering anyway, but that's a different PR + * This code should be replaced by the Image Gathering anyway, but that's a + * different PR */ int mask = 1; while (mask < size) { @@ -1823,36 +1636,41 @@ int main(int argc, char* argv[]) { { if ((rank & mask) == 0) { // Send data to partner - MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); + MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, + ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); // Apply image summation - for(int f = 0;f < floatcount;f++){ + for (int f = 0; f < floatcount; f++) { imagedata[f] += idata_recvbuffer[f]; } } else { - MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, ippl::Comm->getCommunicator()); + MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, + ippl::Comm->getCommunicator()); // Receive data from partner and apply reduction - } } mask <<= 1; // Move to next bit position for pairing } - if(rank == 0){ + if (rank == 0) { char output[1024] = {0}; - + snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); - std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); - if(ffmpeg_file != nullptr) + std::transform(imagedata, imagedata + img_height * img_width * 3, + imagedata_final, [](float x) { + return (unsigned char)std::min(255.0f, std::max(0.0f, x)); + }); + if (ffmpeg_file != nullptr) writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); } delete[] imagedata; delete[] idata_recvbuffer; delete[] imagedata_final; - } + } } uint64_t endtime = nanoTime(); - if(ippl::Comm->rank() == 0) - std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 << std::endl; + if (ippl::Comm->rank() == 0) + std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 + << std::endl; } - //exit: + // exit: ippl::finalize(); } \ No newline at end of file diff --git a/alpine/Undulator.h b/alpine/Undulator.h new file mode 100644 index 000000000..981786b8e --- /dev/null +++ b/alpine/Undulator.h @@ -0,0 +1,238 @@ +#ifndef UNDULATOR_H +#define UNDULATOR_H +#include + +#include + +#include "Types/Vector.h" + +#include "units.h" +namespace ippl { + /** + * @brief A template struct representing a uniaxial Lorentz frame. + * + * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis + * (default is the z-axis, axis 2). It includes methods for transforming vectors and + * electromagnetic fields between frames, as well as for computing gamma factors and beta + * velocities. The struct uses Kokkos for portability and performance. + * + * @tparam T The scalar type used for computations (e.g., float, double). + * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the + * z-axis). + */ + template + struct UniaxialLorentzframe { + /// Speed of light, set to 1 for natural units. + constexpr static T c = 1.0; + /// Alias for the scalar type used in the struct. + using scalar = T; + /// Alias for a 3-dimensional vector of the scalar type. + using Vector3 = ippl::Vector; + + /// Beta velocity component along the specified axis. + scalar beta_m; + /// Product of gamma factor and beta velocity. + scalar gammaBeta_m; + /// Gamma factor for the Lorentz transformation. + scalar gamma_m; + + /** + * @brief Create a UniaxialLorentzframe from a given gamma factor. + * + * This static member function constructs a UniaxialLorentzframe object using a given + * gamma factor. It computes the corresponding beta velocity and gamma*beta product. + * + * @param gamma The gamma factor for the Lorentz transformation. + * @return A UniaxialLorentzframe object with computed beta and gamma*beta. + */ + KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) { + UniaxialLorentzframe ret; + ret.gamma_m = gamma; + scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); + scalar gammabeta = gamma * beta; + ret.beta_m = beta; + ret.gammaBeta_m = gammabeta; + return ret; + } + + /** + * @brief Get a UniaxialLorentzframe with negative beta velocity. + * + * This member function returns a new UniaxialLorentzframe object with the same gamma + * factor but with a negative beta velocity, effectively representing the inverse + * Lorentz transformation. + * + * @return A UniaxialLorentzframe object with negative beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative() const noexcept { + UniaxialLorentzframe ret; + ret.beta_m = -beta_m; + ret.gammaBeta_m = -gammaBeta_m; + ret.gamma_m = gamma_m; + return ret; + } + + /// Default constructor. + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; + + /** + * @brief Construct a UniaxialLorentzframe from a gamma*beta value. + * + * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta + * value. It computes the corresponding beta velocity and gamma factor. + * + * @param gammaBeta The product of the gamma factor and beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) { + using Kokkos::sqrt; + gammaBeta_m = gammaBeta; + beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); + gamma_m = sqrt(1 + gammaBeta * gammaBeta); + } + + /** + * @brief Transform a spatial vector from the primed frame to the unprimed frame. + * + * This member function transforms a given spatial vector from the primed frame to the + * unprimed frame using the Lorentz transformation along the specified axis and the given + * time. + * + * @param arg The spatial vector to be transformed. + * @param time The time component for the transformation. + */ + KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept { + arg[axis] = gamma_m * (arg[axis] + beta_m * time); + } + + /** + * @brief Transform electric and magnetic fields from the unprimed to the primed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * unprimed frame to the primed frame using the Lorentz transformation along the specified + * axis. + * + * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the + * unprimed frame. + * @return A pair of vectors representing the electric and magnetic fields in the primed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( + const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { + Kokkos::pair, ippl::Vector> ret; + ippl::Vector betavec{0, 0, beta_m}; + ippl::Vector vnorm{axis == 0, axis == 1, axis == 2}; + ret.first = + ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = + ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; + ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; + return ret; + } + + /** + * @brief Transform electric and magnetic fields from the primed to the unprimed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * primed frame to the unprimed frame using the inverse Lorentz transformation along the + * specified axis. + * + * @param primedEB A pair of vectors representing the electric and magnetic fields in the + * primed frame. + * @return A pair of vectors representing the electric and magnetic fields in the unprimed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + inverse_transform_EB( + const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { + return negative().transform_EB(primedEB); + } + }; + template + struct undulator_parameters { + scalar lambda_u; // MITHRA: lambda_u + scalar K; // Undulator parameter + scalar length; + scalar B_magnitude; + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) + : lambda_u(lambda_u) + , K(K_undulator_parameter) + , length(_length) { + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) + / (electron_charge_in_unit_charges * lambda_u); + } + }; + /** + * @brief Struct representing an undulator. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ + template + struct Undulator { + undulator_parameters uparams; ///< Parameters of the undulator. + scalar distance_to_entry; ///< Distance to the entry of the undulator. + scalar k_u; ///< Wavenumber of the undulator. + + /** + * @brief Constructor to initialize undulator parameters and calculate k_u. + * + * @param p Parameters of the undulator. + * @param dte Distance to the entry of the undulator. + */ + KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) + : uparams(p) + , distance_to_entry(dte) + , k_u(2 * M_PI / p.lambda_u) {} + + /** + * @brief Overloaded operator() to compute magnetic field components. + * + * @param position_in_lab_frame Position vector in the lab frame. + * @return Kokkos::pair, ippl::Vector> + * Pair containing magnetic field and its derivative. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + operator()(const ippl::Vector& position_in_lab_frame) const noexcept { + using Kokkos::cos; + using Kokkos::cosh; + using Kokkos::exp; + using Kokkos::sin; + using Kokkos::sinh; + + Kokkos::pair, ippl::Vector> + ret; // Return pair containing magnetic field and its derivative. + ret.first.fill(0); // Initialize magnetic field vector. + ret.second.fill(0); // Initialize derivative vector. + + // If the position is before the undulator entry. + if (position_in_lab_frame[2] < distance_to_entry) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); // Ensure we are in the correct region. + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) + * 0.5)); // Gaussian decay factor. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * z_in_undulator * k_u * scal; // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * scal; // z-component. + } + // If the position is within the undulator. + else if (position_in_lab_frame[2] > distance_to_entry + && position_in_lab_frame[2] < distance_to_entry + uparams.length) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); // Ensure we are in the correct region. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * sin(k_u * z_in_undulator); // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * cos(k_u * z_in_undulator); // z-component. + } + return ret; + } + }; +} // namespace ippl +#endif // UNDULATOR_H \ No newline at end of file diff --git a/alpine/units.h b/alpine/units.h new file mode 100644 index 000000000..318892db8 --- /dev/null +++ b/alpine/units.h @@ -0,0 +1,37 @@ +#ifndef UNITS_HPP +#define UNITS_HPP +constexpr double sqrt_4pi = 3.54490770181103205459; +constexpr double alpha_scaling_factor = 1e30; +constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; +constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi; +constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor; +constexpr double electron_mass_in_kg = 9.1093837015e-31; +constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor; +constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); +constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg; +constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters; +constexpr double electron_charge_in_coulombs = 1.602176634e-19; +constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs; + +constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges; +constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds; +constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses; +constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); + +constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges; +constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges; +constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges; +constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds; +constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes; +constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters; +constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters); +constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters); +constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters; +constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor); +constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts; +constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters); +constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes); +constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds); +constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G; +constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons; +#endif \ No newline at end of file diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index 52ec14529..f9fb5fa5d 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -4,73 +4,38 @@ using std::size_t; #include "Types/Vector.h" - - #include "FieldLayout/FieldLayout.h" -#include "Meshes/UniformCartesian.h" +#include "MaxwellSolvers/AbsorbingBC.h" #include "MaxwellSolvers/Maxwell.h" +#include "Meshes/UniformCartesian.h" #include "Particle/ParticleBase.h" -#include "MaxwellSolvers/AbsorbingBC.h" -constexpr double sqrt_4pi = 3.54490770181103205459; -constexpr double alpha_scaling_factor = 1e30; -constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; -constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi; -constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor; -constexpr double electron_mass_in_kg = 9.1093837015e-31; -constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor; -constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); -constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg; -constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters; -constexpr double electron_charge_in_coulombs = 1.602176634e-19; -constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs; - -constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges; -constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds; -constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses; -constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); - -constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges; -constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges; -constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges; -constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds; -constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes; -constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters; -constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters); -constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters); -constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters; -constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor); -constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts; -constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters); -constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes); -constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds); -constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G; -constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons; - -constexpr double sq(double x){ + +constexpr double sq(double x) { return x * x; } -constexpr float sq(float x){ +constexpr float sq(float x) { return x * x; } #define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X)) namespace ippl { - enum fdtd_bc{ - periodic, absorbing + enum fdtd_bc { + periodic, + absorbing }; - + template class StandardFDTDSolver : Maxwell { - public: + public: constexpr static unsigned Dim = EMField::dim; - using scalar = typename EMField::value_type::value_type; - using Vector_t = Vector; - using FourVector_t = typename FourField::value_type; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; StandardFDTDSolver(FourField& source, EMField& E, EMField& B) { Maxwell::setSource(source); Maxwell::setEMFields(E, B); initialize(); } - virtual void solve()override{ + virtual void solve() override { step(); timeShift(); evaluate_EB(); @@ -87,128 +52,134 @@ namespace ippl { (void)x; }; bcsetter(std::make_index_sequence{}); - A_n .setFieldBC(vector_bcs); + A_n.setFieldBC(vector_bcs); A_np1.setFieldBC(vector_bcs); A_nm1.setFieldBC(vector_bcs); } - FourField A_n; FourField A_np1; FourField A_nm1; scalar dt; - private: - void timeShift(){ - - //Look into this, maybe cyclic swap is better + private: + void timeShift() { + // Look into this, maybe cyclic swap is better Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); } - void applyBCs(){ - if constexpr(boundary_conditions == periodic){ + void applyBCs() { + if constexpr (boundary_conditions == periodic) { A_n.getFieldBC().apply(A_n); A_nm1.getFieldBC().apply(A_nm1); A_np1.getFieldBC().apply(A_np1); - } - else{ + } else { Vector true_nr = nr_m; true_nr += (A_n.getNghost() * 2); second_order_mur_boundary_conditions bcs{}; bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); } } - public: - void step(){ - const auto& ldom = layout_mp->getLocalNDIndex(); - const int nghost = A_n.getNghost(); - const auto aview = A_n .getView(); - const auto anp1view = A_np1.getView(); - const auto anm1view = A_nm1.getView(); + + public: + void step() { + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n.getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); const auto source_view = Maxwell::source_mp->getView(); - const scalar a1 = scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2])); - const scalar a2 = sq(dt / hr_m[0]); - const scalar a4 = sq(dt / hr_m[1]); - const scalar a6 = sq(dt / hr_m[2]); - const scalar a8 = sq(dt); + const scalar a1 = + scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2])); + const scalar a2 = sq(dt / hr_m[0]); + const scalar a4 = sq(dt / hr_m[1]); + const scalar a6 = sq(dt / hr_m[2]); + const scalar a8 = sq(dt); Vector true_nr = nr_m; true_nr += (nghost * 2); - constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0; + constexpr uint32_t one_if_absorbing_otherwise_0 = + boundary_conditions == absorbing ? 1 : 0; Kokkos::parallel_for( - "Four potential update", ippl::getRangePolicy(aview, nghost), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - // global indices - const uint32_t ig = i + ldom.first()[0]; - const uint32_t jg = j + ldom.first()[1]; - const uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) - + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); - - if(val == 0){ - FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k) - + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k)) - + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k)) - + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1)) - + a8 * (-source_view(i, j, k)); - anp1view(i, j, k) = interior; - } - else{ - //std::cout << i << ", " << j << ", " << k << "\n"; - } - }); + "Four potential update", ippl::getRangePolicy(aview, nghost), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + // global indices + const uint32_t ig = i + ldom.first()[0]; + const uint32_t jg = j + ldom.first()[1]; + const uint32_t kg = k + ldom.first()[2]; + uint32_t val = + uint32_t(ig == one_if_absorbing_otherwise_0) + + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); + + if (val == 0) { + FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k) + + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k)) + + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k)) + + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1)) + + a8 * (-source_view(i, j, k)); + anp1view(i, j, k) = interior; + } else { + // std::cout << i << ", " << j << ", " << k << "\n"; + } + }); Kokkos::fence(); applyBCs(); A_np1.fillHalo(); } - void evaluate_EB(){ + void evaluate_EB() { ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; - const scalar idt = scalar(1.0) / dt; - auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView(); + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), + A_nm1 = this->A_nm1.getView(); auto source = Maxwell::source_mp->getView(); auto Eview = Maxwell::En_mp->getView(); auto Bview = Maxwell::Bn_mp->getView(); - Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; - ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; - ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; - ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; - - ippl::Vector grad_phi{ - dAdx[0], dAdy[0], dAdz[0] - }; - ippl::Vector curlA{ - dAdy[3] - dAdz[2], - dAdz[1] - dAdx[3], - dAdx[2] - dAdy[1], - }; - Eview(i,j,k) = -dAdt - grad_phi; - Bview(i,j,k) = curlA; - }); + Kokkos::parallel_for( + this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + ippl::Vector dAdt = + (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = + (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = + (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = + (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{dAdx[0], dAdy[0], dAdz[0]}; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i, j, k) = -dAdt - grad_phi; + Bview(i, j, k) = curlA; + }); Kokkos::fence(); } - bool periodic_bc; - + typename FourField::Mesh_t* mesh_mp; FieldLayout* layout_mp; NDIndex domain_m; Vector_t hr_m; - - + Vector nr_m; - + void initialize() { - // get layout and mesh + // get layout and mesh layout_mp = &(this->source_mp->getLayout()); mesh_mp = &(this->source_mp->get_mesh()); - + // get mesh spacing, domain, and mesh size - hr_m = mesh_mp->getMeshSpacing(); - dt = hr_m[0] / 2; - for (unsigned int i = 0; i < Dim; ++i){ + hr_m = mesh_mp->getMeshSpacing(); + dt = hr_m[0] / 2; + for (unsigned int i = 0; i < Dim; ++i) { dt = std::min(dt, hr_m[i] / 2); } domain_m = layout_mp->getDomain(); @@ -227,20 +198,20 @@ namespace ippl { }; /** * @brief Nonstandard Finite-Difference Time-Domain - * - * @tparam EMField - * @tparam FourField + * + * @tparam EMField + * @tparam FourField */ template class NonStandardFDTDSolver : Maxwell { - public: + public: constexpr static unsigned Dim = EMField::dim; - using scalar = typename EMField::value_type::value_type; - using Vector_t = Vector; - using FourVector_t = typename FourField::value_type; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) { auto hx = source.get_mesh().getMeshSpacing(); - if((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1){ + if ((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1) { std::cerr << "Dispersion-free CFL condition not satisfiable\n"; std::abort(); } @@ -248,7 +219,7 @@ namespace ippl { Maxwell::setEMFields(E, B); initialize(); } - virtual void solve()override{ + virtual void solve() override { step(); timeShift(); evaluate_EB(); @@ -268,116 +239,135 @@ namespace ippl { (void)x; }; bcsetter(std::make_index_sequence{}); - A_n .setFieldBC(vector_bcs); + A_n.setFieldBC(vector_bcs); A_np1.setFieldBC(vector_bcs); A_nm1.setFieldBC(vector_bcs); } - - private: - void timeShift(){ - - //Look into this, maybe cyclic swap is better + private: + void timeShift() { + // Look into this, maybe cyclic swap is better Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); } - void applyBCs(){ - if constexpr(boundary_conditions == periodic){ + void applyBCs() { + if constexpr (boundary_conditions == periodic) { A_n.getFieldBC().apply(A_n); A_nm1.getFieldBC().apply(A_nm1); A_np1.getFieldBC().apply(A_np1); - } - else{ + } else { Vector true_nr = nr_m; true_nr += (A_n.getNghost() * 2); second_order_mur_boundary_conditions bcs{}; bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); } } - - public: - template - struct nondispersive{ + + public: + template + struct nondispersive { scalar a1; scalar a2; scalar a4; scalar a6; scalar a8; }; - void step(){ - const auto& ldom = layout_mp->getLocalNDIndex(); - const int nghost = A_n.getNghost(); - const auto aview = A_n .getView(); - const auto anp1view = A_np1.getView(); - const auto anm1view = A_nm1.getView(); + void step() { + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n.getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); const auto source_view = Maxwell::source_mp->getView(); const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); nondispersive ndisp{ - .a1 = 2 * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2*calA) * sq(dt / hr_m[1]) - sq(dt / hr_m[2])), + .a1 = 2 + * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2 * calA) * sq(dt / hr_m[1]) + - sq(dt / hr_m[2])), .a2 = sq(dt / hr_m[0]), .a4 = sq(dt / hr_m[1]), .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), - .a8 = sq(dt) - }; + .a8 = sq(dt)}; Vector true_nr = nr_m; true_nr += (nghost * 2); - constexpr uint32_t one_if_absorbing_otherwise_0 = boundary_conditions == absorbing ? 1 : 0; - Kokkos::parallel_for(ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - uint32_t ig = i + ldom.first()[0]; - uint32_t jg = j + ldom.first()[1]; - uint32_t kg = k + ldom.first()[2]; - uint32_t val = uint32_t(ig == one_if_absorbing_otherwise_0) + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) - + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); - - if(!val){ - anp1view(i, j, k) = -anm1view(i,j,k) - + ndisp.a1 * aview(i,j,k) - + ndisp.a2 * (calA * aview(i + 1, j, k - 1) + (1 - 2 * calA) * aview(i + 1, j, k) + calA * aview(i + 1, j, k + 1)) - + ndisp.a2 * (calA * aview(i - 1, j, k - 1) + (1 - 2 * calA) * aview(i - 1, j, k) + calA * aview(i - 1, j, k + 1)) - + ndisp.a4 * (calA * aview(i, j + 1, k - 1) + (1 - 2 * calA) * aview(i, j + 1, k) + calA * aview(i, j + 1, k + 1)) - + ndisp.a4 * (calA * aview(i, j - 1, k - 1) + (1 - 2 * calA) * aview(i, j - 1, k) + calA * aview(i, j - 1, k + 1)) - + ndisp.a6 * aview(i, j, k + 1) + ndisp.a6 * aview(i, j, k - 1) + ndisp.a8 * source_view(i, j, k); - } - }); + constexpr uint32_t one_if_absorbing_otherwise_0 = + boundary_conditions == absorbing ? 1 : 0; + Kokkos::parallel_for( + ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + uint32_t val = + uint32_t(ig == one_if_absorbing_otherwise_0) + + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); + + if (!val) { + anp1view(i, j, k) = -anm1view(i, j, k) + ndisp.a1 * aview(i, j, k) + + ndisp.a2 + * (calA * aview(i + 1, j, k - 1) + + (1 - 2 * calA) * aview(i + 1, j, k) + + calA * aview(i + 1, j, k + 1)) + + ndisp.a2 + * (calA * aview(i - 1, j, k - 1) + + (1 - 2 * calA) * aview(i - 1, j, k) + + calA * aview(i - 1, j, k + 1)) + + ndisp.a4 + * (calA * aview(i, j + 1, k - 1) + + (1 - 2 * calA) * aview(i, j + 1, k) + + calA * aview(i, j + 1, k + 1)) + + ndisp.a4 + * (calA * aview(i, j - 1, k - 1) + + (1 - 2 * calA) * aview(i, j - 1, k) + + calA * aview(i, j - 1, k + 1)) + + ndisp.a6 * aview(i, j, k + 1) + + ndisp.a6 * aview(i, j, k - 1) + + ndisp.a8 * source_view(i, j, k); + } + }); Kokkos::fence(); A_np1.fillHalo(); applyBCs(); } - void evaluate_EB(){ - *(Maxwell::En_mp) = typename EMField::value_type(0); - *(Maxwell::Bn_mp) = typename EMField::value_type(0); + void evaluate_EB() { + *(Maxwell::En_mp) = typename EMField::value_type(0); + *(Maxwell::Bn_mp) = typename EMField::value_type(0); ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; - const scalar idt = scalar(1.0) / dt; - auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), A_nm1 = this->A_nm1.getView(); + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), + A_nm1 = this->A_nm1.getView(); auto source = Maxwell::source_mp->getView(); auto Eview = Maxwell::En_mp->getView(); auto Bview = Maxwell::Bn_mp->getView(); - Kokkos::parallel_for(this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ - ippl::Vector dAdt = (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; - ippl::Vector dAdx = (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; - ippl::Vector dAdy = (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; - ippl::Vector dAdz = (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; - - ippl::Vector grad_phi{ - dAdx[0], dAdy[0], dAdz[0] - }; - ippl::Vector curlA{ - dAdy[3] - dAdz[2], - dAdz[1] - dAdx[3], - dAdx[2] - dAdy[1], - }; - Eview(i,j,k) = -dAdt - grad_phi; - Bview(i,j,k) = curlA; - }); + Kokkos::parallel_for( + this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + ippl::Vector dAdt = + (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = + (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = + (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = + (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{dAdx[0], dAdy[0], dAdz[0]}; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i, j, k) = -dAdt - grad_phi; + Bview(i, j, k) = curlA; + }); Kokkos::fence(); } - - - typename FourField::Mesh_t* mesh_mp; FieldLayout* layout_mp; NDIndex domain_m; @@ -385,15 +375,15 @@ namespace ippl { scalar dt; Vector nr_m; bool periodic_bc; - + void initialize() { - // get layout and mesh + // get layout and mesh layout_mp = &(this->source_mp->getLayout()); mesh_mp = &(this->source_mp->get_mesh()); - + // get mesh spacing, domain, and mesh size hr_m = mesh_mp->getMeshSpacing(); - dt = hr_m[2]; + dt = hr_m[2]; domain_m = layout_mp->getDomain(); for (unsigned int i = 0; i < Dim; ++i) nr_m[i] = domain_m[i].length(); @@ -408,22 +398,23 @@ namespace ippl { A_np1 = 0.0; }; }; - + template - struct Bunch : public ippl::ParticleBase { + struct Bunch : public ippl::ParticleBase { using scalar = _scalar; // Constructor for the Bunch class, taking a PLayout reference Bunch(PLayout& playout) : ippl::ParticleBase(playout) { // Add attributes to the particle bunch - this->addAttribute(Q); // Charge attribute - this->addAttribute(mass); // Mass attribute - this->addAttribute(gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) - this->addAttribute(R_np1); // Position attribute for the next time step - this->addAttribute(R_nm1); // Position attribute for the next time step - this->addAttribute(E_gather); // Electric field attribute for particle gathering - this->addAttribute(B_gather); // Magnedit field attribute for particle gathering + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute( + gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(E_gather); // Electric field attribute for particle gathering + this->addAttribute(B_gather); // Magnedit field attribute for particle gathering } // Destructor for the Bunch class @@ -433,54 +424,53 @@ namespace ippl { using charge_container_type = ippl::ParticleAttrib; using velocity_container_type = ippl::ParticleAttrib>; using vector_container_type = ippl::ParticleAttrib>; - using vector4_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; // Declare instances of the attribute containers - charge_container_type Q; // Charge container - charge_container_type mass; // Mass container - velocity_container_type gamma_beta; // Gamma-beta container - typename ippl::ParticleBase::particle_position_type R_np1; // Position container for the next time step - typename ippl::ParticleBase::particle_position_type R_nm1; // Position container for the previous time step - ippl::ParticleAttrib> E_gather; // Electric field container for particle gathering - ippl::ParticleAttrib> B_gather; // Magnetio field container for particle gathering - + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type + R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type + R_nm1; // Position container for the previous time step + ippl::ParticleAttrib> + E_gather; // Electric field container for particle gathering + ippl::ParticleAttrib> + B_gather; // Magnetio field container for particle gathering }; - template - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf(const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos){ - //return pear, ippl::Vector>{ippl::Vector{5,5,5}, ippl::Vector{0,0,0}}; - //printf("%.10e, %.10e, %.10e\n", (inverse_spacing * spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + template + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos) { + // return pear, ippl::Vector>{ippl::Vector{5,5,5}, + // ippl::Vector{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing * + // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); Kokkos::pair, ippl::Vector> ret; - //pos -= spacing * T(0.5); - ippl::Vector relpos = pos - origin; + // pos -= spacing * T(0.5); + ippl::Vector relpos = pos - origin; ippl::Vector gridpos = relpos / hr; ippl::Vector ipos; ipos = gridpos.template cast(); ippl::Vector fracpos; - for(unsigned k = 0;k < 3;k++){ + for (unsigned k = 0; k < 3; k++) { fracpos[k] = gridpos[k] - (int)ipos[k]; } - //TODO: NGHOST!!!!!!! + // TODO: NGHOST!!!!!!! ipos += ippl::Vector(1); - ret.first = ipos; + ret.first = ipos; ret.second = fracpos; return ret; } - template - KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value){ + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, const scalar value) { auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); ipos -= ldom.first(); - //std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ + // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { return; } assert(fracpos[0] >= 0.0f); @@ -498,10 +488,10 @@ namespace ippl { assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ + for (unsigned d = 0; d < 3; d++) { weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); ipos_l[d] += !!(i & (1 << d)); } @@ -512,23 +502,18 @@ namespace ippl { } assert(abs(accum - 1.0f) < 1e-6f); } - template - KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value){ - //std::cout << "Value: " << value << "\n"; + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, + const ippl::Vector& value) { + // std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); ipos -= ldom.first(); - if( - ipos[0] < 0 - ||ipos[1] < 0 - ||ipos[2] < 0 - ||size_t(ipos[0]) >= view.extent(0) - 1 - ||size_t(ipos[1]) >= view.extent(1) - 1 - ||size_t(ipos[2]) >= view.extent(2) - 1 - ||fracpos[0] < 0 - ||fracpos[1] < 0 - ||fracpos[2] < 0 - ){ - //std::cout << "out of bounds\n"; + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + // std::cout << "out of bounds\n"; return; } assert(fracpos[0] >= 0.0f); @@ -546,10 +531,10 @@ namespace ippl { assert(one_minus_fracpos[2] <= 1.0f); scalar accum = 0; - for(unsigned i = 0;i < 8;i++){ - scalar weight = 1; + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; ippl::Vector ipos_l = ipos; - for(unsigned d = 0;d < 3;d++){ + for (unsigned d = 0; d < 3; d++) { weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); ipos_l[d] += !!(i & (1 << d)); } @@ -561,52 +546,71 @@ namespace ippl { } assert(abs(accum - 1.0f) < 1e-6f); } - template - KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, ippl::Vector hr, ippl::Vector origin, const ippl::Vector& from, const ippl::Vector& to, const scalar factor){ - - - Kokkos::pair, ippl::Vector> from_grid = gridCoordinatesOf(hr, origin, from); - Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to ); - //printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], from_grid.second[2]); - for(int d = 0;d < 3;d++){ - //if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ - // std::cout < + KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, + ippl::Vector hr, + ippl::Vector origin, + const ippl::Vector& from, + const ippl::Vector& to, + const scalar factor) { + Kokkos::pair, ippl::Vector> from_grid = + gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = + gridCoordinatesOf(hr, origin, to); + // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], + // from_grid.second[2]); + for (int d = 0; d < 3; d++) { + // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ + // std::cout <(nghost); - //to_ipos += ippl::Vector(nghost); + // const uint32_t nghost = g.nghost(); + // from_ipos += ippl::Vector(nghost); + // to_ipos += ippl::Vector(nghost); - if(from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]){ - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), ippl::Vector((to - from) * factor)); + if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] + && from_grid.first[2] == to_grid.first[2]) { + scatterToGrid(ldom, Jview, hr, origin, + ippl::Vector((from + to) * scalar(0.5)), + ippl::Vector((to - from) * factor)); return; } ippl::Vector relay; - const int nghost = 1; + const int nghost = 1; const ippl::Vector orig = origin; using Kokkos::max; using Kokkos::min; for (unsigned int i = 0; i < 3; i++) { - relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(1.0) * hr[i] + orig[i], - max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + scalar(0.0) * hr[i] + orig[i], + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(0.0) * hr[i] + orig[i], scalar(0.5) * (to[i] + from[i]))); } - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), ippl::Vector((relay - from) * factor)); - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)) , ippl::Vector((to - relay) * factor)); + scatterToGrid(ldom, Jview, hr, origin, + ippl::Vector((from + relay) * scalar(0.5)), + ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), + ippl::Vector((to - relay) * factor)); } - template - class NSFDSolverWithParticles{ - public: + template + class NSFDSolverWithParticles { + public: constexpr static unsigned dim = 3; - using vector_type = ippl::Vector; - using vector4_type = ippl::Vector; - using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; using playout_type = ParticleSpatialLayout; - using bunch_type = Bunch>; - using Mesh_t = ippl::UniformCartesian; + using bunch_type = Bunch>; + using Mesh_t = ippl::UniformCartesian; FieldLayout* layout_mp; Mesh_t* mesh_mp; playout_type playout; @@ -619,138 +623,158 @@ namespace ippl { ippl::Vector nr_global; ippl::Vector hr_m; size_t steps_taken; - NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) : layout_mp(&layout), mesh_mp(&mesch), playout(layout, mesch), particles(playout), J(mesch, layout), E(mesch, layout), B(mesch, layout), field_solver(J, E, B){ + NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) + : layout_mp(&layout) + , mesh_mp(&mesch) + , playout(layout, mesch) + , particles(playout) + , J(mesch, layout) + , E(mesch, layout) + , B(mesch, layout) + , field_solver(J, E, B) { particles.create(nparticles); nr_global = ippl::Vector{ uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), - uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) - }; - hr_m = mesh_mp->getMeshSpacing(); + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)}; + hr_m = mesh_mp->getMeshSpacing(); steps_taken = 0; - //field_solver.setEMFields(E, B); + // field_solver.setEMFields(E, B); } - template - void scatterBunch(){ - //ippl::Vector* gammaBeta = this->gammaBeta; - auto hr_m = mesh_mp->getMeshSpacing(); + template + void scatterBunch() { + // ippl::Vector* gammaBeta = this->gammaBeta; + auto hr_m = mesh_mp->getMeshSpacing(); const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; assert_isreal(volume); assert_isreal((scalar(1) / volume)); - J = typename decltype(J)::value_type(0); - auto Jview = J.getView(); - auto qview = particles.Q.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto orig = mesh_mp->getOrigin(); - auto hr = mesh_mp->getMeshSpacing(); - auto dt = field_solver.dt; - bool sc = space_charge; + J = typename decltype(J)::value_type(0); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = field_solver.dt; + bool sc = space_charge; ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - vector_type pos = rview(i); - vector_type to = rview(i); - vector_type from = rm1view(i); - if(sc){ - scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); - } - scatterLineToGrid(lDom, Jview, hr, orig, from, to , scalar(qview(i)) / (volume * dt)); - }); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + vector_type pos = rview(i); + vector_type to = rview(i); + vector_type from = rm1view(i); + if (sc) { + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); + } + scatterLineToGrid(lDom, Jview, hr, orig, from, to, + scalar(qview(i)) / (volume * dt)); + }); Kokkos::fence(); J.accumulateHalo(); } - template - void updateBunch(scalar time, callable external_field){ - + template + void updateBunch(scalar time, callable external_field) { Kokkos::fence(); - auto gbview = particles.gamma_beta.getView(); - auto eview = particles.E_gather.getView(); - auto bview = particles.B_gather.getView(); - auto qview = particles.Q.getView(); - auto mview = particles.mass.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto rp1view = particles.R_np1.getView(); + auto gbview = particles.gamma_beta.getView(); + auto eview = particles.E_gather.getView(); + auto bview = particles.B_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); scalar bunch_dt = field_solver.dt / 3; Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); E.fillHalo(); B.fillHalo(); Kokkos::fence(); - for(int bts = 0;bts < 3;bts++){ - - particles.E_gather.gather(E, particles.R, /*offset = */0.0); - particles.B_gather.gather(B, particles.R, /*offset = */0.0); + for (int bts = 0; bts < 3; bts++) { + particles.E_gather.gather(E, particles.R, /*offset = */ 0.0); + particles.B_gather.gather(B, particles.R, /*offset = */ 0.0); Kokkos::fence(); - Kokkos::parallel_for(particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ - const ippl::Vector pgammabeta = gbview(i); - ippl::Vector E_grid = eview(i); - ippl::Vector B_grid = bview(i); - //std::cout << "E_grid: " << E_grid << "\n"; - //std::cout << "B_grid: " << B_grid << "\n"; - ippl::Vector bunchpos = rview(i); - Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time + bunch_dt * bts); - - ippl::Vector, 2> EB{ - ippl::Vector(E_grid + external_eb.first), - ippl::Vector(B_grid + external_eb.second) - }; - - const scalar charge = qview(i); - const scalar mass = mview(i); - const ippl::Vector t1 = pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); - const scalar alpha = charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); - const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); - const ippl::Vector t3 = t1 + t2.cross(scalar(2) * alpha * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); - const ippl::Vector ngammabeta = t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); - - rview(i) = rview(i) + bunch_dt * ngammabeta / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); - gbview(i) = ngammabeta; - }); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector E_grid = eview(i); + ippl::Vector B_grid = bview(i); + // std::cout << "E_grid: " << E_grid << "\n"; + // std::cout << "B_grid: " << B_grid << "\n"; + ippl::Vector bunchpos = rview(i); + Kokkos::pair, ippl::Vector> external_eb = + external_field(bunchpos, time + bunch_dt * bts); + + ippl::Vector, 2> EB{ + ippl::Vector(E_grid + external_eb.first), + ippl::Vector(B_grid + external_eb.second)}; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = + pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = + charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = + t1 + + t2.cross(scalar(2) * alpha + * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = + t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + rview(i) = + rview(i) + + bunch_dt * ngammabeta + / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); Kokkos::fence(); } Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); size_t invalid_count = 0; - auto origo = mesh_mp->getOrigin(); - ippl::Vector extenz;// + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz; // extenz[0] = nr_global[0] * hr_m[0]; extenz[1] = nr_global[1] * hr_m[1]; extenz[2] = nr_global[2] * hr_m[2]; Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, particles.getLocalNum()), - KOKKOS_LAMBDA(size_t i, size_t& ref){ - bool out_of_bounds = false; + Kokkos::RangePolicy< + typename playout_type::RegionLayout_t::view_type::execution_space>( + 0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t & ref) { + bool out_of_bounds = false; ippl::Vector ppos = rview(i); - for(size_t d = 0;d < dim;d++){ + for (size_t d = 0; d < dim; d++) { out_of_bounds |= (ppos[d] <= origo[d]); - out_of_bounds |= (ppos[d] >= origo[d] + extenz[d]); //Check against simulation domain + out_of_bounds |= + (ppos[d] >= origo[d] + extenz[d]); // Check against simulation domain } invalid(i) = out_of_bounds; ref += out_of_bounds; - }, + }, invalid_count); particles.destroy(invalid, invalid_count); Kokkos::fence(); playout.update(particles); - } - void solve(){ + void solve() { scatterBunch(); field_solver.solve(); - updateBunch(field_solver.dt * steps_taken, /*no external field*/[]KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/){return Kokkos::pair{ - vector_type(0), - vector_type(0) - };}); + updateBunch( + field_solver.dt * steps_taken, + /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) { + return Kokkos::pair{vector_type(0), vector_type(0)}; + }); ++steps_taken; } - template - void solve(callable external_field){ + template + void solve(callable external_field) { scatterBunch(); field_solver.solve(); - //std::cout << field_solver.dt * steps_taken << "\n"; + // std::cout << field_solver.dt * steps_taken << "\n"; updateBunch(field_solver.dt * steps_taken, external_field); ++steps_taken; } }; -} +} // namespace ippl #endif \ No newline at end of file diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/test/solver/TestFDTDSolverWithParticles.cpp index a48a829e1..d5d44a174 100644 --- a/test/solver/TestFDTDSolverWithParticles.cpp +++ b/test/solver/TestFDTDSolverWithParticles.cpp @@ -7,7 +7,7 @@ using std::size_t; #include "MaxwellSolvers/FDTD.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include - +#include "../alpine/units.h" #include template requires((std::is_floating_point_v)) From 7a803acfaec8ecb57a2afa065079dcad25b58a7a Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 24 May 2024 17:49:18 +0200 Subject: [PATCH 30/32] Move FEL stuff to separate directory, add matrix type and lorentz transforms --- CMakeLists.txt | 1 + alpine/CMakeLists.txt | 3 - alpine/Undulator.h | 238 ------------------- fel/CMakeLists.txt | 13 ++ {alpine => fel}/FreeElectronLaser.cpp | 0 fel/LorentzTransform.h | 318 ++++++++++++++++++++++++++ fel/Undulator.h | 97 ++++++++ {alpine => fel}/units.h | 0 src/Types/Matrix.h | 186 +++++++++++++++ 9 files changed, 615 insertions(+), 241 deletions(-) delete mode 100644 alpine/Undulator.h create mode 100644 fel/CMakeLists.txt rename {alpine => fel}/FreeElectronLaser.cpp (100%) create mode 100644 fel/LorentzTransform.h create mode 100644 fel/Undulator.h rename {alpine => fel}/units.h (100%) create mode 100644 src/Types/Matrix.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ee3cc6c4..63f4f5060 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,6 +234,7 @@ if (ENABLE_ALPINE) endif () message (STATUS "Enable Alpine") add_subdirectory (alpine) + add_subdirectory(fel) endif () option (ENABLE_UNIT_TESTS "Enable unit tests" OFF) diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index e2711a472..c045305a8 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -21,9 +21,6 @@ target_link_libraries (PenningTrap ${IPPL_LIBS}) add_executable (LandauDamping LandauDamping.cpp) target_link_libraries (LandauDamping ${IPPL_LIBS}) -add_executable (FreeElectronLaser FreeElectronLaser.cpp) -target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) - add_executable (BumponTailInstability BumponTailInstability.cpp) target_link_libraries (BumponTailInstability ${IPPL_LIBS}) diff --git a/alpine/Undulator.h b/alpine/Undulator.h deleted file mode 100644 index 981786b8e..000000000 --- a/alpine/Undulator.h +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef UNDULATOR_H -#define UNDULATOR_H -#include - -#include - -#include "Types/Vector.h" - -#include "units.h" -namespace ippl { - /** - * @brief A template struct representing a uniaxial Lorentz frame. - * - * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis - * (default is the z-axis, axis 2). It includes methods for transforming vectors and - * electromagnetic fields between frames, as well as for computing gamma factors and beta - * velocities. The struct uses Kokkos for portability and performance. - * - * @tparam T The scalar type used for computations (e.g., float, double). - * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the - * z-axis). - */ - template - struct UniaxialLorentzframe { - /// Speed of light, set to 1 for natural units. - constexpr static T c = 1.0; - /// Alias for the scalar type used in the struct. - using scalar = T; - /// Alias for a 3-dimensional vector of the scalar type. - using Vector3 = ippl::Vector; - - /// Beta velocity component along the specified axis. - scalar beta_m; - /// Product of gamma factor and beta velocity. - scalar gammaBeta_m; - /// Gamma factor for the Lorentz transformation. - scalar gamma_m; - - /** - * @brief Create a UniaxialLorentzframe from a given gamma factor. - * - * This static member function constructs a UniaxialLorentzframe object using a given - * gamma factor. It computes the corresponding beta velocity and gamma*beta product. - * - * @param gamma The gamma factor for the Lorentz transformation. - * @return A UniaxialLorentzframe object with computed beta and gamma*beta. - */ - KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) { - UniaxialLorentzframe ret; - ret.gamma_m = gamma; - scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); - scalar gammabeta = gamma * beta; - ret.beta_m = beta; - ret.gammaBeta_m = gammabeta; - return ret; - } - - /** - * @brief Get a UniaxialLorentzframe with negative beta velocity. - * - * This member function returns a new UniaxialLorentzframe object with the same gamma - * factor but with a negative beta velocity, effectively representing the inverse - * Lorentz transformation. - * - * @return A UniaxialLorentzframe object with negative beta velocity. - */ - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative() const noexcept { - UniaxialLorentzframe ret; - ret.beta_m = -beta_m; - ret.gammaBeta_m = -gammaBeta_m; - ret.gamma_m = gamma_m; - return ret; - } - - /// Default constructor. - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; - - /** - * @brief Construct a UniaxialLorentzframe from a gamma*beta value. - * - * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta - * value. It computes the corresponding beta velocity and gamma factor. - * - * @param gammaBeta The product of the gamma factor and beta velocity. - */ - KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) { - using Kokkos::sqrt; - gammaBeta_m = gammaBeta; - beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); - gamma_m = sqrt(1 + gammaBeta * gammaBeta); - } - - /** - * @brief Transform a spatial vector from the primed frame to the unprimed frame. - * - * This member function transforms a given spatial vector from the primed frame to the - * unprimed frame using the Lorentz transformation along the specified axis and the given - * time. - * - * @param arg The spatial vector to be transformed. - * @param time The time component for the transformation. - */ - KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept { - arg[axis] = gamma_m * (arg[axis] + beta_m * time); - } - - /** - * @brief Transform electric and magnetic fields from the unprimed to the primed frame. - * - * This member function transforms a pair of electric and magnetic field vectors from the - * unprimed frame to the primed frame using the Lorentz transformation along the specified - * axis. - * - * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the - * unprimed frame. - * @return A pair of vectors representing the electric and magnetic fields in the primed - * frame. - */ - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( - const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { - Kokkos::pair, ippl::Vector> ret; - ippl::Vector betavec{0, 0, beta_m}; - ippl::Vector vnorm{axis == 0, axis == 1, axis == 2}; - ret.first = - ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m - - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); - ret.second = - ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m - - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); - ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; - ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; - return ret; - } - - /** - * @brief Transform electric and magnetic fields from the primed to the unprimed frame. - * - * This member function transforms a pair of electric and magnetic field vectors from the - * primed frame to the unprimed frame using the inverse Lorentz transformation along the - * specified axis. - * - * @param primedEB A pair of vectors representing the electric and magnetic fields in the - * primed frame. - * @return A pair of vectors representing the electric and magnetic fields in the unprimed - * frame. - */ - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> - inverse_transform_EB( - const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { - return negative().transform_EB(primedEB); - } - }; - template - struct undulator_parameters { - scalar lambda_u; // MITHRA: lambda_u - scalar K; // Undulator parameter - scalar length; - scalar B_magnitude; - undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) - : lambda_u(lambda_u) - , K(K_undulator_parameter) - , length(_length) { - B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) - / (electron_charge_in_unit_charges * lambda_u); - } - }; - /** - * @brief Struct representing an undulator. - * - * @tparam scalar Type of the scalar values (e.g., float, double). - */ - template - struct Undulator { - undulator_parameters uparams; ///< Parameters of the undulator. - scalar distance_to_entry; ///< Distance to the entry of the undulator. - scalar k_u; ///< Wavenumber of the undulator. - - /** - * @brief Constructor to initialize undulator parameters and calculate k_u. - * - * @param p Parameters of the undulator. - * @param dte Distance to the entry of the undulator. - */ - KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) - : uparams(p) - , distance_to_entry(dte) - , k_u(2 * M_PI / p.lambda_u) {} - - /** - * @brief Overloaded operator() to compute magnetic field components. - * - * @param position_in_lab_frame Position vector in the lab frame. - * @return Kokkos::pair, ippl::Vector> - * Pair containing magnetic field and its derivative. - */ - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> - operator()(const ippl::Vector& position_in_lab_frame) const noexcept { - using Kokkos::cos; - using Kokkos::cosh; - using Kokkos::exp; - using Kokkos::sin; - using Kokkos::sinh; - - Kokkos::pair, ippl::Vector> - ret; // Return pair containing magnetic field and its derivative. - ret.first.fill(0); // Initialize magnetic field vector. - ret.second.fill(0); // Initialize derivative vector. - - // If the position is before the undulator entry. - if (position_in_lab_frame[2] < distance_to_entry) { - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator < 0); // Ensure we are in the correct region. - scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) - * 0.5)); // Gaussian decay factor. - - ret.second[0] = 0; // No x-component. - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) - * z_in_undulator * k_u * scal; // y-component. - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) - * scal; // z-component. - } - // If the position is within the undulator. - else if (position_in_lab_frame[2] > distance_to_entry - && position_in_lab_frame[2] < distance_to_entry + uparams.length) { - scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; - assert(z_in_undulator >= 0); // Ensure we are in the correct region. - - ret.second[0] = 0; // No x-component. - ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) - * sin(k_u * z_in_undulator); // y-component. - ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) - * cos(k_u * z_in_undulator); // z-component. - } - return ret; - } - }; -} // namespace ippl -#endif // UNDULATOR_H \ No newline at end of file diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt new file mode 100644 index 000000000..dd8898dd7 --- /dev/null +++ b/fel/CMakeLists.txt @@ -0,0 +1,13 @@ +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (FreeElectronLaser FreeElectronLaser.cpp) +target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) \ No newline at end of file diff --git a/alpine/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp similarity index 100% rename from alpine/FreeElectronLaser.cpp rename to fel/FreeElectronLaser.cpp diff --git a/fel/LorentzTransform.h b/fel/LorentzTransform.h new file mode 100644 index 000000000..876bb22a6 --- /dev/null +++ b/fel/LorentzTransform.h @@ -0,0 +1,318 @@ +#ifndef LORENTZ_TRANSFORM_H +#define LORENTZ_TRANSFORM_H +#include + +#include "Types/Matrix.h" +#include "Types/Vector.h" +namespace ippl { + /** + * @brief A template struct representing a uniaxial Lorentz frame. + * + * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis + * (default is the z-axis, axis 2). It includes methods for transforming vectors and + * electromagnetic fields between frames, as well as for computing gamma factors and beta + * velocities. The struct uses Kokkos for portability and performance. + * + * @tparam T The scalar type used for computations (e.g., float, double). + * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the + * z-axis). + */ + template + struct UniaxialLorentzframe { + /// Speed of light, set to 1 for natural units. + constexpr static T c = 1.0; + /// Alias for the scalar type used in the struct. + using scalar = T; + /// Alias for a 3-dimensional vector of the scalar type. + using Vector3 = ippl::Vector; + + /// Beta velocity component along the specified axis. + scalar beta_m; + /// Product of gamma factor and beta velocity. + scalar gammaBeta_m; + /// Gamma factor for the Lorentz transformation. + scalar gamma_m; + + /** + * @brief Create a UniaxialLorentzframe from a given gamma factor. + * + * This static member function constructs a UniaxialLorentzframe object using a given + * gamma factor. It computes the corresponding beta velocity and gamma*beta product. + * + * @param gamma The gamma factor for the Lorentz transformation. + * @return A UniaxialLorentzframe object with computed beta and gamma*beta. + */ + KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) { + UniaxialLorentzframe ret; + ret.gamma_m = gamma; + scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); + scalar gammabeta = gamma * beta; + ret.beta_m = beta; + ret.gammaBeta_m = gammabeta; + return ret; + } + + /** + * @brief Get a UniaxialLorentzframe with negative beta velocity. + * + * This member function returns a new UniaxialLorentzframe object with the same gamma + * factor but with a negative beta velocity, effectively representing the inverse + * Lorentz transformation. + * + * @return A UniaxialLorentzframe object with negative beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative() const noexcept { + UniaxialLorentzframe ret; + ret.beta_m = -beta_m; + ret.gammaBeta_m = -gammaBeta_m; + ret.gamma_m = gamma_m; + return ret; + } + + /// Default constructor. + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; + + /** + * @brief Construct a UniaxialLorentzframe from a gamma*beta value. + * + * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta + * value. It computes the corresponding beta velocity and gamma factor. + * + * @param gammaBeta The product of the gamma factor and beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) { + using Kokkos::sqrt; + gammaBeta_m = gammaBeta; + beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); + gamma_m = sqrt(1 + gammaBeta * gammaBeta); + } + + /** + * @brief Transform a spatial vector from the primed frame to the unprimed frame. + * + * This member function transforms a given spatial vector from the primed frame to the + * unprimed frame using the Lorentz transformation along the specified axis and the given + * time. + * + * @param arg The spatial vector to be transformed. + * @param time The time component for the transformation. + */ + KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept { + arg[axis] = gamma_m * (arg[axis] + beta_m * time); + } + + /** + * @brief Transform electric and magnetic fields from the unprimed to the primed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * unprimed frame to the primed frame using the Lorentz transformation along the specified + * axis. + * + * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the + * unprimed frame. + * @return A pair of vectors representing the electric and magnetic fields in the primed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( + const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { + Kokkos::pair, ippl::Vector> ret; + ippl::Vector betavec{0, 0, beta_m}; + ippl::Vector vnorm{axis == 0, axis == 1, axis == 2}; + ret.first = + ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = + ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; + ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; + return ret; + } + + /** + * @brief Transform electric and magnetic fields from the primed to the unprimed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * primed frame to the unprimed frame using the inverse Lorentz transformation along the + * specified axis. + * + * @param primedEB A pair of vectors representing the electric and magnetic fields in the + * primed frame. + * @return A pair of vectors representing the electric and magnetic fields in the unprimed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + inverse_transform_EB( + const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { + return negative().transform_EB(primedEB); + } + }; + /** + * @brief Represents a Lorentz frame with associated transformations and operations. + * + * This template struct models a Lorentz frame used in special relativity to handle + * transformations between reference frames moving at a constant velocity relative to each other. + * The LorentzFrame supports various operations such as converting between frames and transforming + * vectors and electromagnetic fields. + * + * @tparam T The type for scalar values, typically a floating-point type. + */ + template + struct LorentzFrame { + /// Speed of light in natural units (c = 1) + constexpr static T c = 1.0; + + /// Alias for the scalar type. + using scalar = T; + + /// Alias for a 3D vector of type T. + using Vector3 = ippl::Vector; + + /// Velocity vector (beta) in the frame, normalized by the speed of light. + ippl::Vector beta_m; + + /// Product of gamma and beta vectors. + ippl::Vector gammaBeta_m; + + /// Lorentz factor gamma. + T gamma_m; + + /** + * @brief Constructs a LorentzFrame from a given gammaBeta vector. + * + * This constructor initializes the Lorentz frame using a vector that represents + * the product of the Lorentz factor gamma and the velocity vector beta. + * The velocity vector beta is then computed by normalizing this gammaBeta vector, + * and the Lorentz factor gamma is computed from the magnitude of gammaBeta. + * + * @param gammaBeta The gammaBeta vector. + */ + KOKKOS_INLINE_FUNCTION LorentzFrame(const ippl::Vector& gammaBeta) { + beta_m = gammaBeta / sqrt(1 + gammaBeta.dot(gammaBeta)); + gamma_m = sqrt(1 + gammaBeta.dot(gammaBeta)); + gammaBeta_m = gammaBeta; + } + /** + * @brief Constructs a uniaxial Lorentz frame given a gamma value and an axis. + * + * This static member function creates a Lorentz frame that has motion along one of the principal + * axes (x, y, or z) based on the provided gamma value. The beta vector is calculated accordingly + * and aligned with the specified axis. + * + * @tparam axis The axis along which the frame is moving ('x', 'y', or 'z'). + * @param gamma The Lorentz factor gamma, which must be greater than or equal to 1. + * @return LorentzFrame A Lorentz frame with motion along the specified axis. + */ + template + static LorentzFrame uniaxialGamma(T gamma) { + static_assert(axis == 'x' || axis == 'y' || axis == 'z', "Only xyz axis suproted"); + assert(gamma >= 1.0 && "Gamma must be >= 1"); + using Kokkos::sqrt; + + T beta = gamma == 1 ? T(0) : sqrt(gamma * gamma - 1) / gamma; + Vector3 arg{0, 0, 0}; + arg[axis - 'x'] = gamma * beta; + return LorentzFrame(arg); + } + + /** + * @brief Constructs the Lorentz transformation matrix for converting from unprimed to primed frame. + * + * This function computes the Lorentz transformation matrix that converts vectors + * from the unprimed reference frame to the primed reference frame. The transformation + * accounts for the relative velocity between the frames using the stored beta and gamma values. + * + * If the magnitude of the beta vector is very small (less than 1e-10), the function returns + * an identity matrix, indicating no significant transformation. + * + * @return matrix The transformation matrix. + */ + KOKKOS_INLINE_FUNCTION matrix unprimedToPrimed() const noexcept { + T betaMagsq = beta_m.dot(beta_m); + using Kokkos::abs; + if (abs(betaMagsq) < 1e-10) { + return matrix(T(1)); + } + ippl::Vector betaSquared = beta_m * beta_m; + + matrix ret; + + ret.data[0] = ippl::Vector{gamma_m, -gammaBeta_m[0], -gammaBeta_m[1], -gammaBeta_m[2]}; + ret.data[1] = ippl::Vector{-gammaBeta_m[0], 1 + (gamma_m - 1) * betaSquared[0] / betaMagsq, + (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq, + (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq}; + ret.data[2] = ippl::Vector{-gammaBeta_m[1], + (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq, + 1 + (gamma_m - 1) * betaSquared[1] / betaMagsq, + (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq}; + ret.data[3] = ippl::Vector{-gammaBeta_m[2], + (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq, + (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq, + 1 + (gamma_m - 1) * betaSquared[2] / betaMagsq}; + return ret; + } + /** + * @brief Constructs the Lorentz transformation matrix for converting from primed to unprimed frame. + * + * This function computes the inverse of the Lorentz transformation matrix obtained + * from unprimedToPrimed(), allowing conversion of vectors from the primed reference frame + * back to the unprimed reference frame. It utilizes the matrix inversion function to achieve this. + * + * @return matrix The inverse transformation matrix. This could also be done by taking the negative velocity + */ + KOKKOS_INLINE_FUNCTION matrix primedToUnprimed() const noexcept { + return unprimedToPrimed().inverse(); + } + + KOKKOS_INLINE_FUNCTION Vector3 transformV(const Vector3& unprimedV) const noexcept { + T factor = T(1.0) / (1.0 - unprimedV.dot(beta_m)); + Vector3 ret = unprimedV * scalar(1.0 / gamma_m); + ret -= beta_m; + ret += beta_m * (unprimedV.dot(beta_m) * (gamma_m / (gamma_m + 1))); + return ret * factor; + } + + KOKKOS_INLINE_FUNCTION Vector3 transformGammabeta(const Vector3& gammabeta) const noexcept { + using Kokkos::sqrt; + T gamma = sqrt(T(1) + gammabeta.dot(gammabeta)); + Vector3 beta = gammabeta; + beta /= gamma; + Vector3 betatrf = transformV(beta); + betatrf *= sqrt(1 - betatrf.dot(betatrf)); + return betatrf; + } + /** + * @brief Transforms electric and magnetic fields from the unprimed to the primed frame. + * + * This function applies the Lorentz transformation to a pair of 3D vectors representing + * the electric field (E) and magnetic field (B) in the unprimed reference frame, + * converting them to the corresponding fields in the primed reference frame. The transformation + * accounts for relativistic effects on the fields due to the relative motion of the frames. + * + * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the unprimed frame. + * @return Kokkos::pair, ippl::Vector> A pair of transformed vectors representing the electric and magnetic fields in the primed frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( + const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { + using Kokkos::sqrt; + Kokkos::pair, ippl::Vector> ret; + Vector3 vnorm = beta_m * (T(1.0) / sqrt(beta_m.dot(beta_m))); + + ret.first = + (ippl::Vector(unprimedEB.first + beta_m.cross(unprimedEB.second)) * gamma_m) + - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = + (ippl::Vector(unprimedEB.second - beta_m.cross(unprimedEB.first)) * gamma_m) + - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + return ret; + } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + inverse_transform_EB( + const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { + ippl::Vector mgb(gammaBeta_m * scalar(-1.0)); + return LorentzFrame(mgb).transform_EB(primedEB); + } + }; + +} // namespace ippl +#endif diff --git a/fel/Undulator.h b/fel/Undulator.h new file mode 100644 index 000000000..57cad337c --- /dev/null +++ b/fel/Undulator.h @@ -0,0 +1,97 @@ +#ifndef UNDULATOR_H +#define UNDULATOR_H +#include + +#include + +#include "Types/Vector.h" + +#include "units.h" +#include "LorentzTransform.h" +namespace ippl { + template + struct undulator_parameters { + scalar lambda_u; // MITHRA: lambda_u + scalar K; // Undulator parameter + scalar length; + scalar B_magnitude; + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) + : lambda_u(lambda_u) + , K(K_undulator_parameter) + , length(_length) { + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) + / (electron_charge_in_unit_charges * lambda_u); + } + }; + /** + * @brief Struct representing an undulator. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ + template + struct Undulator { + undulator_parameters uparams; ///< Parameters of the undulator. + scalar distance_to_entry; ///< Distance to the entry of the undulator. + scalar k_u; ///< Wavenumber of the undulator. + + /** + * @brief Constructor to initialize undulator parameters and calculate k_u. + * + * @param p Parameters of the undulator. + * @param dte Distance to the entry of the undulator. + */ + KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) + : uparams(p) + , distance_to_entry(dte) + , k_u(2 * M_PI / p.lambda_u) {} + + /** + * @brief Overloaded operator() to compute magnetic field components. + * + * @param position_in_lab_frame Position vector in the lab frame. + * @return Kokkos::pair, ippl::Vector> + * Pair containing magnetic field and its derivative. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + operator()(const ippl::Vector& position_in_lab_frame) const noexcept { + using Kokkos::cos; + using Kokkos::cosh; + using Kokkos::exp; + using Kokkos::sin; + using Kokkos::sinh; + + Kokkos::pair, ippl::Vector> + ret; // Return pair containing magnetic field and its derivative. + ret.first.fill(0); // Initialize magnetic field vector. + ret.second.fill(0); // Initialize derivative vector. + + // If the position is before the undulator entry. + if (position_in_lab_frame[2] < distance_to_entry) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); // Ensure we are in the correct region. + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) + * 0.5)); // Gaussian decay factor. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * z_in_undulator * k_u * scal; // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * scal; // z-component. + } + // If the position is within the undulator. + else if (position_in_lab_frame[2] > distance_to_entry + && position_in_lab_frame[2] < distance_to_entry + uparams.length) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); // Ensure we are in the correct region. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * sin(k_u * z_in_undulator); // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * cos(k_u * z_in_undulator); // z-component. + } + return ret; + } + }; +} // namespace ippl +#endif // UNDULATOR_H \ No newline at end of file diff --git a/alpine/units.h b/fel/units.h similarity index 100% rename from alpine/units.h rename to fel/units.h diff --git a/src/Types/Matrix.h b/src/Types/Matrix.h new file mode 100644 index 000000000..147286f99 --- /dev/null +++ b/src/Types/Matrix.h @@ -0,0 +1,186 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include + +#include "Vector.h" +namespace ippl { + /** + * @brief Column major + * + * @tparam T Scalar type + * @tparam m Number of rows + * @tparam n Number of columns + */ + template + struct matrix { + + ippl::Vector, n> data; + constexpr static bool squareMatrix = (m == n && m > 0); + + constexpr KOKKOS_INLINE_FUNCTION matrix(T diag) + requires(m == n) + { + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + data[i][j] = diag * T(i == j); + } + } + } + constexpr matrix() = default; + KOKKOS_INLINE_FUNCTION constexpr static matrix zero() { + matrix ret; + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + ret.data[i][j] = 0; + } + } + return ret; + }; + KOKKOS_INLINE_FUNCTION void setZero() { + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + data[i][j] = 0; + } + } + } + + KOKKOS_INLINE_FUNCTION T operator()(int i, int j) const noexcept { return data[j][i]; } + KOKKOS_INLINE_FUNCTION T& operator()(int i, int j) noexcept { return data[j][i]; } + template + KOKKOS_INLINE_FUNCTION matrix cast() const noexcept { + matrix ret; + for (unsigned i = 0; i < n; i++) { + ret.data[i] = data[i].template cast(); + } + return ret; + } + KOKKOS_INLINE_FUNCTION matrix operator+(const matrix& other) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] + other.data[i]; + } + return result; + } + + // Implement matrix subtraction + KOKKOS_INLINE_FUNCTION matrix operator-(const matrix& other) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] - other.data[i]; + } + return result; + } + KOKKOS_INLINE_FUNCTION matrix operator*(const T& factor) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] * factor; + } + return result; + } + KOKKOS_INLINE_FUNCTION matrix operator/(const T& divisor) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] / divisor; + } + return result; + } + + // Implement matrix-vector multiplication + template + KOKKOS_INLINE_FUNCTION ippl::Vector operator*( + const ippl::Vector& vec) const { + static_assert((int)other_m == n); + ippl::Vector result(0); + for (int i = 0; i < n; ++i) { + for(int j = 0;j < m;i++){ + //This could be a vector operation, but you never know with ippl::Vector + result[j] += vec[i] * data[i][j]; + } + } + return result; + } + template + requires(n == otherm) + KOKKOS_INLINE_FUNCTION matrix operator*( + const matrix& otherMat) const noexcept { + matrix ret(0); + for (int i = 0; i < m; i++) { + for (int j = 0; j < othern; j++) { + for (int k = 0; k < n; k++) { + ret(i, j) += (*this)(i, k) * otherMat(k, j); + } + } + } + return ret; + } + KOKKOS_INLINE_FUNCTION void addCol(int i, int j, T alpha = 1.0) { + data[j] += data[i] * alpha; + } + KOKKOS_INLINE_FUNCTION matrix inverse() const noexcept + requires(squareMatrix) + { + constexpr int N = m; + + matrix ret(1.0); + matrix dis(*this); + + for (int i = 0; i < N; i++) { + for (int j = i + 1; j < N; j++) { + T alpha = -dis(i, j) / dis(i, i); + dis.addCol(i, j, alpha); + dis(i, j) = 0; + ret.addCol(i, j, alpha); + } + } + for (int i = N - 1; i >= 0; i--) { + for (int j = i - 1; j >= 0; j--) { + T alpha = -dis(i, j) / dis(i, i); + dis.addCol(i, j, alpha); + dis(i, j) = 0; + ret.addCol(i, j, alpha); + } + } + for (int i = 0; i < N; i++) { + T d = dis(i, i); + T oneod = T(1) / d; + dis.data[i] *= oneod; + ret.data[i] *= oneod; + } + + return ret; + } + + template + friend stream_t& operator<<(stream_t& str, const matrix& mat) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + str << mat.data[j][i] << " "; + } + str << "\n"; + } + return str; + } + }; + /** + * @brief Computes the outer product l r^T of two vectors + * + * @tparam T Scalar type + * @tparam N Output rows + * @tparam M Output cols + * @param l left operand + * @param r right operand + * @return KOKKOS_INLINE_FUNCTION the matrix l r^T + */ + template + KOKKOS_INLINE_FUNCTION matrix outer_product(const ippl::Vector& l, + const ippl::Vector& r) { + matrix ret; + for (unsigned i = 0; i < N; i++) { + for (unsigned j = 0; j < M; j++) { + ret.data[j][i] = l[i] * r[j]; + } + } + return ret; + } +} // namespace ippl +#endif \ No newline at end of file From b0ba9953fe462e06d1f05738f47ce1c6d295693d Mon Sep 17 00:00:00 2001 From: Manuel Date: Wed, 19 Jun 2024 14:03:15 +0200 Subject: [PATCH 31/32] Move Particle stuff into fel/ --- fel/FreeElectronLaser.cpp | 2 +- fel/NSFDSolverWithParticles.h | 382 ++++++++++++++++++++++++++++++++++ src/MaxwellSolvers/FDTD.h | 377 --------------------------------- 3 files changed, 383 insertions(+), 378 deletions(-) create mode 100644 fel/NSFDSolverWithParticles.h diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp index e2a0c8551..a33159c51 100644 --- a/fel/FreeElectronLaser.cpp +++ b/fel/FreeElectronLaser.cpp @@ -7,7 +7,7 @@ #include "Field/Field.h" -#include "MaxwellSolvers/FDTD.h" +#include "NSFDSolverWithParticles.h" #define JSON_HAS_RANGES 0 // Merlin compilation #include #include diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h new file mode 100644 index 000000000..06aa2f748 --- /dev/null +++ b/fel/NSFDSolverWithParticles.h @@ -0,0 +1,382 @@ +#ifndef NSFDSOLVERWITHPARTICLES_H +#define NSFDSOLVERWITHPARTICLES_H +#include "MaxwellSolvers/FDTD.h" +namespace ippl{ + template + struct Bunch : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute( + gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(E_gather); // Electric field attribute for particle gathering + this->addAttribute(B_gather); // Magnedit field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type + R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type + R_nm1; // Position container for the previous time step + ippl::ParticleAttrib> + E_gather; // Electric field container for particle gathering + ippl::ParticleAttrib> + B_gather; // Magnetic field container for particle gathering + }; + template + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos) { + // return pear, ippl::Vector>{ippl::Vector{5,5,5}, + // ippl::Vector{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing * + // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + Kokkos::pair, ippl::Vector> ret; + // pos -= spacing * T(0.5); + ippl::Vector relpos = pos - origin; + ippl::Vector gridpos = relpos / hr; + ippl::Vector ipos; + ipos = gridpos.template cast(); + ippl::Vector fracpos; + for (unsigned k = 0; k < 3; k++) { + fracpos[k] = gridpos[k] - (int)ipos[k]; + } + // TODO: NGHOST!!!!!!! + ipos += ippl::Vector(1); + ret.first = ipos; + ret.second = fracpos; + return ret; + } + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, const scalar value) { + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, + const ippl::Vector& value) { + // std::cout << "Value: " << value << "\n"; + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + // std::cout << "out of bounds\n"; + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, + ippl::Vector hr, + ippl::Vector origin, + const ippl::Vector& from, + const ippl::Vector& to, + const scalar factor) { + Kokkos::pair, ippl::Vector> from_grid = + gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = + gridCoordinatesOf(hr, origin, to); + // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], + // from_grid.second[2]); + for (int d = 0; d < 3; d++) { + // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ + // std::cout <(nghost); + // to_ipos += ippl::Vector(nghost); + + if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] + && from_grid.first[2] == to_grid.first[2]) { + scatterToGrid(ldom, Jview, hr, origin, + ippl::Vector((from + to) * scalar(0.5)), + ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const int nghost = 1; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(ldom, Jview, hr, origin, + ippl::Vector((from + relay) * scalar(0.5)), + ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), + ippl::Vector((to - relay) * factor)); + } + template + class NSFDSolverWithParticles { + public: + constexpr static unsigned dim = 3; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; + using playout_type = ParticleSpatialLayout; + using bunch_type = Bunch>; + using Mesh_t = ippl::UniformCartesian; + FieldLayout* layout_mp; + Mesh_t* mesh_mp; + playout_type playout; + Bunch> particles; + FourField J; + ThreeField E; + ThreeField B; + NonStandardFDTDSolver field_solver; + + ippl::Vector nr_global; + ippl::Vector hr_m; + size_t steps_taken; + NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) + : layout_mp(&layout) + , mesh_mp(&mesch) + , playout(layout, mesch) + , particles(playout) + , J(mesch, layout) + , E(mesch, layout) + , B(mesch, layout) + , field_solver(J, E, B) { + particles.create(nparticles); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)}; + hr_m = mesh_mp->getMeshSpacing(); + steps_taken = 0; + // field_solver.setEMFields(E, B); + } + template + void scatterBunch() { + // ippl::Vector* gammaBeta = this->gammaBeta; + auto hr_m = mesh_mp->getMeshSpacing(); + const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; + assert_isreal(volume); + assert_isreal((scalar(1) / volume)); + J = typename decltype(J)::value_type(0); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = field_solver.dt; + bool sc = space_charge; + ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + vector_type pos = rview(i); + vector_type to = rview(i); + vector_type from = rm1view(i); + if (sc) { + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); + } + scatterLineToGrid(lDom, Jview, hr, orig, from, to, + scalar(qview(i)) / (volume * dt)); + }); + Kokkos::fence(); + J.accumulateHalo(); + } + template + void updateBunch(scalar time, callable external_field) { + Kokkos::fence(); + auto gbview = particles.gamma_beta.getView(); + auto eview = particles.E_gather.getView(); + auto bview = particles.B_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); + scalar bunch_dt = field_solver.dt / 3; + Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + E.fillHalo(); + B.fillHalo(); + Kokkos::fence(); + for (int bts = 0; bts < 3; bts++) { + particles.E_gather.gather(E, particles.R, /*offset = */ 0.0); + particles.B_gather.gather(B, particles.R, /*offset = */ 0.0); + Kokkos::fence(); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector E_grid = eview(i); + ippl::Vector B_grid = bview(i); + // std::cout << "E_grid: " << E_grid << "\n"; + // std::cout << "B_grid: " << B_grid << "\n"; + ippl::Vector bunchpos = rview(i); + Kokkos::pair, ippl::Vector> external_eb = + external_field(bunchpos, time + bunch_dt * bts); + + ippl::Vector, 2> EB{ + ippl::Vector(E_grid + external_eb.first), + ippl::Vector(B_grid + external_eb.second)}; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = + pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = + charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = + t1 + + t2.cross(scalar(2) * alpha + * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = + t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + rview(i) = + rview(i) + + bunch_dt * ngammabeta + / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); + Kokkos::fence(); + } + Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); + size_t invalid_count = 0; + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz; // + extenz[0] = nr_global[0] * hr_m[0]; + extenz[1] = nr_global[1] * hr_m[1]; + extenz[2] = nr_global[2] * hr_m[2]; + Kokkos::parallel_reduce( + Kokkos::RangePolicy< + typename playout_type::RegionLayout_t::view_type::execution_space>( + 0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t & ref) { + bool out_of_bounds = false; + ippl::Vector ppos = rview(i); + for (size_t d = 0; d < dim; d++) { + out_of_bounds |= (ppos[d] <= origo[d]); + out_of_bounds |= + (ppos[d] >= origo[d] + extenz[d]); // Check against simulation domain + } + invalid(i) = out_of_bounds; + ref += out_of_bounds; + }, + invalid_count); + particles.destroy(invalid, invalid_count); + Kokkos::fence(); + playout.update(particles); + } + void solve() { + scatterBunch(); + field_solver.solve(); + updateBunch( + field_solver.dt * steps_taken, + /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) { + return Kokkos::pair{vector_type(0), vector_type(0)}; + }); + ++steps_taken; + } + template + void solve(callable external_field) { + scatterBunch(); + field_solver.solve(); + // std::cout << field_solver.dt * steps_taken << "\n"; + updateBunch(field_solver.dt * steps_taken, external_field); + ++steps_taken; + } + }; +} +#endif \ No newline at end of file diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h index f9fb5fa5d..018758db4 100644 --- a/src/MaxwellSolvers/FDTD.h +++ b/src/MaxwellSolvers/FDTD.h @@ -398,383 +398,6 @@ namespace ippl { A_np1 = 0.0; }; }; - - template - struct Bunch : public ippl::ParticleBase { - using scalar = _scalar; - - // Constructor for the Bunch class, taking a PLayout reference - Bunch(PLayout& playout) - : ippl::ParticleBase(playout) { - // Add attributes to the particle bunch - this->addAttribute(Q); // Charge attribute - this->addAttribute(mass); // Mass attribute - this->addAttribute( - gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) - this->addAttribute(R_np1); // Position attribute for the next time step - this->addAttribute(R_nm1); // Position attribute for the next time step - this->addAttribute(E_gather); // Electric field attribute for particle gathering - this->addAttribute(B_gather); // Magnedit field attribute for particle gathering - } - - // Destructor for the Bunch class - ~Bunch() {} - - // Define container types for various attributes - using charge_container_type = ippl::ParticleAttrib; - using velocity_container_type = ippl::ParticleAttrib>; - using vector_container_type = ippl::ParticleAttrib>; - using vector4_container_type = ippl::ParticleAttrib>; - - // Declare instances of the attribute containers - charge_container_type Q; // Charge container - charge_container_type mass; // Mass container - velocity_container_type gamma_beta; // Gamma-beta container - typename ippl::ParticleBase::particle_position_type - R_np1; // Position container for the next time step - typename ippl::ParticleBase::particle_position_type - R_nm1; // Position container for the previous time step - ippl::ParticleAttrib> - E_gather; // Electric field container for particle gathering - ippl::ParticleAttrib> - B_gather; // Magnetio field container for particle gathering - }; - template - KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( - const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos) { - // return pear, ippl::Vector>{ippl::Vector{5,5,5}, - // ippl::Vector{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing * - // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); - Kokkos::pair, ippl::Vector> ret; - // pos -= spacing * T(0.5); - ippl::Vector relpos = pos - origin; - ippl::Vector gridpos = relpos / hr; - ippl::Vector ipos; - ipos = gridpos.template cast(); - ippl::Vector fracpos; - for (unsigned k = 0; k < 3; k++) { - fracpos[k] = gridpos[k] - (int)ipos[k]; - } - // TODO: NGHOST!!!!!!! - ipos += ippl::Vector(1); - ret.first = ipos; - ret.second = fracpos; - return ret; - } - template - KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, - ippl::Vector hr, ippl::Vector orig, - const ippl::Vector& pos, const scalar value) { - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); - ipos -= ldom.first(); - // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; - if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 - || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 - || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { - return; - } - assert(fracpos[0] >= 0.0f); - assert(fracpos[0] <= 1.0f); - assert(fracpos[1] >= 0.0f); - assert(fracpos[1] <= 1.0f); - assert(fracpos[2] >= 0.0f); - assert(fracpos[2] <= 1.0f); - ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; - assert(one_minus_fracpos[0] >= 0.0f); - assert(one_minus_fracpos[0] <= 1.0f); - assert(one_minus_fracpos[1] >= 0.0f); - assert(one_minus_fracpos[1] <= 1.0f); - assert(one_minus_fracpos[2] >= 0.0f); - assert(one_minus_fracpos[2] <= 1.0f); - scalar accum = 0; - - for (unsigned i = 0; i < 8; i++) { - scalar weight = 1; - ippl::Vector ipos_l = ipos; - for (unsigned d = 0; d < 3; d++) { - weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); - ipos_l[d] += !!(i & (1 << d)); - } - assert_isreal(value); - assert_isreal(weight); - accum += weight; - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); - } - assert(abs(accum - 1.0f) < 1e-6f); - } - template - KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, - ippl::Vector hr, ippl::Vector orig, - const ippl::Vector& pos, - const ippl::Vector& value) { - // std::cout << "Value: " << value << "\n"; - auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); - ipos -= ldom.first(); - if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 - || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 - || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { - // std::cout << "out of bounds\n"; - return; - } - assert(fracpos[0] >= 0.0f); - assert(fracpos[0] <= 1.0f); - assert(fracpos[1] >= 0.0f); - assert(fracpos[1] <= 1.0f); - assert(fracpos[2] >= 0.0f); - assert(fracpos[2] <= 1.0f); - ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; - assert(one_minus_fracpos[0] >= 0.0f); - assert(one_minus_fracpos[0] <= 1.0f); - assert(one_minus_fracpos[1] >= 0.0f); - assert(one_minus_fracpos[1] <= 1.0f); - assert(one_minus_fracpos[2] >= 0.0f); - assert(one_minus_fracpos[2] <= 1.0f); - scalar accum = 0; - - for (unsigned i = 0; i < 8; i++) { - scalar weight = 1; - ippl::Vector ipos_l = ipos; - for (unsigned d = 0; d < 3; d++) { - weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); - ipos_l[d] += !!(i & (1 << d)); - } - assert_isreal(weight); - accum += weight; - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); - Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); - } - assert(abs(accum - 1.0f) < 1e-6f); - } - template - KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, - ippl::Vector hr, - ippl::Vector origin, - const ippl::Vector& from, - const ippl::Vector& to, - const scalar factor) { - Kokkos::pair, ippl::Vector> from_grid = - gridCoordinatesOf(hr, origin, from); - Kokkos::pair, ippl::Vector> to_grid = - gridCoordinatesOf(hr, origin, to); - // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], - // from_grid.second[2]); - for (int d = 0; d < 3; d++) { - // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ - // std::cout <(nghost); - // to_ipos += ippl::Vector(nghost); - - if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] - && from_grid.first[2] == to_grid.first[2]) { - scatterToGrid(ldom, Jview, hr, origin, - ippl::Vector((from + to) * scalar(0.5)), - ippl::Vector((to - from) * factor)); - - return; - } - ippl::Vector relay; - const int nghost = 1; - const ippl::Vector orig = origin; - using Kokkos::max; - using Kokkos::min; - for (unsigned int i = 0; i < 3; i++) { - relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] - + scalar(1.0) * hr[i] + orig[i], - max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] - + scalar(0.0) * hr[i] + orig[i], - scalar(0.5) * (to[i] + from[i]))); - } - scatterToGrid(ldom, Jview, hr, origin, - ippl::Vector((from + relay) * scalar(0.5)), - ippl::Vector((relay - from) * factor)); - scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), - ippl::Vector((to - relay) * factor)); - } - template - class NSFDSolverWithParticles { - public: - constexpr static unsigned dim = 3; - using vector_type = ippl::Vector; - using vector4_type = ippl::Vector; - using FourField = - ippl::Field, - typename ippl::UniformCartesian::DefaultCentering>; - using ThreeField = - ippl::Field, - typename ippl::UniformCartesian::DefaultCentering>; - using playout_type = ParticleSpatialLayout; - using bunch_type = Bunch>; - using Mesh_t = ippl::UniformCartesian; - FieldLayout* layout_mp; - Mesh_t* mesh_mp; - playout_type playout; - Bunch> particles; - FourField J; - ThreeField E; - ThreeField B; - NonStandardFDTDSolver field_solver; - - ippl::Vector nr_global; - ippl::Vector hr_m; - size_t steps_taken; - NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) - : layout_mp(&layout) - , mesh_mp(&mesch) - , playout(layout, mesch) - , particles(playout) - , J(mesch, layout) - , E(mesch, layout) - , B(mesch, layout) - , field_solver(J, E, B) { - particles.create(nparticles); - nr_global = ippl::Vector{ - uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), - uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), - uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)}; - hr_m = mesh_mp->getMeshSpacing(); - steps_taken = 0; - // field_solver.setEMFields(E, B); - } - template - void scatterBunch() { - // ippl::Vector* gammaBeta = this->gammaBeta; - auto hr_m = mesh_mp->getMeshSpacing(); - const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; - assert_isreal(volume); - assert_isreal((scalar(1) / volume)); - J = typename decltype(J)::value_type(0); - auto Jview = J.getView(); - auto qview = particles.Q.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto orig = mesh_mp->getOrigin(); - auto hr = mesh_mp->getMeshSpacing(); - auto dt = field_solver.dt; - bool sc = space_charge; - ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); - Kokkos::parallel_for( - particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { - vector_type pos = rview(i); - vector_type to = rview(i); - vector_type from = rm1view(i); - if (sc) { - scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); - } - scatterLineToGrid(lDom, Jview, hr, orig, from, to, - scalar(qview(i)) / (volume * dt)); - }); - Kokkos::fence(); - J.accumulateHalo(); - } - template - void updateBunch(scalar time, callable external_field) { - Kokkos::fence(); - auto gbview = particles.gamma_beta.getView(); - auto eview = particles.E_gather.getView(); - auto bview = particles.B_gather.getView(); - auto qview = particles.Q.getView(); - auto mview = particles.mass.getView(); - auto rview = particles.R.getView(); - auto rm1view = particles.R_nm1.getView(); - auto rp1view = particles.R_np1.getView(); - scalar bunch_dt = field_solver.dt / 3; - Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); - E.fillHalo(); - B.fillHalo(); - Kokkos::fence(); - for (int bts = 0; bts < 3; bts++) { - particles.E_gather.gather(E, particles.R, /*offset = */ 0.0); - particles.B_gather.gather(B, particles.R, /*offset = */ 0.0); - Kokkos::fence(); - Kokkos::parallel_for( - particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { - const ippl::Vector pgammabeta = gbview(i); - ippl::Vector E_grid = eview(i); - ippl::Vector B_grid = bview(i); - // std::cout << "E_grid: " << E_grid << "\n"; - // std::cout << "B_grid: " << B_grid << "\n"; - ippl::Vector bunchpos = rview(i); - Kokkos::pair, ippl::Vector> external_eb = - external_field(bunchpos, time + bunch_dt * bts); - - ippl::Vector, 2> EB{ - ippl::Vector(E_grid + external_eb.first), - ippl::Vector(B_grid + external_eb.second)}; - - const scalar charge = qview(i); - const scalar mass = mview(i); - const ippl::Vector t1 = - pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); - const scalar alpha = - charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); - const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); - const ippl::Vector t3 = - t1 - + t2.cross(scalar(2) * alpha - * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); - const ippl::Vector ngammabeta = - t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); - - rview(i) = - rview(i) - + bunch_dt * ngammabeta - / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); - gbview(i) = ngammabeta; - }); - Kokkos::fence(); - } - Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); - size_t invalid_count = 0; - auto origo = mesh_mp->getOrigin(); - ippl::Vector extenz; // - extenz[0] = nr_global[0] * hr_m[0]; - extenz[1] = nr_global[1] * hr_m[1]; - extenz[2] = nr_global[2] * hr_m[2]; - Kokkos::parallel_reduce( - Kokkos::RangePolicy< - typename playout_type::RegionLayout_t::view_type::execution_space>( - 0, particles.getLocalNum()), - KOKKOS_LAMBDA(size_t i, size_t & ref) { - bool out_of_bounds = false; - ippl::Vector ppos = rview(i); - for (size_t d = 0; d < dim; d++) { - out_of_bounds |= (ppos[d] <= origo[d]); - out_of_bounds |= - (ppos[d] >= origo[d] + extenz[d]); // Check against simulation domain - } - invalid(i) = out_of_bounds; - ref += out_of_bounds; - }, - invalid_count); - particles.destroy(invalid, invalid_count); - Kokkos::fence(); - playout.update(particles); - } - void solve() { - scatterBunch(); - field_solver.solve(); - updateBunch( - field_solver.dt * steps_taken, - /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) { - return Kokkos::pair{vector_type(0), vector_type(0)}; - }); - ++steps_taken; - } - template - void solve(callable external_field) { - scatterBunch(); - field_solver.solve(); - // std::cout << field_solver.dt * steps_taken << "\n"; - updateBunch(field_solver.dt * steps_taken, external_field); - ++steps_taken; - } - }; } // namespace ippl #endif \ No newline at end of file From 3906349a7ac9133be1e74341f9d5e98145f568d5 Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 28 Jun 2024 13:25:45 +0200 Subject: [PATCH 32/32] Remove some dead code, move everything particle related into fel --- fel/CMakeLists.txt | 5 +- fel/FreeElectronLaser.cpp | 50 +++++++-------- fel/NSFDSolverWithParticles.h | 63 ++++++++++--------- .../TestFDTDSolverWithParticles.cpp | 9 +-- test/solver/CMakeLists.txt | 6 -- 5 files changed, 63 insertions(+), 70 deletions(-) rename {test/solver => fel}/TestFDTDSolverWithParticles.cpp (87%) diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt index dd8898dd7..e986ce8b9 100644 --- a/fel/CMakeLists.txt +++ b/fel/CMakeLists.txt @@ -10,4 +10,7 @@ set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) add_executable (FreeElectronLaser FreeElectronLaser.cpp) -target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) \ No newline at end of file +target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) + +add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp) +target_link_libraries (TestFDTDSolverWithParticles ${IPPL_LIBS}) \ No newline at end of file diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp index a33159c51..ce05c8c3e 100644 --- a/fel/FreeElectronLaser.cpp +++ b/fel/FreeElectronLaser.cpp @@ -813,7 +813,7 @@ void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma) { if (std::isnan(iterQ->rnp[2])) { std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ << ", " << frame_beta << "\n"; - std::cerr << __FILE__ << ": " << __LINE__ << " OOOOOF\n"; + std::cerr << __FILE__ << ": " << __LINE__ << " Particle has NaN velocity or position\n"; abort(); } } @@ -822,25 +822,25 @@ void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma) { template size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize& bunchInit, scalar frame_gamma) { - ChargeVector oof; - initializeBunchEllipsoid(bunchInit, oof, 0, 1, 0); - for (auto& c : oof) { + ChargeVector temporary_charge_list; + initializeBunchEllipsoid(bunchInit, temporary_charge_list, 0, 1, 0); + for (auto& c : temporary_charge_list) { if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) std::cout << "Pos before boost: " << c.rnp << "\n"; if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) std::cout << "Pos before boost: " << c.rnp << "\n"; } - boost_bunch(oof, frame_gamma); - for (auto& c : oof) { + boost_bunch(temporary_charge_list, frame_gamma); + for (auto& c : temporary_charge_list) { if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) { std::cout << "Pos after boost: " << c.rnp << "\n"; break; } } - Kokkos::View*, Kokkos::HostSpace> positions("", oof.size()); - Kokkos::View*, Kokkos::HostSpace> gammabetas("", oof.size()); - auto iterQ = oof.begin(); - for (size_t i = 0; i < oof.size(); i++) { + Kokkos::View*, Kokkos::HostSpace> positions("", temporary_charge_list.size()); + Kokkos::View*, Kokkos::HostSpace> gammabetas("", temporary_charge_list.size()); + auto iterQ = temporary_charge_list.begin(); + for (size_t i = 0; i < temporary_charge_list.size(); i++) { assert_isreal(iterQ->gb[0]); assert_isreal(iterQ->gb[1]); assert_isreal(iterQ->gb[2]); @@ -854,11 +854,11 @@ size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize& gammabetas(i) = iterQ->gb; ++iterQ; } - if (oof.size() > bunch.getLocalNum()) { - bunch.create(oof.size() - bunch.getLocalNum()); + if (temporary_charge_list.size() > bunch.getLocalNum()) { + bunch.create(temporary_charge_list.size() - bunch.getLocalNum()); } - Kokkos::View*> dpositions("", oof.size()); - Kokkos::View*> dgammabetas("", oof.size()); + Kokkos::View*> dpositions("", temporary_charge_list.size()); + Kokkos::View*> dgammabetas("", temporary_charge_list.size()); Kokkos::deep_copy(dpositions, positions); Kokkos::deep_copy(dgammabetas, gammabetas); @@ -868,14 +868,14 @@ size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize& gbview = bunch.gamma_beta.getView(); ; Kokkos::parallel_for( - oof.size(), KOKKOS_LAMBDA(size_t i) { + temporary_charge_list.size(), KOKKOS_LAMBDA(size_t i) { rview(i) = dpositions(i); rm1view(i) = dpositions(i); gbview(i) = dgammabetas(i); }); Kokkos::fence(); - return oof.size(); + return temporary_charge_list.size(); } // END LORENTZ FRAME AND UNDULATOR AND BUNCH @@ -969,14 +969,13 @@ KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value, int nghost = 1) { - // std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); ipos -= ldom.first(); if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { - // std::cout << "out of bounds\n"; - return; + + return; // Return in case of out of bounds } assert(fracpos[0] >= 0.0f); assert(fracpos[0] <= 1.0f); @@ -1246,14 +1245,16 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, const ippl::Vector& pos, const ippl::Vector& origin, const ippl::Vector& hr, - const ippl::NDIndex<3>& lDom) { + const ippl::NDIndex<3>& lDom, + int nghost = 1) { using vector_type = ippl::Vector; vector_type l; for (unsigned k = 0; k < 3; k++) { l[k] = (pos[k] - origin[k]) / hr[k] - + 1.0; // gather is implemented in a way such that this 1 is necessary here + + 1.0; // gather is implemented in a way such that this 1 is necessary here. + // This is NOT the nghost, this needs to be a hardcoded 1.0 } ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; @@ -1261,15 +1262,14 @@ KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, ippl::Vector wlo(1.0); wlo -= whi; - // TODO: nghost - ippl::Vector args = index - lDom.first() + 1; + ippl::Vector args = index - lDom.first() + nghost; for (unsigned k = 0; k < 3; k++) { if (args[k] >= v.extent(k) || args[k] == 0) { return typename View::value_type(0); } } - return /*{true,*/ ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, - whi, args) /*}*/; + return ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, + whi, args); } template scalar test_gauss_law(uint32_t n) { diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h index 06aa2f748..28f9668bc 100644 --- a/fel/NSFDSolverWithParticles.h +++ b/fel/NSFDSolverWithParticles.h @@ -44,33 +44,51 @@ namespace ippl{ }; template KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( - const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos) { - // return pear, ippl::Vector>{ippl::Vector{5,5,5}, - // ippl::Vector{0,0,0}}; printf("%.10e, %.10e, %.10e\n", (inverse_spacing * - // spacing)[0], (inverse_spacing * spacing)[1], (inverse_spacing * spacing)[2]); + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, int nghost = 1) { + + // Declare a pair to hold the resulting grid coordinates (integer part) and fractional part Kokkos::pair, ippl::Vector> ret; - // pos -= spacing * T(0.5); - ippl::Vector relpos = pos - origin; + + // Calculate the relative position of pos with respect to the origin + ippl::Vector relpos = pos - origin; + + // Convert the relative position to grid coordinates by dividing by the grid spacing (hr) ippl::Vector gridpos = relpos / hr; + + // Declare an integer vector to hold the integer part of the grid coordinates ippl::Vector ipos; + + // Cast the grid position to an integer vector, which gives us the integer part of the coordinates ipos = gridpos.template cast(); + + // Declare a vector to hold the fractional part of the grid coordinates ippl::Vector fracpos; + + // Calculate the fractional part of the grid coordinates for (unsigned k = 0; k < 3; k++) { - fracpos[k] = gridpos[k] - (int)ipos[k]; + fracpos[k] = gridpos[k] - static_cast(ipos[k]); } - // TODO: NGHOST!!!!!!! - ipos += ippl::Vector(1); - ret.first = ipos; + + // Add the number of ghost cells to the integer part of the coordinates + ipos += ippl::Vector(nghost); + + // Set the integer part of the coordinates in the return pair + ret.first = ipos; + + // Set the fractional part of the coordinates in the return pair ret.second = fracpos; + + // Return the pair containing both the integer and fractional parts of the grid coordinates return ret; } + template KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const scalar value) { auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); ipos -= ldom.first(); - // std::cout << pos << " 's scatter args (will have 1 added): " << ipos << "\n"; + // std::cout << pos << " 's scatter args (will have 1, or nghost in general added): " << ipos << "\n"; if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { @@ -110,13 +128,12 @@ namespace ippl{ ippl::Vector hr, ippl::Vector orig, const ippl::Vector& pos, const ippl::Vector& value) { - // std::cout << "Value: " << value << "\n"; auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); ipos -= ldom.first(); if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { - // std::cout << "out of bounds\n"; + // Out of bounds case (you'll do nothing) return; } assert(fracpos[0] >= 0.0f); @@ -160,24 +177,12 @@ namespace ippl{ gridCoordinatesOf(hr, origin, from); Kokkos::pair, ippl::Vector> to_grid = gridCoordinatesOf(hr, origin, to); - // printf("Scatterdest: %.4e, %.4e, %.4e\n", from_grid.second[0], from_grid.second[1], - // from_grid.second[2]); - for (int d = 0; d < 3; d++) { - // if(abs(from_grid.first[d] - to_grid.first[d]) > 1){ - // std::cout <(nghost); - // to_ipos += ippl::Vector(nghost); if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] && from_grid.first[2] == to_grid.first[2]) { scatterToGrid(ldom, Jview, hr, origin, - ippl::Vector((from + to) * scalar(0.5)), - ippl::Vector((to - from) * factor)); + /*Scatter point, geometric average */ ippl::Vector((from + to) * scalar(0.5)), + /*Scatter value, factor=charge / timestep */ ippl::Vector((to - from) * factor)); return; } @@ -242,11 +247,9 @@ namespace ippl{ uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)}; hr_m = mesh_mp->getMeshSpacing(); steps_taken = 0; - // field_solver.setEMFields(E, B); } template void scatterBunch() { - // ippl::Vector* gammaBeta = this->gammaBeta; auto hr_m = mesh_mp->getMeshSpacing(); const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; assert_isreal(volume); @@ -300,8 +303,6 @@ namespace ippl{ const ippl::Vector pgammabeta = gbview(i); ippl::Vector E_grid = eview(i); ippl::Vector B_grid = bview(i); - // std::cout << "E_grid: " << E_grid << "\n"; - // std::cout << "B_grid: " << B_grid << "\n"; ippl::Vector bunchpos = rview(i); Kokkos::pair, ippl::Vector> external_eb = external_field(bunchpos, time + bunch_dt * bts); diff --git a/test/solver/TestFDTDSolverWithParticles.cpp b/fel/TestFDTDSolverWithParticles.cpp similarity index 87% rename from test/solver/TestFDTDSolverWithParticles.cpp rename to fel/TestFDTDSolverWithParticles.cpp index d5d44a174..33c516918 100644 --- a/test/solver/TestFDTDSolverWithParticles.cpp +++ b/fel/TestFDTDSolverWithParticles.cpp @@ -4,10 +4,10 @@ using std::size_t; #include "Ippl.h" #include "Types/Vector.h" #include "Field/Field.h" -#include "MaxwellSolvers/FDTD.h" +#include "NSFDSolverWithParticles.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include -#include "../alpine/units.h" +#include "units.h" #include template requires((std::is_floating_point_v)) @@ -29,11 +29,6 @@ int main(int argc, char* argv[]){ ippl::initialize(argc, argv); { using scalar = float; - //const unsigned dim = 3; - //using vector_type = ippl::Vector; - //using vector4_type = ippl::Vector; - //using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; - //using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; constexpr size_t n = 100; ippl::Vector nr{n / 2, n / 2, 2 * n}; ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index 7bf9f853b..dda292fff 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -31,12 +31,6 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) -add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp) -target_link_libraries ( - TestFDTDSolverWithParticles - ${IPPL_LIBS} - ${MPI_CXX_LIBRARIES} -) if (ENABLE_FFT) add_executable (TestGaussian_convergence TestGaussian_convergence.cpp)