diff --git a/.ci_fedora.sh b/.ci_fedora.sh index b8805abb15..583bb79d7a 100755 --- a/.ci_fedora.sh +++ b/.ci_fedora.sh @@ -41,7 +41,7 @@ then # Ignore weak depencies echo "install_weak_deps=False" >> /etc/dnf/dnf.conf time dnf -y install dnf5 - time dnf5 -y install dnf5-plugins cmake python3-zoidberg python3-natsort + time dnf5 -y install dnf5-plugins cmake python3-zoidberg python3-natsort wget # Allow to override packages - see #2073 time dnf5 copr enable -y davidsch/fixes4bout || : time dnf5 -y upgrade diff --git a/CMakeLists.txt b/CMakeLists.txt index f57a78a14a..57d7793204 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -591,6 +591,9 @@ else() endif() set(BOUT_USE_METRIC_3D ${BOUT_ENABLE_METRIC_3D}) +option(BOUT_ENABLE_FCI_AUTOMAGIC "Enable (slow?) automatic features for FCI" ON) +set(BOUT_USE_FCI_AUTOMAGIC ${BOUT_ENABLE_FCI_AUTOMAGIC}) + include(CheckCXXSourceCompiles) check_cxx_source_compiles("int main() { const char* name = __PRETTY_FUNCTION__; }" HAS_PRETTY_FUNCTION) diff --git a/cmake/BOUT++functions.cmake b/cmake/BOUT++functions.cmake index 40e45f99be..77279dfd4b 100644 --- a/cmake/BOUT++functions.cmake +++ b/cmake/BOUT++functions.cmake @@ -162,7 +162,7 @@ endfunction() # function(bout_add_integrated_or_mms_test BUILD_CHECK_TARGET TESTNAME) set(options USE_RUNTEST USE_DATA_BOUT_INP) - set(oneValueArgs EXECUTABLE_NAME PROCESSORS) + set(oneValueArgs EXECUTABLE_NAME PROCESSORS DOWNLOAD DOWNLOAD_NAME) set(multiValueArgs SOURCES EXTRA_FILES REQUIRES CONFLICTS TESTARGS EXTRA_DEPENDS) cmake_parse_arguments(BOUT_TEST_OPTIONS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -202,6 +202,20 @@ function(bout_add_integrated_or_mms_test BUILD_CHECK_TARGET TESTNAME) add_custom_target(${TESTNAME}) endif() + if (BOUT_TEST_OPTIONS_DOWNLOAD) + if (NOT BOUT_TEST_OPTIONS_DOWNLOAD_NAME) + message(FATAL_ERROR "We need DOWNLOAD_NAME if we should DOWNLOAD!") + endif() + set(output ) + add_custom_command(OUTPUT ${BOUT_TEST_OPTIONS_DOWNLOAD_NAME} + COMMAND wget ${BOUT_TEST_OPTIONS_DOWNLOAD} -O ${BOUT_TEST_OPTIONS_DOWNLOAD_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Downloading ${BOUT_TEST_OPTIONS_DOWNLOAD_NAME}" + ) + add_custom_target(download_test_data DEPENDS ${BOUT_TEST_OPTIONS_DOWNLOAD_NAME}) + add_dependencies(${TESTNAME} download_test_data) + endif() + if (BOUT_TEST_OPTIONS_EXTRA_DEPENDS) add_dependencies(${TESTNAME} ${BOUT_TEST_OPTIONS_EXTRA_DEPENDS}) endif() diff --git a/cmake/SetupBOUTThirdParty.cmake b/cmake/SetupBOUTThirdParty.cmake index 9c49fe6fdc..f76e9bf017 100644 --- a/cmake/SetupBOUTThirdParty.cmake +++ b/cmake/SetupBOUTThirdParty.cmake @@ -288,7 +288,7 @@ if (BOUT_USE_SUNDIALS) set(EXAMPLES_ENABLE_C OFF CACHE BOOL "" FORCE) set(EXAMPLES_INSTALL OFF CACHE BOOL "" FORCE) set(ENABLE_MPI ${BOUT_USE_MPI} CACHE BOOL "" FORCE) - set(ENABLE_OPENMP OFF CACHE BOOL "" FORCE) + set(ENABLE_OPENMP ${BOUT_USE_OPENMP} CACHE BOOL "" FORCE) if (BUILD_SHARED_LIBS) set(BUILD_STATIC_LIBS OFF CACHE BOOL "" FORCE) else() diff --git a/cmake_build_defines.hxx.in b/cmake_build_defines.hxx.in index 4d63a01b7d..23245b1427 100644 --- a/cmake_build_defines.hxx.in +++ b/cmake_build_defines.hxx.in @@ -35,6 +35,7 @@ #cmakedefine BOUT_METRIC_TYPE @BOUT_METRIC_TYPE@ #cmakedefine01 BOUT_USE_METRIC_3D #cmakedefine01 BOUT_USE_MSGSTACK +#cmakedefine01 BOUT_USE_FCI_AUTOMAGIC // CMake build does not support legacy interface #define BOUT_HAS_LEGACY_NETCDF 0 diff --git a/examples/fci-wave-logn/boundary/BOUT.inp b/examples/fci-wave-logn/boundary/BOUT.inp index 0632aa949b..b2176c17a4 100644 --- a/examples/fci-wave-logn/boundary/BOUT.inp +++ b/examples/fci-wave-logn/boundary/BOUT.inp @@ -40,5 +40,5 @@ bndry_par_ydown = parallel_neumann_o2 [v] -bndry_par_yup = parallel_dirichlet(+1.0) -bndry_par_ydown = parallel_dirichlet(-1.0) +bndry_par_yup = parallel_dirichlet_o2(+1.0) +bndry_par_ydown = parallel_dirichlet_o2(-1.0) diff --git a/examples/fci-wave-logn/div-integrate/BOUT.inp b/examples/fci-wave-logn/div-integrate/BOUT.inp index 66bdbce5f2..582a2e7316 100644 --- a/examples/fci-wave-logn/div-integrate/BOUT.inp +++ b/examples/fci-wave-logn/div-integrate/BOUT.inp @@ -40,4 +40,4 @@ bndry_par_ydown = parallel_neumann_o2 [v] -bndry_par_all = parallel_dirichlet +bndry_par_all = parallel_dirichlet_o2 diff --git a/examples/fci-wave-logn/expanded/BOUT.inp b/examples/fci-wave-logn/expanded/BOUT.inp index e084511d24..f58574a355 100644 --- a/examples/fci-wave-logn/expanded/BOUT.inp +++ b/examples/fci-wave-logn/expanded/BOUT.inp @@ -40,4 +40,4 @@ bndry_par_ydown = parallel_neumann_o2 [v] -bndry_par_all = parallel_dirichlet +bndry_par_all = parallel_dirichlet_o2 diff --git a/examples/fci-wave/div-integrate/BOUT.inp b/examples/fci-wave/div-integrate/BOUT.inp index 68f2326f52..bd19dd40d3 100644 --- a/examples/fci-wave/div-integrate/BOUT.inp +++ b/examples/fci-wave/div-integrate/BOUT.inp @@ -41,4 +41,4 @@ bndry_par_ydown = parallel_neumann_o2 [v] -bndry_par_all = parallel_dirichlet +bndry_par_all = parallel_dirichlet_o2 diff --git a/examples/fci-wave/div/BOUT.inp b/examples/fci-wave/div/BOUT.inp index 3f497df6c7..7873a0587c 100644 --- a/examples/fci-wave/div/BOUT.inp +++ b/examples/fci-wave/div/BOUT.inp @@ -41,4 +41,4 @@ bndry_par_ydown = parallel_neumann_o2 [v] -bndry_par_all = parallel_dirichlet +bndry_par_all = parallel_dirichlet_o2 diff --git a/examples/fci-wave/logn/BOUT.inp b/examples/fci-wave/logn/BOUT.inp index 26f8a99d63..c8e2760e09 100644 --- a/examples/fci-wave/logn/BOUT.inp +++ b/examples/fci-wave/logn/BOUT.inp @@ -41,4 +41,4 @@ bndry_par_ydown = parallel_neumann_o2 [nv] -bndry_par_all = parallel_dirichlet +bndry_par_all = parallel_dirichlet_o2 diff --git a/include/bout/boundary_iterator.hxx b/include/bout/boundary_iterator.hxx new file mode 100644 index 0000000000..93f02c004d --- /dev/null +++ b/include/bout/boundary_iterator.hxx @@ -0,0 +1,117 @@ +#pragma once + +#include "bout/mesh.hxx" +#include "bout/sys/parallel_stencils.hxx" +#include "bout/sys/range.hxx" + +class BoundaryRegionIter { +public: + BoundaryRegionIter(int x, int y, int bx, int by, Mesh* mesh) + : dir(bx + by), x(x), y(y), bx(bx), by(by), localmesh(mesh) { + ASSERT3(bx * by == 0); + } + bool operator!=(const BoundaryRegionIter& rhs) { return ind() != rhs.ind(); } + + Ind3D ind() const { return xyz2ind(x, y, z); } + BoundaryRegionIter& operator++() { + ASSERT3(z < nz()); + z++; + if (z == nz()) { + z = 0; + _next(); + } + return *this; + } + virtual void _next() = 0; + BoundaryRegionIter& operator*() { return *this; } + + void dirichlet_o2(Field3D& f, BoutReal value) const { + ynext(f) = parallel_stencil::dirichlet_o2(1, f[ind()], 0.5, value); + } + + BoutReal extrapolate_grad_o2(const Field3D& f) const { return f[ind()] - yprev(f); } + + BoutReal extrapolate_sheath_o2(const Field3D& f) const { + return (f[ind()] * 3 - yprev(f)) * 0.5; + } + + BoutReal extrapolate_next_o2(const Field3D& f) const { return 2 * f[ind()] - yprev(f); } + + BoutReal + extrapolate_next_o2(const std::function& f) const { + return 2 * f(0, ind()) - f(0, ind().yp(-by).xp(-bx)); + } + + BoutReal interpolate_sheath(const Field3D& f) const { + return (f[ind()] + ynext(f)) * 0.5; + } + + BoutReal& ynext(Field3D& f) const { return f[ind().yp(by).xp(bx)]; } + const BoutReal& ynext(const Field3D& f) const { return f[ind().yp(by).xp(bx)]; } + BoutReal& yprev(Field3D& f) const { return f[ind().yp(-by).xp(-bx)]; } + const BoutReal& yprev(const Field3D& f) const { return f[ind().yp(-by).xp(-bx)]; } + + const int dir; + +protected: + int z{0}; + int x; + int y; + const int bx; + const int by; + +private: + Mesh* localmesh; + int nx() const { return localmesh->LocalNx; } + int ny() const { return localmesh->LocalNy; } + int nz() const { return localmesh->LocalNz; } + + Ind3D xyz2ind(int x, int y, int z) const { + return Ind3D{(x * ny() + y) * nz() + z, ny(), nz()}; + } +}; + +class BoundaryRegionIterY : public BoundaryRegionIter { +public: + BoundaryRegionIterY(RangeIterator r, int y, int dir, bool is_end, Mesh* mesh) + : BoundaryRegionIter(r.ind, y, 0, dir, mesh), r(r), is_end(is_end) {} + + bool operator!=(const BoundaryRegionIterY& rhs) { + ASSERT2(y == rhs.y); + if (is_end) { + if (rhs.is_end) { + return false; + } + return !rhs.r.isDone(); + } + if (rhs.is_end) { + return !r.isDone(); + } + return x != rhs.x; + } + + virtual void _next() override { + ++r; + x = r.ind; + } + +private: + RangeIterator r; + bool is_end; +}; + +class NewBoundaryRegionY { +public: + NewBoundaryRegionY(Mesh* mesh, bool lower, RangeIterator r) + : mesh(mesh), lower(lower), r(std::move(r)) {} + BoundaryRegionIterY begin(bool begin = true) { + return BoundaryRegionIterY(r, lower ? mesh->ystart : mesh->yend, lower ? -1 : +1, + !begin, mesh); + } + BoundaryRegionIterY end() { return begin(false); } + +private: + Mesh* mesh; + bool lower; + RangeIterator r; +}; diff --git a/include/bout/boundary_region.hxx b/include/bout/boundary_region.hxx index 58de12045e..22956d1d4a 100644 --- a/include/bout/boundary_region.hxx +++ b/include/bout/boundary_region.hxx @@ -4,6 +4,9 @@ class BoundaryRegion; #ifndef BOUT_BNDRY_REGION_H #define BOUT_BNDRY_REGION_H +#include "bout/mesh.hxx" +#include "bout/region.hxx" +#include "bout/sys/parallel_stencils.hxx" #include #include diff --git a/include/bout/coordinates.hxx b/include/bout/coordinates.hxx index 49feffa0a7..5ea2d276fc 100644 --- a/include/bout/coordinates.hxx +++ b/include/bout/coordinates.hxx @@ -133,9 +133,10 @@ public: transform = std::move(pt); } + bool hasParallelTransform() const { return transform != nullptr; } /// Return the parallel transform - ParallelTransform& getParallelTransform() { - ASSERT1(transform != nullptr); + ParallelTransform& getParallelTransform() const { + ASSERT1(hasParallelTransform()); return *transform; } diff --git a/include/bout/field.hxx b/include/bout/field.hxx index c0693ec0fb..1107e3f204 100644 --- a/include/bout/field.hxx +++ b/include/bout/field.hxx @@ -86,6 +86,8 @@ public: std::string name; + bool isFci() const; + #if CHECK > 0 // Routines to test guard/boundary cells set @@ -677,7 +679,27 @@ inline T floor(const T& var, BoutReal f, const std::string& rgn = "RGN_ALL") { result[d] = f; } } - + if constexpr (bout::utils::is_Field3D()) { +#if BOUT_USE_FCI_AUTOMAGIC + if (var.isFci()) { + for (size_t i = 0; i < result.numberParallelSlices(); ++i) { + BOUT_FOR(d, result.yup(i).getRegion(rgn)) { + if (result.yup(i)[d] < f) { + result.yup(i)[d] = f; + } + } + BOUT_FOR(d, result.ydown(i).getRegion(rgn)) { + if (result.ydown(i)[d] < f) { + result.ydown(i)[d] = f; + } + } + } + } else +#endif + { + result.clearParallelSlices(); + } + } return result; } diff --git a/include/bout/field2d.hxx b/include/bout/field2d.hxx index 10b801ef8d..cd036c04ff 100644 --- a/include/bout/field2d.hxx +++ b/include/bout/field2d.hxx @@ -135,8 +135,11 @@ public: return *this; } - /// Check if this field has yup and ydown fields + /// Dummy functions to increase portability bool hasParallelSlices() const { return true; } + void calcParallelSlices() const {} + void clearParallelSlices() {} + int numberParallelSlices() { return 0; } Field2D& yup(std::vector::size_type UNUSED(index) = 0) { return *this; } const Field2D& yup(std::vector::size_type UNUSED(index) = 0) const { @@ -280,7 +283,7 @@ public: friend void swap(Field2D& first, Field2D& second) noexcept; - int size() const override { return nx * ny; }; + int size() const override { return nx * ny; } private: /// Internal data array. Handles allocation/freeing of memory diff --git a/include/bout/field3d.hxx b/include/bout/field3d.hxx index ba8c8e879e..f10bb47de7 100644 --- a/include/bout/field3d.hxx +++ b/include/bout/field3d.hxx @@ -261,27 +261,38 @@ public: #endif } + /// get number of parallel slices + size_t numberParallelSlices() const { + // Do checks + hasParallelSlices(); + return yup_fields.size(); + } + /// Check if this field has yup and ydown fields /// Return reference to yup field Field3D& yup(std::vector::size_type index = 0) { ASSERT2(index < yup_fields.size()); + ASSERT2(allow_parallel_slices); return yup_fields[index]; } /// Return const reference to yup field const Field3D& yup(std::vector::size_type index = 0) const { ASSERT2(index < yup_fields.size()); + ASSERT2(allow_parallel_slices); return yup_fields[index]; } /// Return reference to ydown field Field3D& ydown(std::vector::size_type index = 0) { ASSERT2(index < ydown_fields.size()); + ASSERT2(allow_parallel_slices); return ydown_fields[index]; } /// Return const reference to ydown field const Field3D& ydown(std::vector::size_type index = 0) const { ASSERT2(index < ydown_fields.size()); + ASSERT2(allow_parallel_slices); return ydown_fields[index]; } @@ -473,6 +484,11 @@ public: friend class Vector2D; Field3D& calcParallelSlices(); + void allowParallelSlices([[maybe_unused]] bool allow) { +#if CHECK > 0 + allow_parallel_slices = allow; +#endif + } void applyBoundary(bool init = false) override; void applyBoundary(BoutReal t); @@ -487,6 +503,7 @@ public: /// Note: does not just copy values in boundary region. void setBoundaryTo(const Field3D& f3d); + using FieldData::applyParallelBoundary; void applyParallelBoundary() override; void applyParallelBoundary(BoutReal t) override; void applyParallelBoundary(const std::string& condition) override; @@ -514,6 +531,8 @@ private: /// RegionID over which the field is valid std::optional regionID; + + bool allow_parallel_slices{true}; }; // Non-member overloaded operators @@ -656,4 +675,14 @@ bool operator==(const Field3D& a, const Field3D& b); /// Output a string describing a Field3D to a stream std::ostream& operator<<(std::ostream& out, const Field3D& value); +inline Field3D copy(const Field3D& f) { + Field3D result{f}; + result.allocate(); + for (size_t i = 0; i < result.numberParallelSlices(); ++i) { + result.yup(i).allocate(); + result.ydown(i).allocate(); + } + return result; +} + #endif /* BOUT_FIELD3D_H */ diff --git a/include/bout/field_data.hxx b/include/bout/field_data.hxx index 185dcabf2d..412d6320b9 100644 --- a/include/bout/field_data.hxx +++ b/include/bout/field_data.hxx @@ -43,7 +43,8 @@ class BoundaryOpPar; class Coordinates; class Mesh; -#include "bout/boundary_region.hxx" +class BoundaryRegion; +//#include "bout/boundary_region.hxx" class BoundaryRegionPar; enum class BndryLoc; diff --git a/include/bout/fv_ops.hxx b/include/bout/fv_ops.hxx index 94007a57a2..8a9baaf3e7 100644 --- a/include/bout/fv_ops.hxx +++ b/include/bout/fv_ops.hxx @@ -10,6 +10,7 @@ #include "bout/vector2d.hxx" #include "bout/utils.hxx" +#include #include namespace FV { @@ -192,6 +193,12 @@ template const Field3D Div_par(const Field3D& f_in, const Field3D& v_in, const Field3D& wave_speed_in, bool fixflux = true) { +#if BOUT_USE_FCI_AUTOMAGIC + if (f_in.isFci()) { + return ::Div_par(f_in, v_in); + } +#endif + ASSERT1_FIELDS_COMPATIBLE(f_in, v_in); ASSERT1_FIELDS_COMPATIBLE(f_in, wave_speed_in); diff --git a/include/bout/index_derivs_interface.hxx b/include/bout/index_derivs_interface.hxx index 8f7e41a68e..73b3dcb3ba 100644 --- a/include/bout/index_derivs_interface.hxx +++ b/include/bout/index_derivs_interface.hxx @@ -200,9 +200,19 @@ template T DDY(const T& f, CELL_LOC outloc = CELL_DEFAULT, const std::string& method = "DEFAULT", const std::string& region = "RGN_NOBNDRY") { AUTO_TRACE(); - if (f.hasParallelSlices()) { + if (f.isFci()) { ASSERT1(f.getDirectionY() == YDirectionType::Standard); - return standardDerivative(f, outloc, + T f_tmp = f; + if (!f.hasParallelSlices()) { +#if BOUT_USE_FCI_AUTOMAGIC + f_tmp.calcParallelSlices(); +#else + raise BoutException( + "parallel slices needed for parallel derivatives. Make sure to communicate and " + "apply parallel boundary conditions before calling derivative"); +#endif + } + return standardDerivative(f_tmp, outloc, method, region); } else { const bool is_unaligned = (f.getDirectionY() == YDirectionType::Standard); diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 52dc38f174..def8a60a3e 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -26,6 +26,15 @@ #include "bout/mask.hxx" +#define USE_NEW_WEIGHTS 1 +#if BOUT_HAS_PETSC +#define HS_USE_PETSC 1 +#endif + +#ifdef HS_USE_PETSC +#include "bout/petsclib.hxx" +#endif + class Options; /// Interpolate a field onto a perturbed set of points @@ -43,8 +52,7 @@ public: protected: Mesh* localmesh{nullptr}; - std::string region_name; - std::shared_ptr> region{nullptr}; + int region_id{-1}; public: XZInterpolation(int y_offset = 0, Mesh* localmeshIn = nullptr) @@ -52,48 +60,44 @@ public: localmesh(localmeshIn == nullptr ? bout::globals::mesh : localmeshIn) {} XZInterpolation(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) : XZInterpolation(y_offset, mesh) { - region = regionFromMask(mask, localmesh); + setMask(mask); } XZInterpolation(const std::string& region_name, int y_offset = 0, Mesh* mesh = nullptr) - : y_offset(y_offset), localmesh(mesh), region_name(region_name) {} - XZInterpolation(std::shared_ptr> region, int y_offset = 0, - Mesh* mesh = nullptr) - : y_offset(y_offset), localmesh(mesh), region(std::move(region)) {} + : y_offset(y_offset), localmesh(mesh), + region_id(localmesh->getRegionID(region_name)) {} + XZInterpolation(const Region& region, int y_offset = 0, Mesh* mesh = nullptr) + : y_offset(y_offset), localmesh(mesh) { + setRegion(region); + } virtual ~XZInterpolation() = default; - void setMask(const BoutMask& mask) { - region = regionFromMask(mask, localmesh); - region_name = ""; - } + void setMask(const BoutMask& mask) { setRegion(regionFromMask(mask, localmesh)); } void setRegion(const std::string& region_name) { - this->region_name = region_name; - this->region = nullptr; - } - void setRegion(const std::shared_ptr>& region) { - this->region_name = ""; - this->region = region; + this->region_id = localmesh->getRegionID(region_name); } + void setRegion(const std::unique_ptr> region) { setRegion(*region); } void setRegion(const Region& region) { - this->region_name = ""; - this->region = std::make_shared>(region); + std::string name; + int i = 0; + do { + name = fmt::format("unsec_reg_xz_interp_{:d}", i++); + } while (localmesh->hasRegion3D(name)); + localmesh->addRegion(name, region); + this->region_id = localmesh->getRegionID(name); } - Region getRegion() const { - if (!region_name.empty()) { - return localmesh->getRegion(region_name); - } - ASSERT1(region != nullptr); - return *region; + const Region& getRegion() const { + ASSERT2(region_id != -1); + return localmesh->getRegion(region_id); } - Region getRegion(const std::string& region) const { - const bool has_region = !region_name.empty() or this->region != nullptr; - if (!region.empty() and region != "RGN_ALL") { - if (has_region) { - return intersection(localmesh->getRegion(region), getRegion()); - } + const Region& getRegion(const std::string& region) const { + if (region_id == -1) { return localmesh->getRegion(region); } - ASSERT1(has_region); - return getRegion(); + if (region == "" or region == "RGN_ALL") { + return getRegion(); + } + return localmesh->getRegion( + localmesh->getCommonRegion(localmesh->getRegionID(region), region_id)); } virtual void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") = 0; @@ -134,8 +138,8 @@ protected: /// This is protected rather than private so that it can be /// extended and used by HermiteSplineMonotonic - Tensor i_corner; // x-index of bottom-left grid point - Tensor k_corner; // z-index of bottom-left grid point + Tensor> i_corner; // index of bottom-left grid point + Tensor k_corner; // z-index of bottom-left grid point // Basis functions for cubic Hermite spline interpolation // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline @@ -152,12 +156,32 @@ protected: Field3D h10_z; Field3D h11_z; + std::vector newWeights; + +#if HS_USE_PETSC + PetscLib* petsclib; + bool isInit{false}; + Mat petscWeights; + Vec rhs, result; +#endif + public: XZHermiteSpline(Mesh* mesh = nullptr) : XZHermiteSpline(0, mesh) {} XZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr); XZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) : XZHermiteSpline(y_offset, mesh) { - region = regionFromMask(mask, localmesh); + setRegion(regionFromMask(mask, localmesh)); + } + ~XZHermiteSpline() { +#if HS_USE_PETSC + if (isInit) { + MatDestroy(&petscWeights); + VecDestroy(&rhs); + VecDestroy(&result); + isInit = false; + delete petsclib; + } +#endif } void calcWeights(const Field3D& delta_x, const Field3D& delta_z, @@ -188,11 +212,23 @@ public: /// problems most obviously occur. class XZMonotonicHermiteSpline : public XZHermiteSpline { public: - XZMonotonicHermiteSpline(Mesh* mesh = nullptr) : XZHermiteSpline(0, mesh) {} + XZMonotonicHermiteSpline(Mesh* mesh = nullptr) : XZHermiteSpline(0, mesh) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Do not support MPI splitting in X"); + } + } XZMonotonicHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) {} + : XZHermiteSpline(y_offset, mesh) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Do not support MPI splitting in X"); + } + } XZMonotonicHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(mask, y_offset, mesh) {} + : XZHermiteSpline(mask, y_offset, mesh) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Do not support MPI splitting in X"); + } + } using XZHermiteSpline::interpolate; /// Interpolate using precalculated weights. @@ -213,7 +249,7 @@ public: XZLagrange4pt(int y_offset = 0, Mesh* mesh = nullptr); XZLagrange4pt(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) : XZLagrange4pt(y_offset, mesh) { - region = regionFromMask(mask, localmesh); + setRegion(regionFromMask(mask, localmesh)); } void calcWeights(const Field3D& delta_x, const Field3D& delta_z, @@ -246,7 +282,7 @@ public: XZBilinear(int y_offset = 0, Mesh* mesh = nullptr); XZBilinear(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) : XZBilinear(y_offset, mesh) { - region = regionFromMask(mask, localmesh); + setRegion(regionFromMask(mask, localmesh)); } void calcWeights(const Field3D& delta_x, const Field3D& delta_z, diff --git a/include/bout/mask.hxx b/include/bout/mask.hxx index fd90ae7345..b1c094adb8 100644 --- a/include/bout/mask.hxx +++ b/include/bout/mask.hxx @@ -81,4 +81,13 @@ inline std::unique_ptr> regionFromMask(const BoutMask& mask, } return std::make_unique>(indices); } + +inline BoutMask maskFromRegion(const Region& region, const Mesh* mesh) { + BoutMask mask{mesh, false}; + //(int nx, int ny, int nz, bool value=false) : + + BOUT_FOR(i, region) { mask[i] = true; } + return mask; +} + #endif //BOUT_MASK_H diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index c80716fc12..c1a6a1336d 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -636,6 +636,19 @@ public: return inserted.first->second; } + std::shared_ptr + getCoordinatesConst(const CELL_LOC location = CELL_CENTRE) const { + ASSERT1(location != CELL_DEFAULT); + ASSERT1(location != CELL_VSHIFT); + + auto found = coords_map.find(location); + if (found != coords_map.end()) { + // True branch most common, returns immediately + return found->second; + } + throw BoutException("Coordinates not yet set. Use non-const version!"); + } + /// Returns the non-CELL_CENTRE location /// allowed as a staggered location CELL_LOC getAllowedStaggerLoc(DIRECTION direction) const { @@ -828,6 +841,16 @@ public: ASSERT1(RegionID.has_value()); return region3D[RegionID.value()]; } + bool isFci() const { + const auto coords = this->getCoordinatesConst(); + if (coords == nullptr) { + return false; + } + if (not coords->hasParallelTransform()) { + return false; + } + return not coords->getParallelTransform().canToFromFieldAligned(); + } private: /// Allocates default Coordinates objects diff --git a/include/bout/options.hxx b/include/bout/options.hxx index 4a32907b17..e51fb49da9 100644 --- a/include/bout/options.hxx +++ b/include/bout/options.hxx @@ -949,6 +949,9 @@ Tensor Options::as>(const Tensor& similar_t /// Convert \p value to string std::string toString(const Options& value); +/// Save the parallel fields +void saveParallel(Options& opt, const std::string name, const Field3D& tosave); + /// Output a stringified \p value to a stream /// /// This is templated to avoid implict casting: anything is diff --git a/include/bout/parallel_boundary_op.hxx b/include/bout/parallel_boundary_op.hxx index d8620e892b..9e551ebc17 100644 --- a/include/bout/parallel_boundary_op.hxx +++ b/include/bout/parallel_boundary_op.hxx @@ -49,7 +49,7 @@ protected: enum class ValueType { GEN, FIELD, REAL }; const ValueType value_type{ValueType::REAL}; - BoutReal getValue(const BoundaryRegionPar& bndry, BoutReal t); + BoutReal getValue(const BoundaryRegionParIter& bndry, BoutReal t); }; template @@ -95,12 +95,13 @@ public: auto dy = f.getCoordinates()->dy; - for (bndry->first(); !bndry->isDone(); bndry->next()) { - BoutReal value = getValue(*bndry, t); + for (auto pnt : *bndry) { + //for (bndry->first(); !bndry->isDone(); bndry->next()) { + BoutReal value = getValue(pnt, t); if (isNeumann) { - value *= dy[bndry->ind()]; + value *= dy[pnt.ind()]; } - static_cast(this)->apply_stencil(f, bndry, value); + static_cast(this)->apply_stencil(f, pnt, value); } } }; @@ -111,24 +112,27 @@ public: class BoundaryOpPar_dirichlet_o1 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->dirichlet_o1(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.dirichlet_o1(f, value); } }; class BoundaryOpPar_dirichlet_o2 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->dirichlet_o2(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.dirichlet_o2(f, value); } }; class BoundaryOpPar_dirichlet_o3 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->dirichlet_o3(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.dirichlet_o3(f, value); } }; @@ -136,8 +140,9 @@ class BoundaryOpPar_neumann_o1 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->neumann_o1(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.neumann_o1(f, value); } }; @@ -145,8 +150,9 @@ class BoundaryOpPar_neumann_o2 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->neumann_o2(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.neumann_o2(f, value); } }; @@ -154,8 +160,9 @@ class BoundaryOpPar_neumann_o3 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { - bndry->neumann_o3(f, value); + static void apply_stencil(Field3D& f, const BoundaryRegionParIter& pnt, + BoutReal value) { + pnt.neumann_o3(f, value); } }; diff --git a/include/bout/parallel_boundary_region.hxx b/include/bout/parallel_boundary_region.hxx index 4d5278d00f..29a986f2a6 100644 --- a/include/bout/parallel_boundary_region.hxx +++ b/include/bout/parallel_boundary_region.hxx @@ -3,8 +3,10 @@ #include "bout/boundary_region.hxx" #include "bout/bout_types.hxx" +#include #include +#include "bout/sys/parallel_stencils.hxx" #include #include @@ -14,99 +16,41 @@ * */ -namespace parallel_stencil { -// generated by src/mesh/parallel_boundary_stencil.cxx.py -inline BoutReal pow(BoutReal val, int exp) { - // constexpr int expval = exp; - // static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); - if (exp == 2) { - return val * val; - } - ASSERT3(exp == 3); - return val * val * val; -} -inline BoutReal dirichlet_o1(BoutReal UNUSED(spacing0), BoutReal value0) { - return value0; -} -inline BoutReal dirichlet_o2(BoutReal spacing0, BoutReal value0, BoutReal spacing1, - BoutReal value1) { - return (spacing0 * value1 - spacing1 * value0) / (spacing0 - spacing1); -} -inline BoutReal neumann_o2(BoutReal UNUSED(spacing0), BoutReal value0, BoutReal spacing1, - BoutReal value1) { - return -spacing1 * value0 + value1; -} -inline BoutReal dirichlet_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, - BoutReal value1, BoutReal spacing2, BoutReal value2) { - return (pow(spacing0, 2) * spacing1 * value2 - pow(spacing0, 2) * spacing2 * value1 - - spacing0 * pow(spacing1, 2) * value2 + spacing0 * pow(spacing2, 2) * value1 - + pow(spacing1, 2) * spacing2 * value0 - spacing1 * pow(spacing2, 2) * value0) - / ((spacing0 - spacing1) * (spacing0 - spacing2) * (spacing1 - spacing2)); -} -inline BoutReal neumann_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, - BoutReal value1, BoutReal spacing2, BoutReal value2) { - return (2 * spacing0 * spacing1 * value2 - 2 * spacing0 * spacing2 * value1 - + pow(spacing1, 2) * spacing2 * value0 - pow(spacing1, 2) * value2 - - spacing1 * pow(spacing2, 2) * value0 + pow(spacing2, 2) * value1) - / ((spacing1 - spacing2) * (2 * spacing0 - spacing1 - spacing2)); -} -} // namespace parallel_stencil +namespace bout { +namespace parallel_boundary_region { -class BoundaryRegionPar : public BoundaryRegionBase { +struct RealPoint { + BoutReal s_x; + BoutReal s_y; + BoutReal s_z; +}; - struct RealPoint { - BoutReal s_x; - BoutReal s_y; - BoutReal s_z; - }; - - struct Indices { - // Indices of the boundary point - Ind3D index; - // Intersection with boundary in index space - RealPoint intersection; - // Distance to intersection - BoutReal length; - // Angle between field line and boundary - // BoutReal angle; - // How many points we can go in the opposite direction - signed char valid; - }; - - using IndicesVec = std::vector; - using IndicesIter = IndicesVec::iterator; +struct Indices { + // Indices of the boundary point + Ind3D index; + // Intersection with boundary in index space + RealPoint intersection; + // Distance to intersection + BoutReal length; + // Angle between field line and boundary + // BoutReal angle; + // How many points we can go in the opposite direction + signed char valid; +}; - /// Vector of points in the boundary - IndicesVec bndry_points; - /// Current position in the boundary points - IndicesIter bndry_position; +using IndicesVec = std::vector; +using IndicesIter = IndicesVec::iterator; +using IndicesIterConst = IndicesVec::const_iterator; -public: - BoundaryRegionPar(const std::string& name, int dir, Mesh* passmesh) - : BoundaryRegionBase(name, passmesh), dir(dir) { - ASSERT0(std::abs(dir) == 1); - BoundaryRegionBase::isParallel = true; - } - BoundaryRegionPar(const std::string& name, BndryLoc loc, int dir, Mesh* passmesh) - : BoundaryRegionBase(name, loc, passmesh), dir(dir) { - BoundaryRegionBase::isParallel = true; - ASSERT0(std::abs(dir) == 1); - } - - /// Add a point to the boundary - void add_point(Ind3D ind, BoutReal x, BoutReal y, BoutReal z, BoutReal length, - signed char valid) { - bndry_points.push_back({ind, {x, y, z}, length, valid}); - } - void add_point(int ix, int iy, int iz, BoutReal x, BoutReal y, BoutReal z, - BoutReal length, signed char valid) { - bndry_points.push_back({xyz2ind(ix, iy, iz, localmesh), {x, y, z}, length, valid}); - } +//} - // final, so they can be inlined - void first() final { bndry_position = begin(bndry_points); } - void next() final { ++bndry_position; } - bool isDone() final { return (bndry_position == end(bndry_points)); } +template +class BoundaryRegionParIterBase { +public: + BoundaryRegionParIterBase(IndicesVec& bndry_points, IndicesIter bndry_position, int dir, + Mesh* localmesh) + : bndry_points(bndry_points), bndry_position(bndry_position), dir(dir), + localmesh(localmesh){}; // getter Ind3D ind() const { return bndry_position->index; } @@ -116,23 +60,73 @@ public: BoutReal length() const { return bndry_position->length; } signed char valid() const { return bndry_position->valid; } - // setter - void setValid(signed char val) { bndry_position->valid = val; } + // extrapolate a given point to the boundary + BoutReal extrapolate_sheath_o1(const Field3D& f) const { return f[ind()]; } + BoutReal extrapolate_sheath_o2(const Field3D& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_sheath_o1(f); + } + return f[ind()] * (1 + length()) - f.ynext(-dir)[ind().yp(-dir)] * length(); + } + inline BoutReal + extrapolate_sheath_o1(const std::function& f) const { + return f(0, ind()); + } + inline BoutReal + extrapolate_sheath_o2(const std::function& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_sheath_o1(f); + } + return f(0, ind()) * (1 + length()) - f(-dir, ind().yp(-dir)) * length(); + } - bool contains(const BoundaryRegionPar& bndry) const { - return std::binary_search( - begin(bndry_points), end(bndry_points), *bndry.bndry_position, - [](const Indices& i1, const Indices& i2) { return i1.index < i2.index; }); + inline BoutReal interpolate_sheath(const Field3D& f) const { + return f[ind()] * (1 - length()) + ynext(f) * length(); } - // extrapolate a given point to the boundary - BoutReal extrapolate_o1(const Field3D& f) const { return f[ind()]; } - BoutReal extrapolate_o2(const Field3D& f) const { + inline BoutReal extrapolate_next_o1(const Field3D& f) const { return f[ind()]; } + inline BoutReal extrapolate_next_o2(const Field3D& f) const { ASSERT3(valid() >= 0); if (valid() < 1) { - return extrapolate_o1(f); + return extrapolate_next_o1(f); } - return f[ind()] * (1 + length()) - f.ynext(-dir)[ind().yp(-dir)] * length(); + return f[ind()] * 2 - f.ynext(-dir)[ind().yp(-dir)]; + } + + inline BoutReal + extrapolate_next_o1(const std::function& f) const { + return f(0, ind()); + } + inline BoutReal + extrapolate_next_o2(const std::function& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_sheath_o1(f); + } + return f(0, ind()) * 2 - f(-dir, ind().yp(-dir)); + } + + // extrapolate the gradient into the boundary + inline BoutReal extrapolate_grad_o1(const Field3D& f) const { return 0; } + inline BoutReal extrapolate_grad_o2(const Field3D& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_grad_o1(f); + } + return f[ind()] - f.ynext(-dir)[ind().yp(-dir)]; + } + + BoundaryRegionParIterBase& operator*() { return *this; } + + BoundaryRegionParIterBase& operator++() { + ++bndry_position; + return *this; + } + + bool operator!=(const BoundaryRegionParIterBase& rhs) { + return bndry_position != rhs.bndry_position; } // dirichlet boundary code @@ -185,16 +179,100 @@ public: parallel_stencil::neumann_o3(1 - length(), value, 1, f[ind()], 2, yprev(f)); } - const int dir; + // BoutReal get(const Field3D& f, int off) + const BoutReal& ynext(const Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + BoutReal& ynext(Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + + const BoutReal& yprev(const Field3D& f) const { + ASSERT3(valid() > 0); + return f.ynext(-dir)[ind().yp(-dir)]; + } + BoutReal& yprev(Field3D& f) const { + ASSERT3(valid() > 0); + return f.ynext(-dir)[ind().yp(-dir)]; + } private: + const IndicesVec& bndry_points; + IndicesIter bndry_position; + constexpr static BoutReal small_value = 1e-2; - // BoutReal get(const Field3D& f, int off) - const BoutReal& ynext(const Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } - BoutReal& ynext(Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } - const BoutReal& yprev(const Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } - BoutReal& yprev(Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } +public: + const int dir; + Mesh* localmesh; +}; +} // namespace parallel_boundary_region +} // namespace bout +using BoundaryRegionParIter = bout::parallel_boundary_region::BoundaryRegionParIterBase< + bout::parallel_boundary_region::IndicesVec, + bout::parallel_boundary_region::IndicesIter>; +using BoundaryRegionParIterConst = + bout::parallel_boundary_region::BoundaryRegionParIterBase< + const bout::parallel_boundary_region::IndicesVec, + bout::parallel_boundary_region::IndicesIterConst>; + +class BoundaryRegionPar : public BoundaryRegionBase { +public: + BoundaryRegionPar(const std::string& name, int dir, Mesh* passmesh) + : BoundaryRegionBase(name, passmesh), dir(dir) { + ASSERT0(std::abs(dir) == 1); + BoundaryRegionBase::isParallel = true; + } + BoundaryRegionPar(const std::string& name, BndryLoc loc, int dir, Mesh* passmesh) + : BoundaryRegionBase(name, loc, passmesh), dir(dir) { + BoundaryRegionBase::isParallel = true; + ASSERT0(std::abs(dir) == 1); + } + + /// Add a point to the boundary + void add_point(Ind3D ind, BoutReal x, BoutReal y, BoutReal z, BoutReal length, + char valid) { + bndry_points.push_back({ind, {x, y, z}, length, valid}); + } + void add_point(int ix, int iy, int iz, BoutReal x, BoutReal y, BoutReal z, + BoutReal length, char valid) { + bndry_points.push_back({xyz2ind(ix, iy, iz, localmesh), {x, y, z}, length, valid}); + } + + // final, so they can be inlined + void first() final { bndry_position = std::begin(bndry_points); } + void next() final { ++bndry_position; } + bool isDone() final { return (bndry_position == std::end(bndry_points)); } + + bool contains(const BoundaryRegionPar& bndry) const { + return std::binary_search(std::begin(bndry_points), std::end(bndry_points), + *bndry.bndry_position, + [](const bout::parallel_boundary_region::Indices& i1, + const bout::parallel_boundary_region::Indices& i2) { + return i1.index < i2.index; + }); + } + + // setter + void setValid(char val) { bndry_position->valid = val; } + + // BoundaryRegionParIterConst begin() const { + // return BoundaryRegionParIterConst(bndry_points, bndry_points.begin(), dir); + // } + // BoundaryRegionParIterConst end() const { + // return BoundaryRegionParIterConst(bndry_points, bndry_points.begin(), dir); + // } + BoundaryRegionParIter begin() { + return BoundaryRegionParIter(bndry_points, bndry_points.begin(), dir, localmesh); + } + BoundaryRegionParIter end() { + return BoundaryRegionParIter(bndry_points, bndry_points.end(), dir, localmesh); + } + + const int dir; + +private: + /// Vector of points in the boundary + bout::parallel_boundary_region::IndicesVec bndry_points; + /// Current position in the boundary points + bout::parallel_boundary_region::IndicesIter bndry_position; + static Ind3D xyz2ind(int x, int y, int z, Mesh* mesh) { const int ny = mesh->LocalNy; const int nz = mesh->LocalNz; diff --git a/include/bout/sys/parallel_stencils.hxx b/include/bout/sys/parallel_stencils.hxx new file mode 100644 index 0000000000..34a51c5285 --- /dev/null +++ b/include/bout/sys/parallel_stencils.hxx @@ -0,0 +1,39 @@ +#pragma once + +namespace parallel_stencil { +// generated by src/mesh/parallel_boundary_stencil.cxx.py +inline BoutReal pow(BoutReal val, int exp) { + // constexpr int expval = exp; + // static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); + if (exp == 2) { + return val * val; + } + ASSERT3(exp == 3); + return val * val * val; +} +inline BoutReal dirichlet_o1(BoutReal UNUSED(spacing0), BoutReal value0) { + return value0; +} +inline BoutReal dirichlet_o2(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return (spacing0 * value1 - spacing1 * value0) / (spacing0 - spacing1); +} +inline BoutReal neumann_o2(BoutReal UNUSED(spacing0), BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return -spacing1 * value0 + value1; +} +inline BoutReal dirichlet_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (pow(spacing0, 2) * spacing1 * value2 - pow(spacing0, 2) * spacing2 * value1 + - spacing0 * pow(spacing1, 2) * value2 + spacing0 * pow(spacing2, 2) * value1 + + pow(spacing1, 2) * spacing2 * value0 - spacing1 * pow(spacing2, 2) * value0) + / ((spacing0 - spacing1) * (spacing0 - spacing2) * (spacing1 - spacing2)); +} +inline BoutReal neumann_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (2 * spacing0 * spacing1 * value2 - 2 * spacing0 * spacing2 * value1 + + pow(spacing1, 2) * spacing2 * value0 - pow(spacing1, 2) * value2 + - spacing1 * pow(spacing2, 2) * value0 + pow(spacing2, 2) * value1) + / ((spacing1 - spacing2) * (2 * spacing0 - spacing1 - spacing2)); +} +} // namespace parallel_stencil diff --git a/manual/sphinx/user_docs/boundary_options.rst b/manual/sphinx/user_docs/boundary_options.rst index 57c6658891..826f873dc1 100644 --- a/manual/sphinx/user_docs/boundary_options.rst +++ b/manual/sphinx/user_docs/boundary_options.rst @@ -147,8 +147,9 @@ shifted``, see :ref:`sec-shifted-metric`), the recommended method is to apply boundary conditions directly to the ``yup`` and ``ydown`` parallel slices. This can be done by setting ``bndry_par_yup`` and ``bndry_par_ydown``, or ``bndry_par_all`` to set both at once. The -possible values are ``parallel_dirichlet``, ``parallel_dirichlet_O3`` -and ``parallel_neumann``. The stencils used are the same as for the +possible values are ``parallel_dirichlet_o1``, ``parallel_dirichlet_o2``, +``parallel_dirichlet_o3``, ``parallel_neumann_o1``, ``parallel_neumann_o2`` +and ``parallel_neumann_o3``. The stencils used are the same as for the standard boundary conditions without the ``parallel_`` prefix, but are applied directly to parallel slices. The boundary condition can only be applied after the parallel slices are calculated, which is usually @@ -168,7 +169,7 @@ For example, for an evolving variable ``f``, put a section in the [f] bndry_xin = dirichlet bndry_xout = dirichlet - bndry_par_all = parallel_neumann + bndry_par_all = parallel_neumann_o2 bndry_ydown = none bndry_yup = none @@ -278,7 +279,7 @@ cells of the base variable. For example, for an evolving variable [f] bndry_xin = dirichlet bndry_xout = dirichlet - bndry_par_all = parallel_dirichlet + bndry_par_all = parallel_dirichlet_o2 bndry_ydown = none bndry_yup = none @@ -289,7 +290,7 @@ communication, while the perpendicular ones before: f.applyBoundary(); mesh->communicate(f); - f.applyParallelBoundary("parallel_neumann"); + f.applyParallelBoundary("parallel_neumann_o2"); Note that during grid generation care has to be taken to ensure that there are no "short" connection lengths. Otherwise it can happen that for a point on a diff --git a/src/field/field.cxx b/src/field/field.cxx index e48a8f3ef7..c9373454bf 100644 --- a/src/field/field.cxx +++ b/src/field/field.cxx @@ -39,3 +39,14 @@ int Field::getNx() const { return getMesh()->LocalNx; } int Field::getNy() const { return getMesh()->LocalNy; } int Field::getNz() const { return getMesh()->LocalNz; } + +bool Field::isFci() const { + const auto coords = this->getCoordinates(); + if (coords == nullptr) { + return false; + } + if (not coords->hasParallelTransform()) { + return false; + } + return not coords->getParallelTransform().canToFromFieldAligned(); +} diff --git a/src/field/field3d.cxx b/src/field/field3d.cxx index 4ed9641f44..89815e5899 100644 --- a/src/field/field3d.cxx +++ b/src/field/field3d.cxx @@ -91,6 +91,15 @@ Field3D::Field3D(const BoutReal val, Mesh* localmesh) : Field3D(localmesh) { TRACE("Field3D: Copy constructor from value"); *this = val; +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci()) { + splitParallelSlices(); + for (size_t i = 0; i < numberParallelSlices(); ++i) { + yup(i) = *this; + ydown(i) = *this; + } + } +#endif } Field3D::Field3D(Array data_in, Mesh* localmesh, CELL_LOC datalocation, @@ -137,6 +146,7 @@ BOUT_HOST_DEVICE Field3D* Field3D::timeDeriv() { void Field3D::splitParallelSlices() { TRACE("Field3D::splitParallelSlices"); + ASSERT2(allow_parallel_slices); if (hasParallelSlices()) { return; @@ -163,6 +173,7 @@ void Field3D::clearParallelSlices() { const Field3D& Field3D::ynext(int dir) const { #if CHECK > 0 + ASSERT2(allow_parallel_slices); // Asked for more than yguards if (std::abs(dir) > fieldmesh->ystart) { throw BoutException( @@ -342,7 +353,13 @@ Field3D& Field3D::operator=(const BoutReal val) { } Field3D& Field3D::calcParallelSlices() { + ASSERT2(allow_parallel_slices); getCoordinates()->getParallelTransform().calcParallelSlices(*this); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci()) { + this->applyParallelBoundary("parallel_neumann_o2"); + } +#endif return *this; } diff --git a/src/field/gen_fieldops.jinja b/src/field/gen_fieldops.jinja index ecd4e628cc..dede7d120f 100644 --- a/src/field/gen_fieldops.jinja +++ b/src/field/gen_fieldops.jinja @@ -12,6 +12,15 @@ {% if lhs == rhs == "Field3D" %} {{out.name}}.setRegion({{lhs.name}}.getMesh()->getCommonRegion({{lhs.name}}.getRegionID(), {{rhs.name}}.getRegionID())); +#if BOUT_USE_FCI_AUTOMAGIC + if ({{lhs.name}}.isFci() and {{lhs.name}}.hasParallelSlices() and {{rhs.name}}.hasParallelSlices()) { + {{out.name}}.splitParallelSlices(); + for (size_t i{0} ; i < {{lhs.name}}.numberParallelSlices() ; ++i) { + {{out.name}}.yup(i) = {{lhs.name}}.yup(i) {{operator}} {{rhs.name}}.yup(i); + {{out.name}}.ydown(i) = {{lhs.name}}.ydown(i) {{operator}} {{rhs.name}}.ydown(i); + } + } +#endif {% elif lhs == "Field3D" %} {{out.name}}.setRegion({{lhs.name}}.getRegionID()); {% elif rhs == "Field3D" %} @@ -78,7 +87,17 @@ {% if (lhs == "Field3D") %} // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices() {% if rhs == "Field3D" %} and {{rhs.name}}.hasParallelSlices() {% endif %}) { + for (size_t i{0} ; i < yup_fields.size() ; ++i) { + yup(i) {{operator}}= {{rhs.name}}{% if rhs == "Field3D" %}.yup(i){% endif %}; + ydown(i) {{operator}}= {{rhs.name}}{% if rhs == "Field3D" %}.ydown(i){% endif %}; + } + } else +#endif + { + clearParallelSlices(); + } {% endif %} checkData(*this); diff --git a/src/field/generated_fieldops.cxx b/src/field/generated_fieldops.cxx index 6b778acee3..74b319e314 100644 --- a/src/field/generated_fieldops.cxx +++ b/src/field/generated_fieldops.cxx @@ -15,6 +15,15 @@ Field3D operator*(const Field3D& lhs, const Field3D& rhs) { checkData(rhs); result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); +#if BOUT_USE_FCI_AUTOMAGIC + if (lhs.isFci() and lhs.hasParallelSlices() and rhs.hasParallelSlices()) { + result.splitParallelSlices(); + for (size_t i{0}; i < lhs.numberParallelSlices(); ++i) { + result.yup(i) = lhs.yup(i) * rhs.yup(i); + result.ydown(i) = lhs.ydown(i) * rhs.ydown(i); + } + } +#endif BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; @@ -33,7 +42,17 @@ Field3D& Field3D::operator*=(const Field3D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices() and rhs.hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) *= rhs.yup(i); + ydown(i) *= rhs.ydown(i); + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -59,6 +78,15 @@ Field3D operator/(const Field3D& lhs, const Field3D& rhs) { checkData(rhs); result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); +#if BOUT_USE_FCI_AUTOMAGIC + if (lhs.isFci() and lhs.hasParallelSlices() and rhs.hasParallelSlices()) { + result.splitParallelSlices(); + for (size_t i{0}; i < lhs.numberParallelSlices(); ++i) { + result.yup(i) = lhs.yup(i) / rhs.yup(i); + result.ydown(i) = lhs.ydown(i) / rhs.ydown(i); + } + } +#endif BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; @@ -77,7 +105,17 @@ Field3D& Field3D::operator/=(const Field3D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices() and rhs.hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) /= rhs.yup(i); + ydown(i) /= rhs.ydown(i); + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -103,6 +141,15 @@ Field3D operator+(const Field3D& lhs, const Field3D& rhs) { checkData(rhs); result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); +#if BOUT_USE_FCI_AUTOMAGIC + if (lhs.isFci() and lhs.hasParallelSlices() and rhs.hasParallelSlices()) { + result.splitParallelSlices(); + for (size_t i{0}; i < lhs.numberParallelSlices(); ++i) { + result.yup(i) = lhs.yup(i) + rhs.yup(i); + result.ydown(i) = lhs.ydown(i) + rhs.ydown(i); + } + } +#endif BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; @@ -121,7 +168,17 @@ Field3D& Field3D::operator+=(const Field3D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices() and rhs.hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) += rhs.yup(i); + ydown(i) += rhs.ydown(i); + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -147,6 +204,15 @@ Field3D operator-(const Field3D& lhs, const Field3D& rhs) { checkData(rhs); result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); +#if BOUT_USE_FCI_AUTOMAGIC + if (lhs.isFci() and lhs.hasParallelSlices() and rhs.hasParallelSlices()) { + result.splitParallelSlices(); + for (size_t i{0}; i < lhs.numberParallelSlices(); ++i) { + result.yup(i) = lhs.yup(i) - rhs.yup(i); + result.ydown(i) = lhs.ydown(i) - rhs.ydown(i); + } + } +#endif BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; @@ -165,7 +231,17 @@ Field3D& Field3D::operator-=(const Field3D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices() and rhs.hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) -= rhs.yup(i); + ydown(i) -= rhs.ydown(i); + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -214,7 +290,17 @@ Field3D& Field3D::operator*=(const Field2D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) *= rhs; + ydown(i) *= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -267,7 +353,17 @@ Field3D& Field3D::operator/=(const Field2D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) /= rhs; + ydown(i) /= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -320,7 +416,17 @@ Field3D& Field3D::operator+=(const Field2D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) += rhs; + ydown(i) += rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -372,7 +478,17 @@ Field3D& Field3D::operator-=(const Field2D& rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) -= rhs; + ydown(i) -= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -497,7 +613,17 @@ Field3D& Field3D::operator*=(const BoutReal rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) *= rhs; + ydown(i) *= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -538,7 +664,17 @@ Field3D& Field3D::operator/=(const BoutReal rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) /= rhs; + ydown(i) /= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -579,7 +715,17 @@ Field3D& Field3D::operator+=(const BoutReal rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) += rhs; + ydown(i) += rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); @@ -619,7 +765,17 @@ Field3D& Field3D::operator-=(const BoutReal rhs) { // Delete existing parallel slices. We don't copy parallel slices, so any // that currently exist will be incorrect. - clearParallelSlices(); +#if BOUT_USE_FCI_AUTOMAGIC + if (this->isFci() and this->hasParallelSlices()) { + for (size_t i{0}; i < yup_fields.size(); ++i) { + yup(i) -= rhs; + ydown(i) -= rhs; + } + } else +#endif + { + clearParallelSlices(); + } checkData(*this); checkData(rhs); diff --git a/src/invert/laplace/invert_laplace.cxx b/src/invert/laplace/invert_laplace.cxx index 4032499781..bd839256c3 100644 --- a/src/invert/laplace/invert_laplace.cxx +++ b/src/invert/laplace/invert_laplace.cxx @@ -226,10 +226,10 @@ Field3D Laplacian::solve(const Field3D& b, const Field3D& x0) { // Setting the start and end range of the y-slices int ys = localmesh->ystart, ye = localmesh->yend; - if (localmesh->hasBndryLowerY() && include_yguards) { + if (include_yguards && localmesh->hasBndryLowerY()) { ys = 0; // Mesh contains a lower boundary } - if (localmesh->hasBndryUpperY() && include_yguards) { + if (include_yguards && localmesh->hasBndryUpperY()) { ye = localmesh->LocalNy - 1; // Contains upper boundary } diff --git a/src/mesh/boundary_factory.cxx b/src/mesh/boundary_factory.cxx index 00282566a9..d112a216ad 100644 --- a/src/mesh/boundary_factory.cxx +++ b/src/mesh/boundary_factory.cxx @@ -318,7 +318,7 @@ BoundaryOpBase* BoundaryFactory::createFromOptions(const string& varname, /// Then (all, all) if (region->isParallel) { // Different default for parallel boundary regions - varOpts->get(prefix + "par_all", set, "parallel_dirichlet"); + varOpts->get(prefix + "par_all", set, "parallel_dirichlet_o2"); } else { varOpts->get(prefix + "all", set, "dirichlet"); } diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx index 4e515449ca..6892063102 100644 --- a/src/mesh/coordinates.cxx +++ b/src/mesh/coordinates.cxx @@ -942,6 +942,13 @@ const Field2D& Coordinates::zlength() const { int Coordinates::geometry(bool recalculate_staggered, bool force_interpolate_from_centre) { TRACE("Coordinates::geometry"); + { + std::vector fields{dx, dy, dz, g11, g22, g33, g12, g13, + g23, g_11, g_22, g_33, g_12, g_13, g_23, J}; + for (auto& f : fields) { + f.allowParallelSlices(false); + } + } communicate(dx, dy, dz, g11, g22, g33, g12, g13, g23, g_11, g_22, g_33, g_12, g_13, g_23, J, Bxy); diff --git a/src/mesh/fv_ops.cxx b/src/mesh/fv_ops.cxx index cd5b924e9e..c35d6e0d40 100644 --- a/src/mesh/fv_ops.cxx +++ b/src/mesh/fv_ops.cxx @@ -72,7 +72,7 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { if (!coord->g23.hasParallelSlices() || !coord->g_23.hasParallelSlices() || !coord->dy.hasParallelSlices() || !coord->dz.hasParallelSlices() || !coord->Bxy.hasParallelSlices() || !coord->J.hasParallelSlices()) { - throw BoutException("metrics have no yup/down: Maybe communicate in init?"); + throw BoutException("metrics have no yup/down!"); } } @@ -82,11 +82,11 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { // Values on this y slice (centre). // This is needed because toFieldAligned may modify the field - const auto f_slice = makeslices(fci, f); - const auto a_slice = makeslices(fci, a); + const auto f_slice = makeslices(false, f); + const auto a_slice = makeslices(false, a); // Only in 3D case with FCI do the metrics have parallel slices - const bool metric_fci = fci and bout::build::use_metric_3d; + const bool metric_fci = a.isFci() and bout::build::use_metric_3d; const auto g23 = makeslices(metric_fci, coord->g23); const auto g_23 = makeslices(metric_fci, coord->g_23); const auto J = makeslices(metric_fci, coord->J); @@ -96,9 +96,7 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { // Result of the Y and Z fluxes Field3D yzresult(0.0, mesh); - if (!fci) { - yzresult.setDirectionY(YDirectionType::Aligned); - } + yzresult.setDirectionY(YDirectionType::Aligned); // Y flux @@ -169,12 +167,7 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { } } - // Check if we need to transform back - if (fci) { - result += yzresult; - } else { - result += fromFieldAligned(yzresult); - } + result += fromFieldAligned(yzresult); return result; } @@ -183,6 +176,10 @@ const Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin, bool bndry_flux) { TRACE("FV::Div_par_K_Grad_par"); + if (Kin.isFci()) { + return ::Div_par_K_Grad_par(Kin, fin); + } + ASSERT2(Kin.getLocation() == fin.getLocation()); Mesh* mesh = Kin.getMesh(); diff --git a/src/mesh/impls/bout/boutmesh.cxx b/src/mesh/impls/bout/boutmesh.cxx index 16061cd47e..0c4bf2bc27 100644 --- a/src/mesh/impls/bout/boutmesh.cxx +++ b/src/mesh/impls/bout/boutmesh.cxx @@ -49,6 +49,9 @@ #include #include +//#include "bout/boundary_region.hxx" +//#include "bout/parallel_boundary_region.hxx" + #include #include #include @@ -2814,6 +2817,9 @@ void BoutMesh::addBoundaryRegions() { } RangeIterator BoutMesh::iterateBndryLowerInnerY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } int xs = 0; int xe = LocalNx - 1; @@ -2849,6 +2855,9 @@ RangeIterator BoutMesh::iterateBndryLowerInnerY() const { } RangeIterator BoutMesh::iterateBndryLowerOuterY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } int xs = 0; int xe = LocalNx - 1; @@ -2883,6 +2892,10 @@ RangeIterator BoutMesh::iterateBndryLowerOuterY() const { } RangeIterator BoutMesh::iterateBndryLowerY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } + int xs = 0; int xe = LocalNx - 1; if ((DDATA_INDEST >= 0) && (DDATA_XSPLIT > xstart)) { @@ -2912,6 +2925,10 @@ RangeIterator BoutMesh::iterateBndryLowerY() const { } RangeIterator BoutMesh::iterateBndryUpperInnerY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } + int xs = 0; int xe = LocalNx - 1; @@ -2946,6 +2963,10 @@ RangeIterator BoutMesh::iterateBndryUpperInnerY() const { } RangeIterator BoutMesh::iterateBndryUpperOuterY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } + int xs = 0; int xe = LocalNx - 1; @@ -2980,6 +3001,10 @@ RangeIterator BoutMesh::iterateBndryUpperOuterY() const { } RangeIterator BoutMesh::iterateBndryUpperY() const { + if (this->isFci()) { + throw BoutException("FCI should never use this iterator"); + } + int xs = 0; int xe = LocalNx - 1; if ((UDATA_INDEST >= 0) && (UDATA_XSPLIT > xstart)) { diff --git a/src/mesh/interpolation/bilinear_xz.cxx b/src/mesh/interpolation/bilinear_xz.cxx index 8445764a8f..4facdac34c 100644 --- a/src/mesh/interpolation/bilinear_xz.cxx +++ b/src/mesh/interpolation/bilinear_xz.cxx @@ -31,6 +31,10 @@ XZBilinear::XZBilinear(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), w0(localmesh), w1(localmesh), w2(localmesh), w3(localmesh) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Do not support MPI splitting in X"); + } + // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index a8e5df7cdf..1973ff56cd 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -20,15 +20,89 @@ * **************************************************************************/ +#include "../impls/bout/boutmesh.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" -#include "bout/mesh.hxx" #include -XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* mesh) - : XZInterpolation(y_offset, mesh), h00_x(localmesh), h01_x(localmesh), +class IndConverter { +public: + IndConverter(Mesh* mesh) + : mesh(dynamic_cast(mesh)), nxpe(mesh->getNXPE()), nype(mesh->getNYPE()), + xstart(mesh->xstart), ystart(mesh->ystart), zstart(0), + lnx(mesh->LocalNx - 2 * xstart), lny(mesh->LocalNy - 2 * ystart), + lnz(mesh->LocalNz - 2 * zstart) {} + // ix and iy are global indices + // iy is local + int fromMeshToGlobal(int ix, int iy, int iz) { + const int xstart = mesh->xstart; + const int lnx = mesh->LocalNx - xstart * 2; + // x-proc-id + int pex = divToNeg(ix - xstart, lnx); + if (pex < 0) { + pex = 0; + } + if (pex >= nxpe) { + pex = nxpe - 1; + } + const int zstart = 0; + const int lnz = mesh->LocalNz - zstart * 2; + // z-proc-id + // pez only for wrapping around ; later needs similar treatment than pey + const int pez = divToNeg(iz - zstart, lnz); + // y proc-id - y is already local + const int ystart = mesh->ystart; + const int lny = mesh->LocalNy - ystart * 2; + const int pey_offset = divToNeg(iy - ystart, lny); + int pey = pey_offset + mesh->getYProcIndex(); + while (pey < 0) { + pey += nype; + } + while (pey >= nype) { + pey -= nype; + } + ASSERT2(pex >= 0); + ASSERT2(pex < nxpe); + ASSERT2(pey >= 0); + ASSERT2(pey < nype); + return fromLocalToGlobal(ix - pex * lnx, iy - pey_offset * lny, iz - pez * lnz, pex, + pey, 0); + } + int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz) { + return fromLocalToGlobal(ilocalx, ilocaly, ilocalz, mesh->getXProcIndex(), + mesh->getYProcIndex(), 0); + } + int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz, + const int pex, const int pey, const int pez) { + ASSERT3(ilocalx >= 0); + ASSERT3(ilocaly >= 0); + ASSERT3(ilocalz >= 0); + const int ilocal = ((ilocalx * mesh->LocalNy) + ilocaly) * mesh->LocalNz + ilocalz; + const int ret = ilocal + + mesh->LocalNx * mesh->LocalNy * mesh->LocalNz + * ((pey * nxpe + pex) * nzpe + pez); + ASSERT3(ret >= 0); + ASSERT3(ret < nxpe * nype * mesh->LocalNx * mesh->LocalNy * mesh->LocalNz); + return ret; + } + +private: + // number of procs + BoutMesh* mesh; + const int nxpe; + const int nype; + const int nzpe{1}; + const int xstart, ystart, zstart; + const int lnx, lny, lnz; + static int divToNeg(const int n, const int d) { + return (n < 0) ? ((n - d + 1) / d) : (n / d); + } +}; + +XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) + : XZInterpolation(y_offset, meshin), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { @@ -38,7 +112,6 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* mesh) // Initialise in order to avoid 'uninitialized value' errors from Valgrind when using // guard-cell values - i_corner = -1; k_corner = -1; // Allocate Field3D members @@ -50,45 +123,77 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* mesh) h01_z.allocate(); h10_z.allocate(); h11_z.allocate(); + +#if USE_NEW_WEIGHTS + newWeights.reserve(16); + for (int w = 0; w < 16; ++w) { + newWeights.emplace_back(localmesh); + newWeights[w].allocate(); + } +#ifdef HS_USE_PETSC + petsclib = new PetscLib( + &Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]); + // MatCreate(MPI_Comm comm,Mat *A) + // MatCreate(MPI_COMM_WORLD, &petscWeights); + // MatSetSizes(petscWeights, m, m, M, M); + // PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, + // PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], + // PetscInt o_nz, const PetscInt o_nnz[], Mat *A) + // MatSetSizes(Mat A,PetscInt m,PetscInt n,PetscInt M,PetscInt N) + const int m = localmesh->LocalNx * localmesh->LocalNy * localmesh->LocalNz; + const int M = m * localmesh->getNXPE() * localmesh->getNYPE(); + MatCreateAIJ(MPI_COMM_WORLD, m, m, M, M, 16, nullptr, 16, nullptr, &petscWeights); +#endif +#endif +#ifndef HS_USE_PETSC + if (localmesh->getNXPE() > 1) { + throw BoutException("Require PETSc for MPI splitting in X"); + } +#endif } void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region) { - const int ncz = localmesh->LocalNz; - const auto curregion{getRegion(region)}; - BOUT_FOR(i, curregion) { + const int ny = localmesh->LocalNy; + const int nz = localmesh->LocalNz; + const int xend = (localmesh->xend - localmesh->xstart + 1) * localmesh->getNXPE() + + localmesh->xstart - 1; +#ifdef HS_USE_PETSC + IndConverter conv{localmesh}; +#endif + BOUT_FOR(i, getRegion(region)) { const int x = i.x(); const int y = i.y(); const int z = i.z(); // The integer part of xt_prime, zt_prime are the indices of the cell // containing the field line end-point - i_corner(x, y, z) = static_cast(floor(delta_x(x, y, z))); + int i_corn = static_cast(floor(delta_x(x, y, z))); k_corner(x, y, z) = static_cast(floor(delta_z(x, y, z))); // t_x, t_z are the normalised coordinates \in [0,1) within the cell // calculated by taking the remainder of the floating point index - BoutReal t_x = delta_x(x, y, z) - static_cast(i_corner(x, y, z)); + BoutReal t_x = delta_x(x, y, z) - static_cast(i_corn); BoutReal t_z = delta_z(x, y, z) - static_cast(k_corner(x, y, z)); // NOTE: A (small) hack to avoid one-sided differences - if (i_corner(x, y, z) >= localmesh->xend) { - i_corner(x, y, z) = localmesh->xend - 1; + if (i_corn >= xend) { + i_corn = xend - 1; t_x = 1.0; } - if (i_corner(x, y, z) < localmesh->xstart) { - i_corner(x, y, z) = localmesh->xstart; + if (i_corn < localmesh->xstart) { + i_corn = localmesh->xstart; t_x = 0.0; } - k_corner(x, y, z) = ((k_corner(x, y, z) % ncz) + ncz) % ncz; + k_corner(x, y, z) = ((k_corner(x, y, z) % nz) + nz) % nz; // Check that t_x and t_z are in range if ((t_x < 0.0) || (t_x > 1.0)) { throw BoutException( - "t_x={:e} out of range at ({:d},{:d},{:d}) (delta_x={:e}, i_corner={:d})", t_x, - x, y, z, delta_x(x, y, z), i_corner(x, y, z)); + "t_x={:e} out of range at ({:d},{:d},{:d}) (delta_x={:e}, i_corn={:d})", t_x, x, + y, z, delta_x(x, y, z), i_corn); } if ((t_z < 0.0) || (t_z > 1.0)) { @@ -97,18 +202,107 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z x, y, z, delta_z(x, y, z), k_corner(x, y, z)); } - h00_x(x, y, z) = (2. * t_x * t_x * t_x) - (3. * t_x * t_x) + 1.; - h00_z(x, y, z) = (2. * t_z * t_z * t_z) - (3. * t_z * t_z) + 1.; + i_corner[i] = SpecificInd( + (((i_corn * ny) + (y + y_offset)) * nz + k_corner(x, y, z)), ny, nz); + + h00_x[i] = (2. * t_x * t_x * t_x) - (3. * t_x * t_x) + 1.; + h00_z[i] = (2. * t_z * t_z * t_z) - (3. * t_z * t_z) + 1.; - h01_x(x, y, z) = (-2. * t_x * t_x * t_x) + (3. * t_x * t_x); - h01_z(x, y, z) = (-2. * t_z * t_z * t_z) + (3. * t_z * t_z); + h01_x[i] = (-2. * t_x * t_x * t_x) + (3. * t_x * t_x); + h01_z[i] = (-2. * t_z * t_z * t_z) + (3. * t_z * t_z); - h10_x(x, y, z) = t_x * (1. - t_x) * (1. - t_x); - h10_z(x, y, z) = t_z * (1. - t_z) * (1. - t_z); + h10_x[i] = t_x * (1. - t_x) * (1. - t_x); + h10_z[i] = t_z * (1. - t_z) * (1. - t_z); - h11_x(x, y, z) = (t_x * t_x * t_x) - (t_x * t_x); - h11_z(x, y, z) = (t_z * t_z * t_z) - (t_z * t_z); + h11_x[i] = (t_x * t_x * t_x) - (t_x * t_x); + h11_z[i] = (t_z * t_z * t_z) - (t_z * t_z); + +#if USE_NEW_WEIGHTS + + for (int w = 0; w < 16; ++w) { + newWeights[w][i] = 0; + } + // The distribution of our weights: + // 0 4 8 12 + // 1 5 9 13 + // 2 6 10 14 + // 3 7 11 15 + // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); + + // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; + newWeights[5][i] += h00_x[i] * h00_z[i]; + newWeights[9][i] += h01_x[i] * h00_z[i]; + newWeights[9][i] += h10_x[i] * h00_z[i] / 2; + newWeights[1][i] -= h10_x[i] * h00_z[i] / 2; + newWeights[13][i] += h11_x[i] * h00_z[i] / 2; + newWeights[5][i] -= h11_x[i] * h00_z[i] / 2; + + // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + + // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h01_z[i]; + newWeights[10][i] += h01_x[i] * h01_z[i]; + newWeights[10][i] += h10_x[i] * h01_z[i] / 2; + newWeights[2][i] -= h10_x[i] * h01_z[i] / 2; + newWeights[14][i] += h11_x[i] * h01_z[i] / 2; + newWeights[6][i] -= h11_x[i] * h01_z[i] / 2; + + // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + + // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h10_z[i] / 2; + newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; + newWeights[10][i] += h01_x[i] * h10_z[i] / 2; + newWeights[8][i] -= h01_x[i] * h10_z[i] / 2; + newWeights[10][i] += h10_x[i] * h10_z[i] / 4; + newWeights[8][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[2][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[0][i] += h10_x[i] * h10_z[i] / 4; + newWeights[14][i] += h11_x[i] * h10_z[i] / 4; + newWeights[12][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[6][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[4][i] += h11_x[i] * h10_z[i] / 4; + + // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + newWeights[7][i] += h00_x[i] * h11_z[i] / 2; + newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; + newWeights[11][i] += h01_x[i] * h11_z[i] / 2; + newWeights[9][i] -= h01_x[i] * h11_z[i] / 2; + newWeights[11][i] += h10_x[i] * h11_z[i] / 4; + newWeights[9][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[3][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[1][i] += h10_x[i] * h11_z[i] / 4; + newWeights[15][i] += h11_x[i] * h11_z[i] / 4; + newWeights[13][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[7][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[5][i] += h11_x[i] * h11_z[i] / 4; +#ifdef HS_USE_PETSC + PetscInt idxn[1] = {conv.fromLocalToGlobal(x, y + y_offset, z)}; + // output.write("debug: {:d} -> {:d}: {:d}:{:d} -> {:d}:{:d}\n", + // conv.fromLocalToGlobal(x, y + y_offset, z), + // conv.fromMeshToGlobal(i_corn, y + y_offset, k_corner(x, y, z)), + // x, z, i_corn, k_corner(x, y, z)); + // ixstep = mesh->LocalNx * mesh->LocalNz; + for (int j = 0; j < 4; ++j) { + PetscInt idxm[4]; + PetscScalar vals[4]; + for (int k = 0; k < 4; ++k) { + idxm[k] = conv.fromMeshToGlobal(i_corn - 1 + j, y + y_offset, + k_corner(x, y, z) - 1 + k); + vals[k] = newWeights[j * 4 + k][i]; + } + MatSetValues(petscWeights, 1, idxn, 4, idxm, vals, INSERT_VALUES); + } +#endif +#endif } +#ifdef HS_USE_PETSC + MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); + if (!isInit) { + MatCreateVecs(petscWeights, &rhs, &result); + } + isInit = true; +#endif } void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, @@ -135,11 +329,11 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z */ std::vector XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { - const int ncz = localmesh->LocalNz; + const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); - const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (ncz - 1); - const int k_mod_p1 = (k_mod + 1) % ncz; - const int k_mod_p2 = (k_mod + 2) % ncz; + const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); + const int k_mod_p1 = (k_mod == nz) ? 0 : k_mod + 1; + const int k_mod_p2 = (k_mod_p1 == nz) ? 0 : k_mod_p1 + 1; return {{i, j + yoffset, k_mod_m1, -0.5 * h10_z(i, j, k)}, {i, j + yoffset, k_mod, h00_z(i, j, k) - 0.5 * h11_z(i, j, k)}, @@ -152,75 +346,81 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; - // Derivatives are used for tension and need to be on dimensionless - // coordinates - Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT"); - localmesh->communicateXZ(fx); - // communicate in y, but do not calculate parallel slices - { - auto h = localmesh->sendY(fx); - localmesh->wait(h); + const auto region2 = y_offset ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; + +#if USE_NEW_WEIGHTS +#ifdef HS_USE_PETSC + BoutReal* ptr; + const BoutReal* cptr; + VecGetArray(rhs, &ptr); + BOUT_FOR(i, f.getRegion("RGN_NOY")) { ptr[int(i)] = f[i]; } + VecRestoreArray(rhs, &ptr); + MatMult(petscWeights, rhs, result); + VecGetArrayRead(result, &cptr); + BOUT_FOR(i, f.getRegion(region2)) { + f_interp[i] = cptr[int(i)]; + ASSERT2(std::isfinite(cptr[int(i)])); } - Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", "RGN_ALL"); - localmesh->communicateXZ(fz); - // communicate in y, but do not calculate parallel slices - { - auto h = localmesh->sendY(fz); - localmesh->wait(h); - } - Field3D fxz = bout::derivatives::index::DDX(fz, CELL_DEFAULT, "DEFAULT"); - localmesh->communicateXZ(fxz); - // communicate in y, but do not calculate parallel slices - { - auto h = localmesh->sendY(fxz); - localmesh->wait(h); + VecRestoreArrayRead(result, &cptr); +#else + BOUT_FOR(i, getRegion(region)) { + auto ic = i_corner[i]; + auto iyp = i.yp(y_offset); + + f_interp[iyp] = 0; + for (int w = 0; w < 4; ++w) { + f_interp[iyp] += newWeights[w * 4 + 0][i] * f[ic.zm().xp(w - 1)]; + f_interp[iyp] += newWeights[w * 4 + 1][i] * f[ic.xp(w - 1)]; + f_interp[iyp] += newWeights[w * 4 + 2][i] * f[ic.zp().xp(w - 1)]; + f_interp[iyp] += newWeights[w * 4 + 3][i] * f[ic.zp(2).xp(w - 1)]; + } } +#endif +#else + // Derivatives are used for tension and need to be on dimensionless + // coordinates - const auto curregion{getRegion(region)}; - BOUT_FOR(i, curregion) { - const int x = i.x(); - const int y = i.y(); - const int z = i.z(); + // f has been communcated, and thus we can assume that the x-boundaries are + // also valid in the y-boundary. Thus the differentiated field needs no + // extra comms. + Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT", region2); + Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); + Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); - // Due to lack of guard cells in z-direction, we need to ensure z-index - // wraps around - const int z_mod = k_corner(x, y, z); - const int z_mod_p1 = (z_mod + 1) % localmesh->LocalNz; + BOUT_FOR(i, getRegion(region)) { + const auto iyp = i.yp(y_offset); - const int y_next = y + y_offset; + const auto ic = i_corner[i]; + const auto iczp = ic.zp(); + const auto icxp = ic.xp(); + const auto icxpzp = iczp.xp(); // Interpolate f in X at Z - const BoutReal f_z = f(i_corner(x, y, z), y_next, z_mod) * h00_x(x, y, z) - + f(i_corner(x, y, z) + 1, y_next, z_mod) * h01_x(x, y, z) - + fx(i_corner(x, y, z), y_next, z_mod) * h10_x(x, y, z) - + fx(i_corner(x, y, z) + 1, y_next, z_mod) * h11_x(x, y, z); + const BoutReal f_z = + f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f(i_corner(x, y, z), y_next, z_mod_p1) * h00_x(x, y, z) - + f(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h01_x(x, y, z) - + fx(i_corner(x, y, z), y_next, z_mod_p1) * h10_x(x, y, z) - + fx(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h11_x(x, y, z); + const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] + + fx[icxpzp] * h11_x[i]; // Interpolate fz in X at Z - const BoutReal fz_z = fz(i_corner(x, y, z), y_next, z_mod) * h00_x(x, y, z) - + fz(i_corner(x, y, z) + 1, y_next, z_mod) * h01_x(x, y, z) - + fxz(i_corner(x, y, z), y_next, z_mod) * h10_x(x, y, z) - + fxz(i_corner(x, y, z) + 1, y_next, z_mod) * h11_x(x, y, z); + const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] + + fxz[icxp] * h11_x[i]; // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = - fz(i_corner(x, y, z), y_next, z_mod_p1) * h00_x(x, y, z) - + fz(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h01_x(x, y, z) - + fxz(i_corner(x, y, z), y_next, z_mod_p1) * h10_x(x, y, z) - + fxz(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h11_x(x, y, z); + const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; // Interpolate in Z - f_interp(x, y_next, z) = +f_z * h00_z(x, y, z) + f_zp1 * h01_z(x, y, z) - + fz_z * h10_z(x, y, z) + fz_zp1 * h11_z(x, y, z); + f_interp[iyp] = + +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; - ASSERT2(std::isfinite(f_interp(x, y_next, z)) || x < localmesh->xstart - || x > localmesh->xend); + ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart + || i.x() > localmesh->xend); } +#endif + f_interp.setRegion(region2); + ASSERT2(f_interp.getRegionID()); return f_interp; } diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index 92c14ecfd5..8fa201ba72 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -29,6 +29,10 @@ XZLagrange4pt::XZLagrange4pt(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), t_x(localmesh), t_z(localmesh) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Do not support MPI splitting in X"); + } + // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); diff --git a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx index abedb27733..4b84bcd265 100644 --- a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx @@ -59,48 +59,35 @@ Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, const auto curregion{getRegion(region)}; BOUT_FOR(i, curregion) { - const int x = i.x(); - const int y = i.y(); - const int z = i.z(); + const auto iyp = i.yp(y_offset); - // Due to lack of guard cells in z-direction, we need to ensure z-index - // wraps around - const int ncz = localmesh->LocalNz; - const int z_mod = ((k_corner(x, y, z) % ncz) + ncz) % ncz; - const int z_mod_p1 = (z_mod + 1) % ncz; - - const int y_next = y + y_offset; + const auto ic = i_corner[i]; + const auto iczp = ic.zp(); + const auto icxp = ic.xp(); + const auto icxpzp = iczp.xp(); // Interpolate f in X at Z - const BoutReal f_z = f(i_corner(x, y, z), y_next, z_mod) * h00_x(x, y, z) - + f(i_corner(x, y, z) + 1, y_next, z_mod) * h01_x(x, y, z) - + fx(i_corner(x, y, z), y_next, z_mod) * h10_x(x, y, z) - + fx(i_corner(x, y, z) + 1, y_next, z_mod) * h11_x(x, y, z); + const BoutReal f_z = + f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f(i_corner(x, y, z), y_next, z_mod_p1) * h00_x(x, y, z) - + f(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h01_x(x, y, z) - + fx(i_corner(x, y, z), y_next, z_mod_p1) * h10_x(x, y, z) - + fx(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h11_x(x, y, z); + const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] + + fx[icxpzp] * h11_x[i]; // Interpolate fz in X at Z - const BoutReal fz_z = fz(i_corner(x, y, z), y_next, z_mod) * h00_x(x, y, z) - + fz(i_corner(x, y, z) + 1, y_next, z_mod) * h01_x(x, y, z) - + fxz(i_corner(x, y, z), y_next, z_mod) * h10_x(x, y, z) - + fxz(i_corner(x, y, z) + 1, y_next, z_mod) * h11_x(x, y, z); + const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] + + fxz[icxp] * h11_x[i]; // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = - fz(i_corner(x, y, z), y_next, z_mod_p1) * h00_x(x, y, z) - + fz(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h01_x(x, y, z) - + fxz(i_corner(x, y, z), y_next, z_mod_p1) * h10_x(x, y, z) - + fxz(i_corner(x, y, z) + 1, y_next, z_mod_p1) * h11_x(x, y, z); + const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; // Interpolate in Z - BoutReal result = +f_z * h00_z(x, y, z) + f_zp1 * h01_z(x, y, z) - + fz_z * h10_z(x, y, z) + fz_zp1 * h11_z(x, y, z); + BoutReal result = + +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; - ASSERT2(std::isfinite(result) || x < localmesh->xstart || x > localmesh->xend); + ASSERT2(std::isfinite(result) || i.x() < localmesh->xstart + || i.x() > localmesh->xend); // Monotonicity // Force the interpolated result to be in the range of the @@ -108,18 +95,14 @@ Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, // but also degrades accuracy near maxima and minima. // Perhaps should only impose near boundaries, since that is where // problems most obviously occur. - const BoutReal localmax = BOUTMAX(f(i_corner(x, y, z), y_next, z_mod), - f(i_corner(x, y, z) + 1, y_next, z_mod), - f(i_corner(x, y, z), y_next, z_mod_p1), - f(i_corner(x, y, z) + 1, y_next, z_mod_p1)); + const BoutReal localmax = BOUTMAX(f[ic], f[icxp], f[iczp], f[icxpzp]); - const BoutReal localmin = BOUTMIN(f(i_corner(x, y, z), y_next, z_mod), - f(i_corner(x, y, z) + 1, y_next, z_mod), - f(i_corner(x, y, z), y_next, z_mod_p1), - f(i_corner(x, y, z) + 1, y_next, z_mod_p1)); + const BoutReal localmin = BOUTMIN(f[ic], f[icxp], f[iczp], f[icxpzp]); - ASSERT2(std::isfinite(localmax) || x < localmesh->xstart || x > localmesh->xend); - ASSERT2(std::isfinite(localmin) || x < localmesh->xstart || x > localmesh->xend); + ASSERT2(std::isfinite(localmax) || i.x() < localmesh->xstart + || i.x() > localmesh->xend); + ASSERT2(std::isfinite(localmin) || i.x() < localmesh->xstart + || i.x() > localmesh->xend); if (result > localmax) { result = localmax; @@ -128,7 +111,7 @@ Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, result = localmin; } - f_interp(x, y_next, z) = result; + f_interp[iyp] = result; } return f_interp; } diff --git a/src/mesh/parallel/fci.cxx b/src/mesh/parallel/fci.cxx index cb8c19bbd7..3363d331e1 100644 --- a/src/mesh/parallel/fci.cxx +++ b/src/mesh/parallel/fci.cxx @@ -160,6 +160,8 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& const int ncz = map_mesh.LocalNz; BoutMask to_remove(map_mesh); + const int xend = + map_mesh.xstart + (map_mesh.xend - map_mesh.xstart + 1) * map_mesh.getNXPE() - 1; // Serial loop because call to BoundaryRegionPar::addPoint // (probably?) can't be done in parallel BOUT_FOR_SERIAL(i, xt_prime.getRegion("RGN_NOBNDRY")) { @@ -173,7 +175,7 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& } } - if ((xt_prime[i] >= map_mesh.xstart) and (xt_prime[i] <= map_mesh.xend)) { + if ((xt_prime[i] >= map_mesh.xstart) and (xt_prime[i] <= xend)) { // Not a boundary continue; } diff --git a/src/mesh/parallel/shiftedmetricinterp.cxx b/src/mesh/parallel/shiftedmetricinterp.cxx index 7f3637e79c..aa400b3141 100644 --- a/src/mesh/parallel/shiftedmetricinterp.cxx +++ b/src/mesh/parallel/shiftedmetricinterp.cxx @@ -29,6 +29,8 @@ #include "shiftedmetricinterp.hxx" #include "bout/constants.hxx" +//#include "bout/mask.hxx" +//#include #include "bout/parallel_boundary_region.hxx" ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, diff --git a/src/mesh/parallel_boundary_op.cxx b/src/mesh/parallel_boundary_op.cxx index ebd9852791..df164ce43f 100644 --- a/src/mesh/parallel_boundary_op.cxx +++ b/src/mesh/parallel_boundary_op.cxx @@ -5,17 +5,14 @@ #include "bout/mesh.hxx" #include "bout/output.hxx" -BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) { - BoutReal value; - +BoutReal BoundaryOpPar::getValue(const BoundaryRegionParIter& bndry, BoutReal t) { switch (value_type) { case ValueType::GEN: return gen_values->generate(bout::generator::Context( bndry.s_x(), bndry.s_y(), bndry.s_z(), CELL_CENTRE, bndry.localmesh, t)); case ValueType::FIELD: // FIXME: Interpolate to s_x, s_y, s_z... - value = (*field_values)[bndry.ind()]; - return value; + return (*field_values)[bndry.ind()]; case ValueType::REAL: return real_value; default: diff --git a/src/sys/options.cxx b/src/sys/options.cxx index e2f39542fd..0b48c1b1b8 100644 --- a/src/sys/options.cxx +++ b/src/sys/options.cxx @@ -337,6 +337,22 @@ void Options::assign<>(Tensor val, std::string source) { _set_no_check(std::move(val), std::move(source)); } +void saveParallel(Options& opt, const std::string name, const Field3D& tosave) { + ASSERT2(tosave.hasParallelSlices()); + opt[name] = tosave; + for (size_t i0 = 1; i0 <= tosave.numberParallelSlices(); ++i0) { + for (int i : {i0, -i0}) { + Field3D tmp; + tmp.allocate(); + const auto& fpar = tosave.ynext(i); + for (auto j : fpar.getValidRegionWithDefault("RGN_NOBNDRY")) { + tmp[j.yp(-i)] = fpar[j]; + } + opt[fmt::format("{}_y{:+d}", name, i)] = tmp; + } + } +} + namespace { /// Use FieldFactory to evaluate expression double parseExpression(const Options::ValueType& value, const Options* options, diff --git a/tests/MMS/spatial/fci/data/BOUT.inp b/tests/MMS/spatial/fci/data/BOUT.inp index 802f70f99b..b4825c6207 100644 --- a/tests/MMS/spatial/fci/data/BOUT.inp +++ b/tests/MMS/spatial/fci/data/BOUT.inp @@ -21,4 +21,4 @@ y_periodic = true z_periodic = true [mesh:paralleltransform:xzinterpolation] -type = lagrange4pt +type = hermitespline diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 204a9cc271..b51c311a50 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -21,7 +21,7 @@ from sys import stdout import zoidberg as zb -nx = 3 # Not changed for these tests +nx = 4 # Not changed for these tests # Resolution in y and z nlist = [8, 16, 32, 64, 128] @@ -49,92 +49,115 @@ failures = [] build_and_log("FCI MMS test") for nslice in nslices: - error_2[nslice] = [] - error_inf[nslice] = [] - - # Which central difference scheme to use and its expected order - order = nslice * 2 - method_orders[nslice] = {"name": "C{}".format(order), "order": order} - - for n in nlist: - # Define the magnetic field using new poloidal gridding method - # Note that the Bz and Bzprime parameters here must be the same as in mms.py - field = zb.field.Slab(Bz=0.05, Bzprime=0.1) - # Create rectangular poloidal grids - poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid(nx, n, 0.1, 1.0) - # Set the ylength and y locations - ylength = 10.0 - - if yperiodic: - ycoords = linspace(0.0, ylength, n, endpoint=False) + for method in [ + "hermitespline", + "lagrange4pt", + "bilinear", + # "monotonichermitespline", + ]: + error_2[nslice] = [] + error_inf[nslice] = [] + + # Which central difference scheme to use and its expected order + order = nslice * 2 + method_orders[nslice] = {"name": "C{}".format(order), "order": order} + + for n in nlist: + # Define the magnetic field using new poloidal gridding method + # Note that the Bz and Bzprime parameters here must be the same as in mms.py + field = zb.field.Slab(Bz=0.05, Bzprime=0.1) + # Create rectangular poloidal grids + poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid( + nx, n, 0.1, 1.0, MXG=1 + ) + # Set the ylength and y locations + ylength = 10.0 + + if yperiodic: + ycoords = linspace(0.0, ylength, n, endpoint=False) + else: + # Doesn't include the end points + ycoords = (arange(n) + 0.5) * ylength / float(n) + + # Create the grid + grid = zb.grid.Grid(poloidal_grid, ycoords, ylength, yperiodic=yperiodic) + # Make and write maps + maps = zb.make_maps(grid, field, nslice=nslice, quiet=True, MXG=1) + zb.write_maps( + grid, + field, + maps, + new_names=False, + metric2d=conf.isMetric2D(), + quiet=True, + ) + + args = " MZ={} MYG={} mesh:paralleltransform:y_periodic={} mesh:ddy:first={} NXPE={}".format( + n, + nslice, + yperiodic, + method_orders[nslice]["name"], + 2 if conf.has["petsc"] and method == "hermitespline" else 1, + ) + args += f" mesh:paralleltransform:xzinterpolation:type={method}" + + # Command to run + cmd = "./fci_mms " + args + + print("Running command: " + cmd) + + # Launch using MPI + s, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True) + + # Save output to log file + with open("run.log." + str(n), "w") as f: + f.write(out) + + if s: + print("Run failed!\nOutput was:\n") + print(out) + exit(s) + + # Collect data + l_2 = collect( + "l_2", + tind=[1, 1], + info=False, + path=directory, + xguards=False, + yguards=False, + ) + l_inf = collect( + "l_inf", + tind=[1, 1], + info=False, + path=directory, + xguards=False, + yguards=False, + ) + + error_2[nslice].append(l_2) + error_inf[nslice].append(l_inf) + + print("Errors : l-2 {:f} l-inf {:f}".format(l_2, l_inf)) + + dx = 1.0 / array(nlist) + + # Calculate convergence order + fit = polyfit(log(dx), log(error_2[nslice]), 1) + order = fit[0] + stdout.write("Convergence order = {:f} (fit)".format(order)) + + order = log(error_2[nslice][-2] / error_2[nslice][-1]) / log(dx[-2] / dx[-1]) + stdout.write(", {:f} (small spacing)".format(order)) + + # Should be close to the expected order + if order > method_orders[nslice]["order"] * 0.95: + print("............ PASS\n") else: - # Doesn't include the end points - ycoords = (arange(n) + 0.5) * ylength / float(n) - - # Create the grid - grid = zb.grid.Grid(poloidal_grid, ycoords, ylength, yperiodic=yperiodic) - # Make and write maps - maps = zb.make_maps(grid, field, nslice=nslice, quiet=True) - zb.write_maps( - grid, field, maps, new_names=False, metric2d=conf.isMetric2D(), quiet=True - ) - - args = " MZ={} MYG={} mesh:paralleltransform:y_periodic={} mesh:ddy:first={}".format( - n, nslice, yperiodic, method_orders[nslice]["name"] - ) - - # Command to run - cmd = "./fci_mms " + args - - print("Running command: " + cmd) - - # Launch using MPI - s, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True) - - # Save output to log file - with open("run.log." + str(n), "w") as f: - f.write(out) - - if s: - print("Run failed!\nOutput was:\n") - print(out) - exit(s) - - # Collect data - l_2 = collect( - "l_2", tind=[1, 1], info=False, path=directory, xguards=False, yguards=False - ) - l_inf = collect( - "l_inf", - tind=[1, 1], - info=False, - path=directory, - xguards=False, - yguards=False, - ) - - error_2[nslice].append(l_2) - error_inf[nslice].append(l_inf) - - print("Errors : l-2 {:f} l-inf {:f}".format(l_2, l_inf)) - - dx = 1.0 / array(nlist) - - # Calculate convergence order - fit = polyfit(log(dx), log(error_2[nslice]), 1) - order = fit[0] - stdout.write("Convergence order = {:f} (fit)".format(order)) - - order = log(error_2[nslice][-2] / error_2[nslice][-1]) / log(dx[-2] / dx[-1]) - stdout.write(", {:f} (small spacing)".format(order)) - - # Should be close to the expected order - if order > method_orders[nslice]["order"] * 0.95: - print("............ PASS\n") - else: - print("............ FAIL\n") - success = False - failures.append(method_orders[nslice]["name"]) + print("............ FAIL\n") + success = False + failures.append(method_orders[nslice]["name"]) with open("fci_mms.pkl", "wb") as output: diff --git a/tests/integrated/CMakeLists.txt b/tests/integrated/CMakeLists.txt index ef173db7df..e11403efb9 100644 --- a/tests/integrated/CMakeLists.txt +++ b/tests/integrated/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(test-delp2) add_subdirectory(test-datafilefacade) add_subdirectory(test-drift-instability) add_subdirectory(test-drift-instability-staggered) +add_subdirectory(test-fci-mpi) add_subdirectory(test-fieldgroupComm) add_subdirectory(test-fci-boundary) add_subdirectory(test-griddata) diff --git a/tests/integrated/test-fci-boundary/get_par_bndry.cxx b/tests/integrated/test-fci-boundary/get_par_bndry.cxx index ac0f5de2a6..9d49640682 100644 --- a/tests/integrated/test-fci-boundary/get_par_bndry.cxx +++ b/tests/integrated/test-fci-boundary/get_par_bndry.cxx @@ -17,8 +17,8 @@ int main(int argc, char** argv) { for (const auto& bndry_par : mesh->getBoundariesPar(static_cast(i))) { output.write("{:s} region\n", toString(static_cast(i))); - for (bndry_par->first(); !bndry_par->isDone(); bndry_par->next()) { - fields[i][bndry_par->ind()] += 1; + for (auto& pnt : *bndry_par) { + fields[i][pnt.ind()] += 1; output.write("{:s} increment\n", toString(static_cast(i))); } } diff --git a/tests/integrated/test-fci-mpi/CMakeLists.txt b/tests/integrated/test-fci-mpi/CMakeLists.txt new file mode 100644 index 0000000000..0dd38487a3 --- /dev/null +++ b/tests/integrated/test-fci-mpi/CMakeLists.txt @@ -0,0 +1,9 @@ +bout_add_mms_test(test-fci-mpi + SOURCES fci_mpi.cxx + USE_RUNTEST + USE_DATA_BOUT_INP + PROCESSORS 6 + DOWNLOAD https://zenodo.org/record/7614499/files/W7X-conf4-36x8x128.fci.nc?download=1 + DOWNLOAD_NAME grid.fci.nc + REQUIRES BOUT_HAS_PETSC +) diff --git a/tests/integrated/test-fci-mpi/data/BOUT.inp b/tests/integrated/test-fci-mpi/data/BOUT.inp new file mode 100644 index 0000000000..47272dab61 --- /dev/null +++ b/tests/integrated/test-fci-mpi/data/BOUT.inp @@ -0,0 +1,28 @@ +grid = grid.fci.nc + +[mesh] +symmetricglobalx = true + +[mesh:ddy] +first = C2 +second = C2 + +[mesh:paralleltransform] +type = fci +y_periodic = true +z_periodic = true + +[mesh:paralleltransform:xzinterpolation] +type = hermitespline + +[input_0] +function = sin(z) + +[input_1] +function = cos(y) + +[input_2] +function = sin(x) + +[input_3] +function = sin(x) * sin(z) * cos(y) diff --git a/tests/integrated/test-fci-mpi/fci_mpi.cxx b/tests/integrated/test-fci-mpi/fci_mpi.cxx new file mode 100644 index 0000000000..94520dd4a6 --- /dev/null +++ b/tests/integrated/test-fci-mpi/fci_mpi.cxx @@ -0,0 +1,38 @@ +#include "bout/bout.hxx" +#include "bout/derivs.hxx" +#include "bout/field_factory.hxx" + +int main(int argc, char** argv) { + BoutInitialise(argc, argv); + { + using bout::globals::mesh; + Options* options = Options::getRoot(); + int i = 0; + const std::string default_str{"not_set"}; + Options dump; + while (true) { + std::string temp_str; + options->get(fmt::format("input_{:d}:function", i), temp_str, default_str); + if (temp_str == default_str) { + break; + } + Field3D input{FieldFactory::get()->create3D(fmt::format("input_{:d}:function", i), + Options::getRoot(), mesh)}; + // options->get(fmt::format("input_{:d}:boundary_perp", i), temp_str, s"free_o3"); + mesh->communicate(input); + input.applyParallelBoundary("parallel_neumann_o2"); + for (int slice = -mesh->ystart; slice <= mesh->ystart; ++slice) { + if (slice != 0) { + Field3D tmp{0.}; + BOUT_FOR(i, tmp.getRegion("RGN_NOBNDRY")) { + tmp[i] = input.ynext(slice)[i.yp(slice)]; + } + dump[fmt::format("output_{:d}_{:+d}", i, slice)] = tmp; + } + } + ++i; + } + bout::writeDefaultOutputFile(dump); + } + BoutFinalise(); +} diff --git a/tests/integrated/test-fci-mpi/runtest b/tests/integrated/test-fci-mpi/runtest new file mode 100755 index 0000000000..6676f8f7a5 --- /dev/null +++ b/tests/integrated/test-fci-mpi/runtest @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# +# Python script to run and analyse MMS test +# + +# Cores: 8 +# requires: metric_3d + +from boututils.run_wrapper import build_and_log, launch_safe, shell_safe +from boutdata.collect import collect +import boutconfig as conf +import itertools + +import numpy as np + +# Resolution in x and y +nlist = [1, 2, 4] + +maxcores = 8 + +nslices = [1] + +success = True + +build_and_log("FCI MMS test") + +for nslice in nslices: + for NXPE, NYPE in itertools.product(nlist, nlist): + if NXPE * NYPE > maxcores: + continue + + args = f"NXPE={NXPE} NYPE={NYPE}" + # Command to run + cmd = f"./fci_mpi {args}" + + print(f"Running command: {cmd}") + + mthread = maxcores // (NXPE * NYPE) + # Launch using MPI + _, out = launch_safe(cmd, nproc=NXPE * NYPE, mthread=mthread, pipe=True) + + # Save output to log file + with open(f"run.log.{NXPE}.{NYPE}.{nslice}.log", "w") as f: + f.write(out) + + collect_kw = dict(info=False, xguards=False, yguards=False, path="data") + if NXPE == NYPE == 1: + # reference data! + ref = {} + for i in range(4): + for yp in range(1, nslice + 1): + for y in [-yp, yp]: + name = f"output_{i}_{y:+d}" + ref[name] = collect(name, **collect_kw) + else: + for name, val in ref.items(): + assert np.allclose(val, collect(name, **collect_kw)) diff --git a/tests/integrated/test-interpolate/data/BOUT.inp b/tests/integrated/test-interpolate/data/BOUT.inp index 101c63f3c7..804e780bbe 100644 --- a/tests/integrated/test-interpolate/data/BOUT.inp +++ b/tests/integrated/test-interpolate/data/BOUT.inp @@ -4,7 +4,6 @@ # MZ = 4 # Z size -NXPE = 1 ZMAX = 1 MXG = 2 diff --git a/tests/integrated/test-interpolate/runtest b/tests/integrated/test-interpolate/runtest index 08975cfd33..f5460aff2a 100755 --- a/tests/integrated/test-interpolate/runtest +++ b/tests/integrated/test-interpolate/runtest @@ -6,6 +6,7 @@ from boututils.run_wrapper import build_and_log, shell, launch_safe from boutdata import collect +import boutconfig from numpy import sqrt, max, abs, mean, array, log, polyfit from sys import stdout, exit @@ -15,9 +16,6 @@ show_plot = False # List of NX values to use nxlist = [16, 32, 64, 128] -# Only testing 2D (x, z) slices, so only need one processor -nproc = 1 - # Variables to compare varlist = ["a", "b", "c"] markers = ["bo", "r^", "kx"] @@ -48,11 +46,9 @@ for method in methods: for nx in nxlist: dx = 1.0 / (nx) - args = ( - " mesh:nx={nx4} mesh:dx={dx} MZ={nx} xzinterpolation:type={method}".format( - nx4=nx + 4, dx=dx, nx=nx, method=method - ) - ) + args = f" mesh:nx={nx + 4} mesh:dx={dx} MZ={nx} xzinterpolation:type={method}" + nproc = 2 if method == "hermitespline" and boutconfig.has["petsc"] else 1 + args += f" NXPE={nproc}" cmd = "./test_interpolate" + args @@ -71,6 +67,17 @@ for method in methods: E = interp - solution + if False: + import matplotlib.pyplot as plt + + def myplot(f, lbl=None): + plt.plot(f[:, 0, 6], label=lbl) + + myplot(interp, "interp") + myplot(solution, "sol") + plt.legend() + plt.show() + l2 = float(sqrt(mean(E**2))) linf = float(max(abs(E))) diff --git a/tests/integrated/test-interpolate/test_interpolate.cxx b/tests/integrated/test-interpolate/test_interpolate.cxx index a090877552..33963dbb9e 100644 --- a/tests/integrated/test-interpolate/test_interpolate.cxx +++ b/tests/integrated/test-interpolate/test_interpolate.cxx @@ -30,88 +30,90 @@ std::shared_ptr getGeneratorFromOptions(const std::string& varna int main(int argc, char** argv) { BoutInitialise(argc, argv); - - // Random number generator - std::default_random_engine generator; - // Uniform distribution of BoutReals from 0 to 1 - std::uniform_real_distribution distribution{0.0, 1.0}; - - using bout::globals::mesh; - - FieldFactory f(mesh); - - // Set up generators and solutions for three different analtyic functions - std::string a_func; - auto a_gen = getGeneratorFromOptions("a", a_func); - Field3D a = f.create3D(a_func); - Field3D a_solution = 0.0; - Field3D a_interp = 0.0; - - std::string b_func; - auto b_gen = getGeneratorFromOptions("b", b_func); - Field3D b = f.create3D(b_func); - Field3D b_solution = 0.0; - Field3D b_interp = 0.0; - - std::string c_func; - auto c_gen = getGeneratorFromOptions("c", c_func); - Field3D c = f.create3D(c_func); - Field3D c_solution = 0.0; - Field3D c_interp = 0.0; - - // x and z displacements - Field3D deltax = 0.0; - Field3D deltaz = 0.0; - - // Bind the random number generator and distribution into a single function - auto dice = std::bind(distribution, generator); - - for (const auto& index : deltax) { - // Get some random displacements - BoutReal dx = index.x() + dice(); - BoutReal dz = index.z() + dice(); - // For the last point, put the displacement inwards - // Otherwise we try to interpolate in the guard cells, which doesn't work so well - if (index.x() >= mesh->xend) { - dx = index.x() - dice(); + { + // Random number generator + const std::default_random_engine generator; + // Uniform distribution of BoutReals from 0 to 1 + const std::uniform_real_distribution distribution{0.0, 1.0}; + + using bout::globals::mesh; + + const FieldFactory fieldfact(mesh); + + // Set up generators and solutions for three different analtyic functions + std::string a_func; + auto a_gen = getGeneratorFromOptions("a", a_func); + const Field3D a = fieldfact.create3D(a_func); + Field3D a_solution = 0.0; + Field3D a_interp = 0.0; + + std::string b_func; + auto b_gen = getGeneratorFromOptions("b", b_func); + const Field3D b = fieldfact.create3D(b_func); + Field3D b_solution = 0.0; + Field3D b_interp = 0.0; + + std::string c_func; + auto c_gen = getGeneratorFromOptions("c", c_func); + const Field3D c = fieldfact.create3D(c_func); + Field3D c_solution = 0.0; + Field3D c_interp = 0.0; + + // x and z displacements + Field3D deltax = 0.0; + Field3D deltaz = 0.0; + + // Bind the random number generator and distribution into a single function + auto dice = std::bind(distribution, generator); + + for (const auto& index : deltax) { + // Get some random displacements + BoutReal dx = index.x() + dice(); + BoutReal dz = index.z() + dice(); + // For the last point, put the displacement inwards + // Otherwise we try to interpolate in the guard cells, which doesn't work so well + if (index.x() >= mesh->xend && mesh->getNXPE() - 1 == mesh->getXProcIndex()) { + dx = index.x() - dice(); + } + deltax[index] = dx; + deltaz[index] = dz; + // Get the global indices + bout::generator::Context pos{index, CELL_CENTRE, deltax.getMesh(), 0.0}; + pos.set("x", mesh->GlobalX(dx), "z", + TWOPI * static_cast(dz) / static_cast(mesh->LocalNz)); + // Generate the analytic solution at the displacements + a_solution[index] = a_gen->generate(pos); + b_solution[index] = b_gen->generate(pos); + c_solution[index] = c_gen->generate(pos); } - deltax[index] = dx; - deltaz[index] = dz; - // Get the global indices - bout::generator::Context pos{index, CELL_CENTRE, deltax.getMesh(), 0.0}; - pos.set("x", mesh->GlobalX(dx), "z", - TWOPI * static_cast(dz) / static_cast(mesh->LocalNz)); - // Generate the analytic solution at the displacements - a_solution[index] = a_gen->generate(pos); - b_solution[index] = b_gen->generate(pos); - c_solution[index] = c_gen->generate(pos); - } - // Create the interpolation object from the input options - auto interp = XZInterpolationFactory::getInstance().create(); + deltax += (mesh->LocalNx - mesh->xstart * 2) * mesh->getXProcIndex(); + // Create the interpolation object from the input options + auto interp = XZInterpolationFactory::getInstance().create(); - // Interpolate the analytic functions at the displacements - a_interp = interp->interpolate(a, deltax, deltaz); - b_interp = interp->interpolate(b, deltax, deltaz); - c_interp = interp->interpolate(c, deltax, deltaz); + // Interpolate the analytic functions at the displacements + a_interp = interp->interpolate(a, deltax, deltaz); + b_interp = interp->interpolate(b, deltax, deltaz); + c_interp = interp->interpolate(c, deltax, deltaz); - Options dump; + Options dump; - dump["a"] = a; - dump["a_interp"] = a_interp; - dump["a_solution"] = a_solution; + dump["a"] = a; + dump["a_interp"] = a_interp; + dump["a_solution"] = a_solution; - dump["b"] = b; - dump["b_interp"] = b_interp; - dump["b_solution"] = b_solution; + dump["b"] = b; + dump["b_interp"] = b_interp; + dump["b_solution"] = b_solution; - dump["c"] = c; - dump["c_interp"] = c_interp; - dump["c_solution"] = c_solution; + dump["c"] = c; + dump["c_interp"] = c_interp; + dump["c_solution"] = c_solution; - bout::writeDefaultOutputFile(dump); + bout::writeDefaultOutputFile(dump); - bout::checkForUnusedOptions(); + bout::checkForUnusedOptions(); + } BoutFinalise(); return 0; diff --git a/tests/unit/include/test_derivs.cxx b/tests/unit/include/test_derivs.cxx index a6b8e43ef0..783af4f446 100644 --- a/tests/unit/include/test_derivs.cxx +++ b/tests/unit/include/test_derivs.cxx @@ -332,6 +332,7 @@ TEST_P(FirstDerivativesInterfaceTest, Sanity) { result = bout::derivatives::index::DDX(input); break; case DIRECTION::Y: + input.setDirectionY(YDirectionType::Aligned); result = bout::derivatives::index::DDY(input); break; case DIRECTION::Z: diff --git a/tests/unit/sys/test_options.cxx b/tests/unit/sys/test_options.cxx index dbb687880d..866e61f90e 100644 --- a/tests/unit/sys/test_options.cxx +++ b/tests/unit/sys/test_options.cxx @@ -1097,7 +1097,8 @@ value6 = 12 } TEST_F(OptionsTest, InvalidFormat) { - EXPECT_THROW(fmt::format("{:nope}", Options{}), fmt::format_error); + std::string unused; + EXPECT_THROW(unused = fmt::format("{:nope}", Options{}), fmt::format_error); } TEST_F(OptionsTest, FormatValue) {