diff --git a/cmake/dca_config.cmake b/cmake/dca_config.cmake index bcf8ff05f..702d8af7f 100644 --- a/cmake/dca_config.cmake +++ b/cmake/dca_config.cmake @@ -145,6 +145,8 @@ elseif (DCA_LATTICE STREQUAL "Kagome") "dca/phys/models/analytic_hamiltonians/Kagome_hubbard.hpp") elseif (DCA_LATTICE STREQUAL "hund") set(DCA_LATTICE_TYPE dca::phys::models::HundLattice) + set(DCA_LATTICE_INCLUDE + "dca/phys/models/analytic_hamiltonians/hund_lattice.hpp") elseif (DCA_LATTICE STREQUAL "threeband") set(DCA_LATTICE_TYPE dca::phys::models::ThreebandHubbard) set(DCA_LATTICE_INCLUDE diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp index 5aef9dd73..6cc5d55df 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp @@ -117,6 +117,8 @@ class CtauxClusterSolver { return g0_; }; + Walker::Resource& getResource() { return dummy_walker_resource_; }; + protected: void warmUp(Walker& walker); @@ -161,6 +163,7 @@ class CtauxClusterSolver { G0Interpolation g0_; + Walker::Resource walker_dummy_resource_; private: Rng rng_; diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp index 1d60d0522..e822f4025 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp @@ -66,6 +66,9 @@ class CtauxWalker : public WalkerBIT, public CtauxWalkerData::value; + struct DummyResource {}; + using Resource = DummyResource; + public: /** constructor * param[in] id thread id diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp index a68e8f26f..b73a9faed 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp @@ -35,6 +35,7 @@ #include "dca/phys/dca_step/cluster_solver/ctint/details/solver_methods.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/domains/common_domains.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_choice.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" //#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp" #include "dca/phys/dca_data/dca_data.hpp" @@ -74,6 +75,8 @@ class CtintClusterSolver { using Data = DcaData; static constexpr linalg::DeviceType device = device_t; + using DMatrixBuilder = ctint::DMatrixBuilder; + CtintClusterSolver(Parameters& parameters_ref, Data& Data_ref, std::shared_ptr> writer); @@ -108,7 +111,8 @@ class CtintClusterSolver { // Returns the function G(k,w) without averaging across MPI ranks. auto local_G_k_w() const; -protected: // thread jacket interface. + DMatrixBuilder& getResource() { return *d_matrix_builder_; }; +protected: // thread jacket interface. using ParametersType = Parameters; using DataType = Data; using Rng = typename Parameters::random_number_generator; @@ -166,6 +170,7 @@ class CtintClusterSolver { std::unique_ptr walker_; // Walker input. Rng rng_; + std::unique_ptr d_matrix_builder_; }; template @@ -174,11 +179,10 @@ CtintClusterSolver::CtintClusterSolver( : parameters_(parameters_ref), concurrency_(parameters_.get_concurrency()), data_(data_ref), - accumulator_(parameters_, data_), writer_(writer), rng_(concurrency_.id(), concurrency_.number_of_processors(), parameters_.get_seed()) { - Walker::setDMatrixBuilder(g0_); + d_matrix_builder_ = std::make_unique(g0_, PARAM::bands, RDmn()); Walker::setInteractionVertices(data_, parameters_); if (concurrency_.id() == concurrency_.first()) @@ -191,7 +195,7 @@ void CtintClusterSolver::initialize(i g0_.initializeShrinked(data_.G0_r_t_cluster_excluded); - Walker::setDMatrixAlpha(parameters_.getAlphas(), parameters_.adjustAlphaDd()); + d_matrix_builder_->setAlphas(parameters_.getAlphas(), parameters_.adjustAlphaDd()); // It is a waiting to happen bug for this to be here and in CtintAccumulator accumulator_.initialize(dca_iteration_); @@ -203,7 +207,7 @@ void CtintClusterSolver::initialize(i template void CtintClusterSolver::integrate() { - walker_ = std::make_unique(parameters_, data_, rng_, 0); + walker_ = std::make_unique(parameters_, data_, rng_, d_matrix_builder_, 0); walker_->initialize(dca_iteration_); dca::profiling::WallTime start_time; diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp index 44015c6a7..70cfb39c4 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp @@ -80,15 +80,22 @@ class CtintWalkerBase { void computeM(MatrixPair& m_accum) const; // Reset the counters and recompute the configuration sign and weight. - void markThermalized(); + virtual void markThermalized() = 0; /** Recompute the matrix M from the configuration in O(expansion_order^3) time. * Postcondition: sign_ and mc_log_weight_ are recomputed. * mc_log_weight is the negative sum of the log det of both sectors of M * + log of the abs of each vertices interaction strength. */ - virtual void setMFromConfig(); + virtual void setMFromConfig() = 0; + virtual void doStep() = 0; + + virtual void doSweep() = 0; + + template + void setMFromConfigImpl(DMatrixBuilder& d_matrix_builder); + bool is_thermalized() const { return thermalized_; } @@ -152,14 +159,6 @@ class CtintWalkerBase { buff >> configuration_; } - // Initialize the builder object shared by all walkers. - template - static void setDMatrixBuilder(const G0Interpolation& g0); - - static void setDMatrixAlpha(const std::array& alphas, bool adjust_dd); - - static void setInteractionVertices(const Data& data, const Parameters& parameters); - double stealFLOPs() { auto flop = flop_; flop_ = 0.; @@ -173,12 +172,11 @@ class CtintWalkerBase { static void sumConcurrency(const Concurrency&) {} - const auto& get_dmatrix_builder() const { - return *d_builder_ptr_; - } - void writeAlphas() const; + static void setInteractionVertices(const Data& data, + const Parameters& parameters); + protected: // typedefs using RDmn = typename Parameters::RClusterDmn; @@ -187,7 +185,6 @@ class CtintWalkerBase { void updateSweepAverages(); protected: // Members. - static inline std::unique_ptr> d_builder_ptr_; static inline InteractionVertices vertices_; const Parameters& parameters_; @@ -271,41 +268,6 @@ void CtintWalkerBase::initialize(int iteration) { setMFromConfig(); } -template -void CtintWalkerBase::setMFromConfig() { - mc_log_weight_ = 0.; - phase_.reset(); - - for (int s = 0; s < 2; ++s) { - // compute Mij = g0(t_i,t_j) - I* alpha(s_i) - - const auto& sector = configuration_.getSector(s); - auto& M = M_[s]; - const int n = sector.size(); - M.resize(n); - if (!n) - continue; - for (int j = 0; j < n; ++j) - for (int i = 0; i < n; ++i) - M(i, j) = d_builder_ptr_->computeD(i, j, sector); - - if (M.nrRows()) { - const auto [log_det, phase] = linalg::matrixop::inverseAndLogDeterminant(M); - - mc_log_weight_ -= log_det; // Weight proportional to det(M^{-1}) - phase_.divide(phase); - } - } - - // So what is going on here. - for (int i = 0; i < configuration_.size(); ++i) { - // This is actual interaction strength of the vertex i.e H_int(nu1, nu2, delta_r) - const Real term = -configuration_.getStrength(i); - mc_log_weight_ += std::log(std::abs(term)); - phase_.multiply(term); - } -} - template void CtintWalkerBase::updateSweepAverages() { order_avg_.addSample(order()); @@ -315,39 +277,22 @@ void CtintWalkerBase::updateSweepAverages() { partial_order_avg_.addSample(order()); } -template -void CtintWalkerBase::writeAlphas() const { - std::cout << "For initial configuration integration:\n"; - for (int isec = 0; isec < 2; ++isec) { - std::cout << "Sector: " << isec << '\n'; - for (int ic = 0; ic < order(); ++ic) { - auto aux_spin = configuration_.getSector(isec).getAuxFieldType(ic); - auto left_b = configuration_.getSector(isec).getLeftB(ic); - auto alpha_left = get_dmatrix_builder().computeAlpha(aux_spin, left_b); - std::cout << "vertex: " << std::setw(6) << ic; - std::cout << " | aux spin: " << aux_spin << " | left B: " << left_b - << " | alpha left = " << alpha_left << '\n'; - } - } -} - -template -void CtintWalkerBase::markThermalized() { - thermalized_ = true; - - nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); - thermalization_steps_ = n_steps_; - - order_avg_.reset(); - sign_avg_.reset(); - n_accepted_ = 0; +// template +// void CtintWalkerBase::writeAlphas() const { +// std::cout << "For initial configuration integration:\n"; +// for (int isec = 0; isec < 2; ++isec) { +// std::cout << "Sector: " << isec << '\n'; +// for (int ic = 0; ic < order(); ++ic) { +// auto aux_spin = configuration_.getSector(isec).getAuxFieldType(ic); +// auto left_b = configuration_.getSector(isec).getLeftB(ic); +// auto alpha_left = get_dmatrix_builder().computeAlpha(aux_spin, left_b); +// std::cout << "vertex: " << std::setw(6) << ic; +// std::cout << " | aux spin: " << aux_spin << " | left B: " << left_b +// << " | alpha left = " << alpha_left << '\n'; +// } +// } +// } - // Recompute the Monte Carlo weight. - setMFromConfig(); -#ifndef NDEBUG - //writeAlphas(); -#endif -} template void CtintWalkerBase::updateShell(int meas_id, int meas_to_do) const { @@ -383,25 +328,6 @@ void CtintWalkerBase::printSummary() const { std::cout << std::endl; } -template -template -void CtintWalkerBase::setDMatrixBuilder( - const dca::phys::solver::G0Interpolation& g0) { - using RDmn = typename Parameters::RClusterDmn; - - if (d_builder_ptr_) - std::cerr << "Warning: DMatrixBuilder already set." << std::endl; - - d_builder_ptr_ = std::make_unique>(g0, n_bands_, RDmn()); -} - -template -void CtintWalkerBase::setDMatrixAlpha(const std::array& alphas, - bool adjust_dd) { - assert(d_builder_ptr_); - d_builder_ptr_->setAlphas(alphas, adjust_dd); -} - template void CtintWalkerBase::setInteractionVertices(const Data& data, const Parameters& parameters) { @@ -419,6 +345,78 @@ void CtintWalkerBase::computeM(MatrixPair& m_accum) const { m_accum = M_; } +// template +// void setMFromConfigHelper(WALKER& walker, DMatrixBuilder& d_matrix_builder) { +// walker.mc_log_weight_ = 0.; +// walker.phase_.reset(); + +// for (int s = 0; s < 2; ++s) { +// // compute Mij = g0(t_i,t_j) - I* alpha(s_i) + +// const auto& sector = walker.configuration_.getSector(s); +// auto& M = walker.M_[s]; +// const int n = sector.size(); +// M.resize(n); +// if (!n) +// continue; +// for (int j = 0; j < n; ++j) +// for (int i = 0; i < n; ++i) +// M(i, j) = d_matrix_builder.computeD(i, j, sector); + +// if (M.nrRows()) { +// const auto [log_det, phase] = linalg::matrixop::inverseAndLogDeterminant(M); + +// walker.mc_log_weight_ -= log_det; // Weight proportional to det(M^{-1}) +// walker.phase_.divide(phase); +// } +// } + +// // So what is going on here. +// for (int i = 0; i < walker.configuration_.size(); ++i) { +// // This is actual interaction strength of the vertex i.e H_int(nu1, nu2, delta_r) +// const typename decltype(walker)::Real term = -walker.configuration_.getStrength(i); +// walker.mc_log_weight_ += std::log(std::abs(term)); +// walker.phase_.multiply(term); +// } +// } + + +template +template +void CtintWalkerBase::setMFromConfigImpl(DMatrixBuilder& d_matrix_builder) { + mc_log_weight_ = 0.; + phase_.reset(); + + for (int s = 0; s < 2; ++s) { + // compute Mij = g0(t_i,t_j) - I* alpha(s_i) + + const auto& sector = configuration_.getSector(s); + auto& M = M_[s]; + const int n = sector.size(); + M.resize(n); + if (!n) + continue; + for (int j = 0; j < n; ++j) + for (int i = 0; i < n; ++i) + M(i, j) = d_matrix_builder.computeD(i, j, sector); + + if (M.nrRows()) { + const auto [log_det, phase] = linalg::matrixop::inverseAndLogDeterminant(M); + + mc_log_weight_ -= log_det; // Weight proportional to det(M^{-1}) + phase_.divide(phase); + } + } + + // So what is going on here. + for (int i = 0; i < configuration_.size(); ++i) { + // This is actual interaction strength of the vertex i.e H_int(nu1, nu2, delta_r) + const Real term = -configuration_.getStrength(i); + mc_log_weight_ += std::log(std::abs(term)); + phase_.multiply(term); + } +} + } // namespace ctint } // namespace solver } // namespace phys diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu.hpp index b6c3fe687..695ee4961 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu.hpp @@ -49,13 +49,16 @@ class CtintWalker : public CtintWalkerBase; using ConstView = typename linalg::MatrixView; + using Resource = DMatrixBuilder; + public: - CtintWalker(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, int id = 0); + CtintWalker(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, DMatrixBuilder& d_matrix_builder, int id = 0); - void doSweep(); + void doSweep() override; + void markThermalized() override; protected: - void doStep(); + void doStep() override; /** try to insertVertex, if accepted do it. * on accept applyInsertion is called. */ @@ -72,6 +75,7 @@ class CtintWalker : public CtintWalkerBase : public CtintWalkerBase& d_matrix_builder_; using BaseClass::total_interaction_; using BaseClass::beta_; using BaseClass::phase_; @@ -98,11 +102,16 @@ class CtintWalker : public CtintWalkerBase det_ratio_; - + using BaseClass::sweeps_per_meas_; + using BaseClass::partial_order_avg_; + using BaseClass::thermalization_steps_; + using BaseClass::order_avg_; + using BaseClass::sign_avg_; + using BaseClass::n_steps_; + using BaseClass::mc_log_weight_; private: std::array, 2> S_, Q_, R_; // work spaces @@ -121,14 +130,33 @@ class CtintWalker : public CtintWalkerBase CtintWalker::CtintWalker(const Parameters& parameters_ref, - const Data& /*data*/, Rng& rng_ref, int id) + const Data& /*data*/, Rng& rng_ref, DMatrixBuilder& d_matrix_builder, int id) : BaseClass(parameters_ref, rng_ref, id), det_ratio_{1, 1}, + d_matrix_builder_(d_matrix_builder), // If we perform double updates, we need at most 3 rng values for: selecting the first vertex, // deciding if we select a second one, select the second vertex. Otherwise only the first is // needed. n_removal_rngs_(configuration_.getDoubleUpdateProb() ? 3 : 1) {} +template +void CtintWalker::markThermalized() { + thermalized_ = true; + + nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); + thermalization_steps_ = n_steps_; + + order_avg_.reset(); + sign_avg_.reset(); + n_accepted_ = 0; + + // Recompute the Monte Carlo weight. + setMFromConfig(); +#ifndef NDEBUG + //writeAlphas(); +#endif +} + template void CtintWalker::doSweep() { int nb_of_steps; @@ -172,7 +200,7 @@ bool CtintWalker::tryVertexInsert() { const int delta_vertices = configuration_.lastInsertionSize(); // Compute the new pieces of the D(= M^-1) matrix. - d_builder_ptr_->buildSQR(S_, Q_, R_, configuration_); + d_matrix_builder_.buildSQR(S_, Q_, R_, configuration_); acceptance_prob_ = insertionProbability(delta_vertices); @@ -434,6 +462,12 @@ void CtintWalker::initializeStep() { } } +template +void CtintWalker::setMFromConfig() { + BaseClass::setMFromConfigImpl(d_matrix_builder_); +} + + } // namespace ctint } // namespace solver } // namespace phys diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp index 1763ebaaf..861df5cb6 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp @@ -31,6 +31,7 @@ #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/walker_methods.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp" #include "dca/phys/parameters/parameters.hpp" #include "dca/util/integer_division.hpp" @@ -40,9 +41,10 @@ namespace solver { namespace ctint { template -class CtintWalkerSubmatrixCpu : public CtintWalkerBase { +class CtintWalkerSubmatrixCpu : public CtintWalkerSubmatrixBase { public: using this_type = CtintWalkerSubmatrixCpu; + using SubmatrixBase = CtintWalkerSubmatrixBase; using BaseClass = CtintWalkerBase; using typename BaseClass::Rng; using typename BaseClass::Data; @@ -51,21 +53,19 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { using typename BaseClass::Real; using typename BaseClass::Scalar; - CtintWalkerSubmatrixCpu(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, int id = 0); + using Resource = DMatrixBuilder; - virtual ~CtintWalkerSubmatrixCpu() = default; + CtintWalkerSubmatrixCpu(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, DMatrixBuilder& d_matrix_builder, int id = 0); - virtual void doSweep(); + virtual ~CtintWalkerSubmatrixCpu() = default; void computeM(typename BaseClass::MatrixPair& m_accum); using BaseClass::order; + void setMFromConfig() override; protected: - virtual void setMFromConfig() override; - void doSteps(); - void generateDelayedMoves(int nbr_of_movesto_delay); /** This does a bunch of things, this is the majority of a step @@ -84,15 +84,14 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { */ void mainSubmatrixProcess(); - void updateM(); + void markThermalized() override; - void transformM(); + void updateM() override; - // For testing purposes. - void doStep(const int nbr_of_movesto_delay); + void transformM(); + DMatrixBuilder& d_matrix_builder_; private: - virtual void doStep(); void doSubmatrixUpdate(); @@ -104,10 +103,10 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { void removeRowAndColOfGammaInv(); - void computeMInit(); + void computeMInit() override; // void computeG0Init(); - void computeGInit(); + void computeGInit() override; Move generateMoveType(); @@ -121,11 +120,6 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { void recomputeGammaInv(); - bool recentlyAdded(int move_idx, int s) const { - assert(move_idx < sector_indices_[s].size()); - return sector_indices_[s][move_idx] >= n_init_[s]; - } - protected: using MatrixView = linalg::MatrixView; using Matrix = linalg::Matrix; @@ -134,7 +128,6 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { using BaseClass::configuration_; using BaseClass::rng_; using BaseClass::thread_id_; - using BaseClass::d_builder_ptr_; using BaseClass::total_interaction_; using BaseClass::phase_; using BaseClass::M_; @@ -142,438 +135,98 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerBase { using BaseClass::beta_; using BaseClass::thermalized_; + using BaseClass::sweeps_per_meas_; + using BaseClass::partial_order_avg_; + using BaseClass::thermalization_steps_; + using BaseClass::order_avg_; + using BaseClass::sign_avg_; + using BaseClass::n_steps_; + using BaseClass::mc_log_weight_; + using BaseClass::n_accepted_; protected: - int max_submatrix_size_; - - struct DelayedMoveType { - Move move_type; - std::array removal_rng{1., 1., 1.}; - Real acceptance_rng; - std::array indices{-1, -1}; - }; - + using SubmatrixBase::max_submatrix_size_; + + using typename SubmatrixBase::DelayedMoveType; + using SubmatrixBase::f_; + using SubmatrixBase::gamma_values_; + using SubmatrixBase::prob_const_; + using SubmatrixBase::nb_steps_per_sweep_; + using SubmatrixBase::n_max_; + using SubmatrixBase::n_init_; + using SubmatrixBase::D_; + using SubmatrixBase::G_; + using SubmatrixBase::M_; + using SubmatrixBase::s_; + using SubmatrixBase::Gamma_indices_; + using SubmatrixBase::Gamma_inv_; + using SubmatrixBase::Gamma_inv_cpy_; + using SubmatrixBase::Gamma_q_; + using SubmatrixBase::workspace_; + using SubmatrixBase::r_; + using SubmatrixBase::move_indices_; + using SubmatrixBase::gamma_; + using SubmatrixBase::source_list_; + using SubmatrixBase::removal_list_; + using SubmatrixBase::conf_removal_list_; + using SubmatrixBase::sector_indices_; + using SubmatrixBase::index_; + using SubmatrixBase::nbr_of_indices_; + using SubmatrixBase::q_; + using SubmatrixBase::det_ratio_; protected: using BaseClass::acceptance_prob_; protected: - static constexpr Scalar the_one_ = dca::util::TheOne::value; - - std::vector delayed_moves_; - - using MatrixPair = std::array, 2>; - MatrixPair G_; - MatrixPair G0_; - MatrixPair Gamma_inv_; // TODO: don't pin - MatrixPair Gamma_inv_cpy_; // TODO: don't pin - MatrixPair q_; // TODO: don't pin or store in Gamma inv - MatrixPair r_; - MatrixPair s_; - std::array, 2> gamma_; - - Scalar det_ratio_; - std::map> f_; - std::map> prob_const_; - std::map, std::array> gamma_values_; - - using BaseClass::nb_steps_per_sweep_; - int nbr_of_steps_; - int nbr_of_moves_to_delay_; - int max_nbr_of_moves; - - std::array Gamma_size_; - std::array, 2> Gamma_indices_; - std::array, 2> sector_indices_; - - const DelayedMoveType* current_move_; - - std::array nbr_of_indices_; - - std::vector index_; - - // Initial configuration size. - unsigned config_size_init_; - - // Initial and current sector sizes. - std::array n_init_; - unsigned n_; - - // Maximal sector size after submatrix update. - - std::array n_max_; - - std::array, 2> move_indices_; - std::array, 2> removal_list_; - std::array, 2> source_list_; - std::array, 2> insertion_list_; - std::array, 2> insertion_Gamma_indices_; - - std::vector conf_removal_list_; - - std::vector::iterator insertion_list_it_; - - std::array Gamma_q_; - Matrix workspace_; - Matrix D_; using BaseClass::flop_; }; template -CtintWalkerSubmatrixCpu::CtintWalkerSubmatrixCpu(const Parameters& parameters_ref, - const Data& /*data*/, - Rng& rng_ref, int id) - : BaseClass(parameters_ref, rng_ref, id) { - for (int b = 0; b < n_bands_; ++b) { +CtintWalkerSubmatrixCpu::CtintWalkerSubmatrixCpu( + const Parameters& parameters_ref, const Data& data, Rng& rng_ref, + DMatrixBuilder& d_matrix_builder, int id) + : SubmatrixBase(parameters_ref, data, rng_ref, id), d_matrix_builder_(d_matrix_builder) { + + for (int b = 0; b < n_bands_; ++b) { for (int i = 1; i <= 3; ++i) { - f_[i][b] = d_builder_ptr_->computeF(i, b); - f_[-i][b] = d_builder_ptr_->computeF(-i, b); + f_[i][b] = d_matrix_builder_.computeF(i, b); + f_[-i][b] = d_matrix_builder_.computeF(-i, b); - gamma_values_[std::make_pair(0, i)][b] = d_builder_ptr_->computeGamma(0, i, b); - gamma_values_[std::make_pair(0, -i)][b] = d_builder_ptr_->computeGamma(0, -i, b); - gamma_values_[std::make_pair(i, 0)][b] = d_builder_ptr_->computeGamma(i, 0, b); - gamma_values_[std::make_pair(-i, 0)][b] = d_builder_ptr_->computeGamma(-i, 0, b); + gamma_values_[std::make_pair(0, i)][b] = d_matrix_builder_.computeGamma(0, i, b); + gamma_values_[std::make_pair(0, -i)][b] = d_matrix_builder_.computeGamma(0, -i, b); + gamma_values_[std::make_pair(i, 0)][b] = d_matrix_builder_.computeGamma(i, 0, b); + gamma_values_[std::make_pair(-i, 0)][b] = d_matrix_builder_.computeGamma(-i, 0, b); prob_const_[i][b] = prob_const_[-i][b] = -1. / (f_[i][b] - 1) / (f_[-i][b] - 1); } f_[0][b] = 1; } - if (BaseClass::concurrency_.id() == BaseClass::concurrency_.first() && thread_id_ == 0) - std::cout << "\nCT-INT submatrix walker created." << std::endl; } template void CtintWalkerSubmatrixCpu::setMFromConfig() { - BaseClass::setMFromConfig(); + BaseClass::setMFromConfigImpl(d_matrix_builder_); transformM(); } template -void CtintWalkerSubmatrixCpu::doSweep() { - Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); - doSteps(); -} - -template -void CtintWalkerSubmatrixCpu::doSteps() { - // Here nbr_of_steps is the number of single steps/moves during the current sweep, - // while nbr_of_submatrix_steps is the number of times the entire submatrix algorithm is run. - - if (nb_steps_per_sweep_ < 0) // Not thermalized or fixed. - nbr_of_steps_ = BaseClass::avgOrder() + 1; - else - nbr_of_steps_ = nb_steps_per_sweep_; - - // Get the maximum of Monte Carlo steps/moves that can be performed during one submatrix step. - max_nbr_of_moves = parameters_.getMaxSubmatrixSize(); - - BaseClass::n_steps_ += nbr_of_steps_; - while (nbr_of_steps_ > 0) { - nbr_of_moves_to_delay_ = std::min(nbr_of_steps_, max_nbr_of_moves); - nbr_of_steps_ -= nbr_of_moves_to_delay_; - - doStep(); - } - - BaseClass::updateSweepAverages(); -} - -template -void CtintWalkerSubmatrixCpu::doStep() { - generateDelayedMoves(nbr_of_moves_to_delay_); - doSubmatrixUpdate(); -} - -// Do one step with arbitrary number of moves. For testing. -template -void CtintWalkerSubmatrixCpu::doStep(const int nbr_of_movesto_delay) { - std::cout << "\nStarted doStep() function for testing." << std::endl; - - generateDelayedMoves(nbr_of_movesto_delay); - - std::cout << "\nGenerated " << nbr_of_movesto_delay << " moves for testing.\n" << std::endl; - - doSubmatrixUpdate(); -} - -template -void CtintWalkerSubmatrixCpu::generateDelayedMoves(const int nbr_of_movesto_delay) { - assert(nbr_of_movesto_delay > 0); - - delayed_moves_.clear(); - - for (int s = 0; s < 2; ++s) { - n_init_[s] = configuration_.getSector(s).size(); - } - n_ = config_size_init_ = configuration_.size(); - - // Generate delayed moves. - for (int i = 0; i < nbr_of_movesto_delay; ++i) { - DelayedMoveType delayed_move; - - delayed_move.move_type = generateMoveType(); - - switch (delayed_move.move_type) { - case REMOVAL: - if (configuration_.getDoubleUpdateProb()) - delayed_move.removal_rng = {rng_(), rng_(), rng_()}; - else - delayed_move.removal_rng[0] = rng_(); - break; - - case INSERTION: - configuration_.insertRandom(rng_); - for (int i = 0; i < configuration_.lastInsertionSize(); ++i) - delayed_move.indices[i] = configuration_.size() - configuration_.lastInsertionSize() + i; - break; - - default: - throw(std::logic_error("Unknown move type encountered.")); - } - - delayed_move.acceptance_rng = rng_(); - delayed_moves_.push_back(delayed_move); - } - - for (int s = 0; s < 2; ++s) { - n_max_[s] = configuration_.getSector(s).size(); - } - - const int config_size_final = configuration_.size(); - conf_removal_list_.resize(config_size_final - config_size_init_); - std::iota(conf_removal_list_.begin(), conf_removal_list_.end(), config_size_init_); -} - -template -void CtintWalkerSubmatrixCpu::doSubmatrixUpdate() { - computeMInit(); - computeGInit(); - mainSubmatrixProcess(); - updateM(); -} - -template -void CtintWalkerSubmatrixCpu::mainSubmatrixProcess() { - Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); - - for (int s = 0; s < 2; ++s) { - Gamma_inv_[s].resizeNoCopy(0); - gamma_[s].clear(); - - move_indices_[s].clear(); - insertion_list_[s].clear(); - insertion_Gamma_indices_[s].clear(); - } - - std::vector aux_spin_type, new_aux_spin_type, move_band; - - acceptance_prob_ = 1.0; - for (int delay_ind = 0; delay_ind < delayed_moves_.size(); ++delay_ind) { - current_move_ = &delayed_moves_[delay_ind]; - const auto move_type = current_move_->move_type; - - det_ratio_ = 1.; - std::array indices_array; - - if (move_type == INSERTION) { - indices_array = current_move_->indices; - } - else { // move_type == REMOVAL - indices_array = configuration_.randomRemovalCandidate(delayed_moves_[delay_ind].removal_rng); - } - - index_.clear(); - for (auto idx : indices_array) { - if (idx >= 0) - index_.push_back(idx); - } - - // This leads to a bunch of complex branchy and premature looking optimization - // The branch predictor must be weeping - bool at_least_one_recently_added = false; - bool all_recently_added = false; - if (move_type == REMOVAL) { - // Check if the vertex to remove was inserted during the current submatrix update. - const auto recently_added = [=](int id) { return id >= config_size_init_; }; - all_recently_added = math::util::all(index_, recently_added); - at_least_one_recently_added = math::util::any(index_, recently_added); - } +void CtintWalkerSubmatrixCpu::markThermalized() { + thermalized_ = true; - for (int s = 0; s < 2; ++s) { - Gamma_indices_[s].clear(); - new_aux_spin_type.clear(); - aux_spin_type.clear(); - move_band.clear(); + nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); + thermalization_steps_ = n_steps_; - findSectorIndices(s); + order_avg_.reset(); + sign_avg_.reset(); + n_accepted_ = 0; - if (move_type == INSERTION) { - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - new_aux_spin_type.push_back( - configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); - aux_spin_type.push_back(0); - } - } - else if (move_type == REMOVAL) { - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - new_aux_spin_type.push_back(0); - aux_spin_type.push_back( - configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); - - if (recentlyAdded(ind, s)) { - // Find pre-existing Gamma_indices_. - insertion_list_it_ = std::find(insertion_list_[s].begin(), insertion_list_[s].end(), - sector_indices_[s][ind]); - Gamma_indices_[s].push_back(insertion_Gamma_indices_[s][std::distance( - insertion_list_[s].begin(), insertion_list_it_)]); - } - } - } // endif removal - for (int index : sector_indices_[s]) - move_band.push_back(configuration_.getSector(s).getLeftB(index)); - - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - if (move_type == INSERTION || !recentlyAdded(ind, s)) - gamma_[s].push_back( - gamma_values_[std::make_pair(aux_spin_type[ind], new_aux_spin_type[ind])][move_band[ind]]); - else { - // TODO: find instead of adding. - gamma_[s].push_back( - gamma_values_[std::make_pair(new_aux_spin_type[ind], aux_spin_type[ind])][move_band[ind]]); - } - } - - if (!at_least_one_recently_added) - computeInsertionMatrices(sector_indices_[s], s); - else { - if (all_recently_added) - computeRemovalMatrix(s); - else - computeMixedInsertionAndRemoval(s); - } - - det_ratio_ *= details::smallDeterminant(s_[s]); - } // s loop. - - if (n_ == 0 && move_type == REMOVAL) - continue; - - // Compute acceptance probability. - auto [acceptance_prob, mc_weight_ratio] = computeAcceptanceProbability(); - acceptance_prob_ = acceptance_prob; - - // Note: acceptance and rejection can be forced for testing with the appropriate "acceptance_rng". - const bool accepted = - delayed_moves_[delay_ind].acceptance_rng < std::min(std::abs(acceptance_prob_), Real(1.)); - - // NB: recomputeGammaInv is just a inefficient alternative to updateGammaInv. Only for testing - // or debbuging. - // recomputeGammaInv(); - - // Update - if (accepted) { - ++BaseClass::n_accepted_; - - BaseClass::mc_log_weight_ += std::log(std::abs(mc_weight_ratio)); - - // Are we capturing the avg sign properly wrt multiple delayed moves - phase_.multiply(acceptance_prob_); - - // Update GammaInv if necessary. - if (!at_least_one_recently_added) - for (int s = 0; s < 2; ++s) - updateGammaInv(s); - else { - removeRowAndColOfGammaInv(); - for (int s = 0; s < 2; ++s) { - for (int ind = sector_indices_[s].size() - 1; ind >= 0; --ind) { - if (!recentlyAdded(ind, s)) - continue; - insertion_list_[s].erase(std::remove(insertion_list_[s].begin(), - insertion_list_[s].end(), sector_indices_[s][ind]), - insertion_list_[s].end()); - } - for (int ind = Gamma_indices_[s].size() - 1; ind >= 0; --ind) { - insertion_Gamma_indices_[s].erase(std::remove(insertion_Gamma_indices_[s].begin(), - insertion_Gamma_indices_[s].end(), - Gamma_indices_[s][ind]), - insertion_Gamma_indices_[s].end()); - gamma_[s].erase(gamma_[s].begin() + Gamma_indices_[s][ind]); - - for (int i = 0; i < insertion_Gamma_indices_[s].size(); ++i) { - if (insertion_Gamma_indices_[s][i] > Gamma_indices_[s][ind]) - --insertion_Gamma_indices_[s][i]; - } - } - } - } - - for (int s = 0; s < 2; ++s) { - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - if (move_type == INSERTION || !recentlyAdded(ind, s)) { - move_indices_[s].push_back(sector_indices_[s][ind]); - } - else { - gamma_[s].pop_back(); - move_indices_[s].erase(std::remove(move_indices_[s].begin(), move_indices_[s].end(), - sector_indices_[s][ind]), - move_indices_[s].end()); - } - } - } - - if (move_type == INSERTION) { - n_ += index_.size(); - for (auto idx : index_) - configuration_.commitInsertion(idx); - - for (int s = 0; s < 2; ++s) { - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - insertion_list_[s].push_back(sector_indices_[s][ind]); - insertion_Gamma_indices_[s].push_back(Gamma_inv_[s].nrRows() - nbr_of_indices_[s] + ind); - } - } - - // TODO: cleanup - for (int idx : index_) - conf_removal_list_.erase( - std::remove(conf_removal_list_.begin(), conf_removal_list_.end(), idx), - conf_removal_list_.end()); - } - else if (move_type == REMOVAL) { - n_ -= index_.size(); - for (auto idx : index_) - configuration_.markForRemoval(idx); - - // TODO: cleanup. - for (int idx : index_) - conf_removal_list_.push_back(idx); - } - } - else { // The move is rejected: - for (int s = 0; s < 2; ++s) { - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) - gamma_[s].pop_back(); - } - if (at_least_one_recently_added && !all_recently_added) - for (int s = 0; s < 2; ++s) { - Gamma_inv_[s].swap(Gamma_inv_cpy_[s]); - } - } - } - - // This seems correct in that we count all steps - // just as is done for the non submatrix cpu version - BaseClass::n_steps_ += delayed_moves_.size(); -} - -template -Move CtintWalkerSubmatrixCpu::generateMoveType() { - if (rng_() <= 0.5) - return INSERTION; - else - return REMOVAL; + // Recompute the Monte Carlo weight. + setMFromConfig(); +#ifndef NDEBUG + //writeAlphas(); +#endif } // Extend M by adding non-interacting vertices. @@ -589,7 +242,7 @@ void CtintWalkerSubmatrixCpu::computeMInit() { Scalar f_j; D_.resize(std::make_pair(delta, n_init_[s])); - d_builder_ptr_->computeG0(D_, configuration_.getSector(s), n_init_[s], n_max_[s], 0); + d_matrix_builder_.computeG0(D_, configuration_.getSector(s), n_init_[s], n_max_[s], 0); for (int j = 0; j < n_init_[s]; ++j) { const auto field_type = configuration_.getSector(s).getAuxFieldType(j); @@ -646,7 +299,7 @@ void CtintWalkerSubmatrixCpu::computeGInit() { } if (delta > 0) { - d_builder_ptr_->computeG0(G0, configuration_.getSector(s), n_init_[s], n_max_[s], 1); + d_matrix_builder_.computeG0(G0, configuration_.getSector(s), n_init_[s], n_max_[s], 1); MatrixView G(G_[s], 0, n_init_[s], n_max_[s], delta); @@ -656,76 +309,6 @@ void CtintWalkerSubmatrixCpu::computeGInit() { } } -template -auto CtintWalkerSubmatrixCpu::computeAcceptanceProbability() { - Scalar acceptance_probability = det_ratio_; - - Scalar gamma_factor = the_one_; - const auto move_type = current_move_->move_type; - for (int s = 0; s < 2; ++s) { - if (!sector_indices_[s].size()) - continue; - - for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { - if (move_type == INSERTION || !recentlyAdded(ind, s)) - gamma_factor *= gamma_[s].at(gamma_[s].size() - nbr_of_indices_[s] + ind); - else - gamma_factor /= gamma_[s].at(gamma_[s].size() - nbr_of_indices_[s] + ind); - } - } - acceptance_probability *= gamma_factor; - - const int delta_vertices = index_.size(); - const int interaction_sign = configuration_.getSign(index_[0]); - const int non_empty_sector = sector_indices_[0].size() ? 0 : 1; - - Scalar mc_weight_ratio = acceptance_probability; - Real K = total_interaction_; - - for (int v_id = 0; v_id < delta_vertices; ++v_id) { - const auto field_type = configuration_.getSector(non_empty_sector) - .getAuxFieldType(sector_indices_[non_empty_sector][v_id]); - const auto b = - configuration_.getSector(non_empty_sector).getLeftB(sector_indices_[non_empty_sector][v_id]); - K *= beta_ * prob_const_[field_type][b] * interaction_sign; - - const Scalar weight_term = prob_const_[field_type][b] * configuration_.getStrength(index_[v_id]); - if (move_type == INSERTION) - mc_weight_ratio *= weight_term; - else - mc_weight_ratio /= weight_term; - } - - // Account for combinatorial factor and update acceptance probability. - if (move_type == INSERTION) { - if (delta_vertices == 1) { - acceptance_probability *= K / (n_ + 1); - } - else if (delta_vertices == 2) { - const Real possible_partners = configuration_.possiblePartners(index_[0]); - const Real combinatorial_factor = - (n_ + 2) * (configuration_.nPartners(index_[0]) + 1) / possible_partners; - acceptance_probability *= configuration_.getStrength(index_[1]) * K / combinatorial_factor; - } - else - throw(std::logic_error("Not implemented")); - } - else if (move_type == REMOVAL) { - if (delta_vertices == 1) { - acceptance_probability *= n_ / K; - } - else if (delta_vertices == 2) { - const Real possible_partners = configuration_.possiblePartners(index_[0]); - const Real combinatorial_factor = n_ * configuration_.nPartners(index_[0]) / possible_partners; - acceptance_probability *= combinatorial_factor / (configuration_.getStrength(index_[1]) * K); - } - else - throw(std::logic_error("Not implemented")); - } - - return std::make_tuple(acceptance_probability, mc_weight_ratio); -} - template void CtintWalkerSubmatrixCpu::updateGammaInv(int s) { Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); @@ -997,7 +580,7 @@ void CtintWalkerSubmatrixCpu::computeMixedInsertionAndRemoval( // TODO: avoid reallocation. std::vector insertion_indices; for (int i = 0; i < nbr_of_indices_[s]; ++i) - if (!recentlyAdded(i, s)) + if (!SubmatrixBase::recentlyAdded(i, s)) insertion_indices.push_back(sector_indices_[s][i]); assert(Gamma_indices_[s].size() + insertion_indices.size() == sector_indices_.size()); diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp index 885bcebe4..e098599e6 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp @@ -25,6 +25,7 @@ #include #include "dca/linalg/util/gpu_event.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/structs/device_configuration_manager.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/kernels_interface.hpp" @@ -35,11 +36,11 @@ namespace solver { namespace ctint { template -class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu { +class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixBase { public: using this_type = CtintWalkerSubmatrixGpu; - using BaseClass = CtintWalkerSubmatrixCpu; - using RootClass = CtintWalkerBase; + using SubmatrixBase = CtintWalkerSubmatrixBase; + using BaseClass = CtintWalkerBase; using typename BaseClass::Data; using typename BaseClass::Profiler; @@ -48,6 +49,8 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu using typename BaseClass::Real; using typename BaseClass::Scalar; + using Resource = DMatrixBuilder; + template using MatrixView = linalg::MatrixView; template @@ -58,7 +61,7 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu constexpr static linalg::DeviceType device = linalg::GPU; CtintWalkerSubmatrixGpu(const Parameters& pars_ref, const Data& /*data_ref*/, Rng& rng_ref, - int id = 0); + DMatrixBuilder& d_matrix_builder, int id = 0); void computeM(MatrixPair& m_accum); @@ -67,16 +70,15 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu void synchronize() const override; using BaseClass::order; - using RootClass::get_stream; + using BaseClass::get_stream; std::size_t deviceFingerprint() const override; + void setMFromConfig() override; protected: // For testing purposes: void doStep(int n_moves_to_delay); - void setMFromConfig() override; - private: void doStep() override; @@ -89,27 +91,38 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu protected: using BaseClass::configuration_; using BaseClass::M_; + using BaseClass::sweeps_per_meas_; + using BaseClass::partial_order_avg_; + using BaseClass::thermalization_steps_; + using BaseClass::order_avg_; + using BaseClass::sign_avg_; + using BaseClass::n_steps_; + using BaseClass::mc_log_weight_; + using BaseClass::n_accepted_; + + private: using BaseClass::concurrency_; using BaseClass::thread_id_; - using BaseClass::removal_list_; - using BaseClass::source_list_; - using BaseClass::conf_removal_list_; - using RootClass::d_builder_ptr_; - using BaseClass::parameters_; - using BaseClass::rng_; - using BaseClass::Gamma_inv_; - using BaseClass::f_; + using SubmatrixBase::removal_list_; + using SubmatrixBase::source_list_; + using SubmatrixBase::conf_removal_list_; + using SubmatrixBase::parameters_; + using SubmatrixBase::rng_; + using SubmatrixBase::Gamma_inv_; + using SubmatrixBase::gamma_values_; + using SubmatrixBase::f_; // Initial and current sector sizes. - using BaseClass::n_init_; + using SubmatrixBase::n_init_; // Maximal sector size after submatrix update. - using BaseClass::n_max_; - using BaseClass::gamma_; - using BaseClass::G_; - using BaseClass::move_indices_; - using BaseClass::flop_; - + using SubmatrixBase::n_max_; + using SubmatrixBase::n_bands_; + using SubmatrixBase::gamma_; + using SubmatrixBase::G_; + using SubmatrixBase::move_indices_; + using SubmatrixBase::flop_; + using SubmatrixBase::prob_const_; DeviceConfigurationManager device_config_; std::array, 2> removal_list_dev_; @@ -129,19 +142,37 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixCpu std::array, linalg::GPU>, 2> gamma_index_dev_; std::array config_copied_; + + DMatrixBuilder& d_matrix_builder_; }; template CtintWalkerSubmatrixGpu::CtintWalkerSubmatrixGpu( - const Parameters& pars_ref, const Data& data, Rng& rng_ref, int id) - : BaseClass(pars_ref, data, rng_ref, id) { + const Parameters& pars_ref, const Data& data, Rng& rng_ref, + DMatrixBuilder& d_matrix_builder, int id) + : SubmatrixBase(pars_ref, data, rng_ref, id), d_matrix_builder_(d_matrix_builder) { + for (int b = 0; b < n_bands_; ++b) { + for (int i = 1; i <= 3; ++i) { + f_[i][b] = d_matrix_builder_.computeF(i, b); + f_[-i][b] = d_matrix_builder_.computeF(-i, b); + + gamma_values_[std::make_pair(0, i)][b] = d_matrix_builder_.computeGamma(0, i, b); + gamma_values_[std::make_pair(0, -i)][b] = d_matrix_builder_.computeGamma(0, -i, b); + gamma_values_[std::make_pair(i, 0)][b] = d_matrix_builder_.computeGamma(i, 0, b); + gamma_values_[std::make_pair(-i, 0)][b] = d_matrix_builder_.computeGamma(-i, 0, b); + + prob_const_[i][b] = prob_const_[-i][b] = -1. / (f_[i][b] - 1) / (f_[-i][b] - 1); + } + f_[0][b] = 1; + } + if (concurrency_.id() == concurrency_.first() && thread_id_ == 0) std::cout << "\nCT-INT submatrix walker extended to GPU." << std::endl; } - + template void CtintWalkerSubmatrixGpu::setMFromConfig() { - BaseClass::setMFromConfig(); + BaseClass::setMFromConfigImpl(d_matrix_builder_); for (int s = 0; s < 2; ++s) { M_dev_[s].setAsync(M_[s], get_stream(s)); } @@ -159,26 +190,26 @@ template void CtintWalkerSubmatrixGpu::doSweep() { Profiler profiler(__FUNCTION__, "CT-INT GPU walker", __LINE__, thread_id_); - BaseClass::doSteps(); + SubmatrixBase::doSteps(); uploadConfiguration(); } template void CtintWalkerSubmatrixGpu::doStep(const int n_moves_to_delay) { - BaseClass::nbr_of_moves_to_delay_ = n_moves_to_delay; + SubmatrixBase::nbr_of_moves_to_delay_ = n_moves_to_delay; doStep(); uploadConfiguration(); } template void CtintWalkerSubmatrixGpu::doStep() { - BaseClass::generateDelayedMoves(BaseClass::nbr_of_moves_to_delay_); + SubmatrixBase::generateDelayedMoves(SubmatrixBase::nbr_of_moves_to_delay_); uploadConfiguration(); computeMInit(); computeGInit(); synchronize(); - BaseClass::mainSubmatrixProcess(); + SubmatrixBase::mainSubmatrixProcess(); updateM(); } @@ -216,8 +247,8 @@ void CtintWalkerSubmatrixGpu::computeMInit() { const int delta = n_max_[s] - n_init_[s]; if (delta > 0) { D_dev_[s].resizeNoCopy(std::make_pair(delta, n_init_[s])); - d_builder_ptr_->computeG0(D_dev_[s], device_config_.getDeviceData(s), n_init_[s], false, - get_stream(s)); + d_matrix_builder_.computeG0(D_dev_[s], device_config_.getDeviceData(s), n_init_[s], false, + get_stream(s)); flop_ += D_dev_[s].nrCols() * D_dev_[s].nrRows() * 10; MatrixView D_view(D_dev_[s]); @@ -249,12 +280,12 @@ void CtintWalkerSubmatrixGpu::computeGInit() { const MatrixView M(M_dev_[s]); details::computeGLeft(G, M, f_dev.ptr(), n_init_[s], get_stream(s)); flop_ += n_init_[s] * n_max_[s] * 2; - + if (delta > 0) { G0_dev_[s].resizeNoCopy(std::make_pair(n_max_[s], delta)); // This doesn't do flops but the g0 interp data it uses does somewhere. - d_builder_ptr_->computeG0(G0_dev_[s], device_config_.getDeviceData(s), n_init_[s], true, - get_stream(s)); + d_matrix_builder_.computeG0(G0_dev_[s], device_config_.getDeviceData(s), n_init_[s], true, + get_stream(s)); flop_ += G0_dev_[s].nrCols() * G0_dev_[s].nrRows() * 10; MatrixView G(G_dev_[s], 0, n_init_[s], n_max_[s], delta); // compute G right. diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp new file mode 100644 index 000000000..3b2901aa6 --- /dev/null +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp @@ -0,0 +1,868 @@ +#ifndef DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_CTINT_WALKER_CTINT_WALKER_SUBMATRIX_BASE_HPP +#define DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_CTINT_WALKER_CTINT_WALKER_SUBMATRIX_BASE_HPP + +#include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +#include "dca/linalg/linalg.hpp" +#include "dca/math/util/vector_operations.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/structs/solver_configuration.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/move.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp" +#include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/walker_methods.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/util/integer_division.hpp" + +namespace dca { +namespace phys { +namespace solver { +namespace ctint { + +template +class CtintWalkerSubmatrixBase : public CtintWalkerBase { +public: + using this_type = CtintWalkerSubmatrixBase; + using BaseClass = CtintWalkerBase; + using typename BaseClass::Rng; + using typename BaseClass::Data; + using typename BaseClass::Profiler; + using typename BaseClass::GpuStream; + using typename BaseClass::Real; + using typename BaseClass::Scalar; + + CtintWalkerSubmatrixBase(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, int id = 0); + + virtual ~CtintWalkerSubmatrixBase() = default; + + void doSweep() override; + + void computeM(typename BaseClass::MatrixPair& m_accum); + + using BaseClass::order; + + virtual void setMFromConfig() = 0; +protected: + virtual void doStep() override; + void doSteps(); + void generateDelayedMoves(int nbr_of_movesto_delay); + + /** This does a bunch of things, this is the majority of a step + * For each delayed_move it: + * * computes the acceptance prob + * * compares std::abs(acceptance prob) and random value + * * Does an update of the log_weight for a configuration + * mc_weight_ratio = acceptance_probability; + * For a new vertex + * mc_weight ratio *= weight_term + * For a removed vertes + * mc_weight_ratio /= weight_term + * with + * weight_term = prob_const_[field_type][b] * the vertex interaction strength; + * BaseClass::mc_log_weight_ += std::log(std::abs(mc_weight_ratio)); + */ + void mainSubmatrixProcess(); + + void markThermalized() override; + + void transformM(); + + // For testing purposes. + virtual void doStep(const int nbr_of_movesto_delay); + + virtual void updateM() = 0; + +private: + + void doSubmatrixUpdate(); + + /** returns [acceptance_probability , mc_weight_ratio ] + */ + auto computeAcceptanceProbability(); + + void updateGammaInv(int s); + + void removeRowAndColOfGammaInv(); + + virtual void computeMInit() = 0; + + // void computeG0Init(); + virtual void computeGInit() = 0; + + Move generateMoveType(); + + void computeInsertionMatrices(const std::vector& indices, const int s); + + void computeRemovalMatrix(int s); + + void computeMixedInsertionAndRemoval(int s); + + void findSectorIndices(const int s); + + void recomputeGammaInv(); + + bool recentlyAdded(int move_idx, int s) const { + assert(move_idx < sector_indices_[s].size()); + return sector_indices_[s][move_idx] >= n_init_[s]; + } + +protected: + using MatrixView = linalg::MatrixView; + using Matrix = linalg::Matrix; + + using BaseClass::parameters_; + using BaseClass::configuration_; + using BaseClass::rng_; + using BaseClass::thread_id_; + using BaseClass::total_interaction_; + using BaseClass::phase_; + using BaseClass::M_; + using BaseClass::n_bands_; + using BaseClass::beta_; + + using BaseClass::thermalized_; + using BaseClass::sweeps_per_meas_; + using BaseClass::partial_order_avg_; + using BaseClass::thermalization_steps_; + using BaseClass::order_avg_; + using BaseClass::sign_avg_; + using BaseClass::n_steps_; + using BaseClass::mc_log_weight_; + using BaseClass::n_accepted_; + +protected: + int max_submatrix_size_; + + struct DelayedMoveType { + Move move_type; + std::array removal_rng{1., 1., 1.}; + Real acceptance_rng; + std::array indices{-1, -1}; + }; + +protected: + using BaseClass::acceptance_prob_; + +protected: + static constexpr Scalar the_one_ = dca::util::TheOne::value; + + std::vector delayed_moves_; + + using MatrixPair = std::array, 2>; + MatrixPair G_; + MatrixPair G0_; + MatrixPair Gamma_inv_; // TODO: don't pin + MatrixPair Gamma_inv_cpy_; // TODO: don't pin + MatrixPair q_; // TODO: don't pin or store in Gamma inv + MatrixPair r_; + MatrixPair s_; + std::array, 2> gamma_; + + Scalar det_ratio_; + std::map> f_; + std::map> prob_const_; + std::map, std::array> gamma_values_; + + using BaseClass::nb_steps_per_sweep_; + int nbr_of_steps_; + int nbr_of_moves_to_delay_; + int max_nbr_of_moves; + + std::array Gamma_size_; + std::array, 2> Gamma_indices_; + std::array, 2> sector_indices_; + + const DelayedMoveType* current_move_; + + std::array nbr_of_indices_; + + std::vector index_; + + // Initial configuration size. + unsigned config_size_init_; + + // Initial and current sector sizes. + std::array n_init_; + unsigned n_; + + // Maximal sector size after submatrix update. + + std::array n_max_; + + std::array, 2> move_indices_; + std::array, 2> removal_list_; + std::array, 2> source_list_; + std::array, 2> insertion_list_; + std::array, 2> insertion_Gamma_indices_; + + std::vector conf_removal_list_; + + std::vector::iterator insertion_list_it_; + + std::array Gamma_q_; + Matrix workspace_; + Matrix D_; + + using BaseClass::flop_; +}; + +template +CtintWalkerSubmatrixBase::CtintWalkerSubmatrixBase(const Parameters& parameters_ref, + const Data& /*data*/, + Rng& rng_ref, + int id) + : BaseClass(parameters_ref, rng_ref, id) { + if (BaseClass::concurrency_.id() == BaseClass::concurrency_.first() && thread_id_ == 0) + std::cout << "\nCT-INT submatrix walker created." << std::endl; +} + +template +void CtintWalkerSubmatrixBase::markThermalized() { + thermalized_ = true; + + nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); + thermalization_steps_ = n_steps_; + + order_avg_.reset(); + sign_avg_.reset(); + n_accepted_ = 0; + + // Recompute the Monte Carlo weight. + setMFromConfig(); +#ifndef NDEBUG + //writeAlphas(); +#endif +} + +template +void CtintWalkerSubmatrixBase::doSweep() { + Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); + doSteps(); +} + +template +void CtintWalkerSubmatrixBase::doSteps() { + // Here nbr_of_steps is the number of single steps/moves during the current sweep, + // while nbr_of_submatrix_steps is the number of times the entire submatrix algorithm is run. + + if (nb_steps_per_sweep_ < 0) // Not thermalized or fixed. + nbr_of_steps_ = BaseClass::avgOrder() + 1; + else + nbr_of_steps_ = nb_steps_per_sweep_; + + // Get the maximum of Monte Carlo steps/moves that can be performed during one submatrix step. + max_nbr_of_moves = parameters_.getMaxSubmatrixSize(); + + BaseClass::n_steps_ += nbr_of_steps_; + while (nbr_of_steps_ > 0) { + nbr_of_moves_to_delay_ = std::min(nbr_of_steps_, max_nbr_of_moves); + nbr_of_steps_ -= nbr_of_moves_to_delay_; + + doStep(); + } + + BaseClass::updateSweepAverages(); +} + +template +void CtintWalkerSubmatrixBase::doStep() { + generateDelayedMoves(nbr_of_moves_to_delay_); + doSubmatrixUpdate(); +} + +// Do one step with arbitrary number of moves. For testing. +template +void CtintWalkerSubmatrixBase::doStep(const int nbr_of_movesto_delay) { + std::cout << "\nStarted doStep() function for testing." << std::endl; + + generateDelayedMoves(nbr_of_movesto_delay); + + std::cout << "\nGenerated " << nbr_of_movesto_delay << " moves for testing.\n" << std::endl; + + doSubmatrixUpdate(); +} + +template +void CtintWalkerSubmatrixBase::generateDelayedMoves(const int nbr_of_movesto_delay) { + assert(nbr_of_movesto_delay > 0); + + delayed_moves_.clear(); + + for (int s = 0; s < 2; ++s) { + n_init_[s] = configuration_.getSector(s).size(); + } + n_ = config_size_init_ = configuration_.size(); + + // Generate delayed moves. + for (int i = 0; i < nbr_of_movesto_delay; ++i) { + DelayedMoveType delayed_move; + + delayed_move.move_type = generateMoveType(); + + switch (delayed_move.move_type) { + case REMOVAL: + if (configuration_.getDoubleUpdateProb()) + delayed_move.removal_rng = {rng_(), rng_(), rng_()}; + else + delayed_move.removal_rng[0] = rng_(); + break; + + case INSERTION: + configuration_.insertRandom(rng_); + for (int i = 0; i < configuration_.lastInsertionSize(); ++i) + delayed_move.indices[i] = configuration_.size() - configuration_.lastInsertionSize() + i; + break; + + default: + throw(std::logic_error("Unknown move type encountered.")); + } + + delayed_move.acceptance_rng = rng_(); + delayed_moves_.push_back(delayed_move); + } + + for (int s = 0; s < 2; ++s) { + n_max_[s] = configuration_.getSector(s).size(); + } + + const int config_size_final = configuration_.size(); + conf_removal_list_.resize(config_size_final - config_size_init_); + std::iota(conf_removal_list_.begin(), conf_removal_list_.end(), config_size_init_); +} + +template +void CtintWalkerSubmatrixBase::doSubmatrixUpdate() { + computeMInit(); + computeGInit(); + mainSubmatrixProcess(); + updateM(); +} + +template +void CtintWalkerSubmatrixBase::mainSubmatrixProcess() { + Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); + + for (int s = 0; s < 2; ++s) { + Gamma_inv_[s].resizeNoCopy(0); + gamma_[s].clear(); + + move_indices_[s].clear(); + insertion_list_[s].clear(); + insertion_Gamma_indices_[s].clear(); + } + + std::vector aux_spin_type, new_aux_spin_type, move_band; + + acceptance_prob_ = 1.0; + for (int delay_ind = 0; delay_ind < delayed_moves_.size(); ++delay_ind) { + current_move_ = &delayed_moves_[delay_ind]; + const auto move_type = current_move_->move_type; + + det_ratio_ = 1.; + std::array indices_array; + + if (move_type == INSERTION) { + indices_array = current_move_->indices; + } + else { // move_type == REMOVAL + indices_array = configuration_.randomRemovalCandidate(delayed_moves_[delay_ind].removal_rng); + } + + index_.clear(); + for (auto idx : indices_array) { + if (idx >= 0) + index_.push_back(idx); + } + + // This leads to a bunch of complex branchy and premature looking optimization + // The branch predictor must be weeping + bool at_least_one_recently_added = false; + bool all_recently_added = false; + if (move_type == REMOVAL) { + // Check if the vertex to remove was inserted during the current submatrix update. + const auto recently_added = [=](int id) { return id >= config_size_init_; }; + all_recently_added = math::util::all(index_, recently_added); + at_least_one_recently_added = math::util::any(index_, recently_added); + } + + for (int s = 0; s < 2; ++s) { + Gamma_indices_[s].clear(); + new_aux_spin_type.clear(); + aux_spin_type.clear(); + move_band.clear(); + + findSectorIndices(s); + + if (move_type == INSERTION) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + new_aux_spin_type.push_back( + configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); + aux_spin_type.push_back(0); + } + } + else if (move_type == REMOVAL) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + new_aux_spin_type.push_back(0); + aux_spin_type.push_back( + configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); + + if (recentlyAdded(ind, s)) { + // Find pre-existing Gamma_indices_. + insertion_list_it_ = std::find(insertion_list_[s].begin(), insertion_list_[s].end(), + sector_indices_[s][ind]); + Gamma_indices_[s].push_back(insertion_Gamma_indices_[s][std::distance( + insertion_list_[s].begin(), insertion_list_it_)]); + } + } + } // endif removal + for (int index : sector_indices_[s]) + move_band.push_back(configuration_.getSector(s).getLeftB(index)); + + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + if (move_type == INSERTION || !recentlyAdded(ind, s)) + gamma_[s].push_back( + gamma_values_[std::make_pair(aux_spin_type[ind], new_aux_spin_type[ind])][move_band[ind]]); + else { + // TODO: find instead of adding. + gamma_[s].push_back( + gamma_values_[std::make_pair(new_aux_spin_type[ind], aux_spin_type[ind])][move_band[ind]]); + } + } + + if (!at_least_one_recently_added) + computeInsertionMatrices(sector_indices_[s], s); + else { + if (all_recently_added) + computeRemovalMatrix(s); + else + computeMixedInsertionAndRemoval(s); + } + + det_ratio_ *= details::smallDeterminant(s_[s]); + } // s loop. + + if (n_ == 0 && move_type == REMOVAL) + continue; + + // Compute acceptance probability. + auto [acceptance_prob, mc_weight_ratio] = computeAcceptanceProbability(); + acceptance_prob_ = acceptance_prob; + + // Note: acceptance and rejection can be forced for testing with the appropriate "acceptance_rng". + const bool accepted = + delayed_moves_[delay_ind].acceptance_rng < std::min(std::abs(acceptance_prob_), Real(1.)); + + // NB: recomputeGammaInv is just a inefficient alternative to updateGammaInv. Only for testing + // or debbuging. + // recomputeGammaInv(); + + // Update + if (accepted) { + ++BaseClass::n_accepted_; + + BaseClass::mc_log_weight_ += std::log(std::abs(mc_weight_ratio)); + + // Are we capturing the avg sign properly wrt multiple delayed moves + phase_.multiply(acceptance_prob_); + + // Update GammaInv if necessary. + if (!at_least_one_recently_added) + for (int s = 0; s < 2; ++s) + updateGammaInv(s); + else { + removeRowAndColOfGammaInv(); + for (int s = 0; s < 2; ++s) { + for (int ind = sector_indices_[s].size() - 1; ind >= 0; --ind) { + if (!recentlyAdded(ind, s)) + continue; + insertion_list_[s].erase(std::remove(insertion_list_[s].begin(), + insertion_list_[s].end(), sector_indices_[s][ind]), + insertion_list_[s].end()); + } + for (int ind = Gamma_indices_[s].size() - 1; ind >= 0; --ind) { + insertion_Gamma_indices_[s].erase(std::remove(insertion_Gamma_indices_[s].begin(), + insertion_Gamma_indices_[s].end(), + Gamma_indices_[s][ind]), + insertion_Gamma_indices_[s].end()); + gamma_[s].erase(gamma_[s].begin() + Gamma_indices_[s][ind]); + + for (int i = 0; i < insertion_Gamma_indices_[s].size(); ++i) { + if (insertion_Gamma_indices_[s][i] > Gamma_indices_[s][ind]) + --insertion_Gamma_indices_[s][i]; + } + } + } + } + + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + if (move_type == INSERTION || !recentlyAdded(ind, s)) { + move_indices_[s].push_back(sector_indices_[s][ind]); + } + else { + gamma_[s].pop_back(); + move_indices_[s].erase(std::remove(move_indices_[s].begin(), move_indices_[s].end(), + sector_indices_[s][ind]), + move_indices_[s].end()); + } + } + } + + if (move_type == INSERTION) { + n_ += index_.size(); + for (auto idx : index_) + configuration_.commitInsertion(idx); + + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + insertion_list_[s].push_back(sector_indices_[s][ind]); + insertion_Gamma_indices_[s].push_back(Gamma_inv_[s].nrRows() - nbr_of_indices_[s] + ind); + } + } + + // TODO: cleanup + for (int idx : index_) + conf_removal_list_.erase( + std::remove(conf_removal_list_.begin(), conf_removal_list_.end(), idx), + conf_removal_list_.end()); + } + else if (move_type == REMOVAL) { + n_ -= index_.size(); + for (auto idx : index_) + configuration_.markForRemoval(idx); + + // TODO: cleanup. + for (int idx : index_) + conf_removal_list_.push_back(idx); + } + } + else { // The move is rejected: + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) + gamma_[s].pop_back(); + } + if (at_least_one_recently_added && !all_recently_added) + for (int s = 0; s < 2; ++s) { + Gamma_inv_[s].swap(Gamma_inv_cpy_[s]); + } + } + } + + // This seems correct in that we count all steps + // just as is done for the non submatrix cpu version + BaseClass::n_steps_ += delayed_moves_.size(); +} + +template +Move CtintWalkerSubmatrixBase::generateMoveType() { + if (rng_() <= 0.5) + return INSERTION; + else + return REMOVAL; +} + +template +auto CtintWalkerSubmatrixBase::computeAcceptanceProbability() { + Scalar acceptance_probability = det_ratio_; + + Scalar gamma_factor = the_one_; + const auto move_type = current_move_->move_type; + for (int s = 0; s < 2; ++s) { + if (!sector_indices_[s].size()) + continue; + + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + if (move_type == INSERTION || !recentlyAdded(ind, s)) + gamma_factor *= gamma_[s].at(gamma_[s].size() - nbr_of_indices_[s] + ind); + else + gamma_factor /= gamma_[s].at(gamma_[s].size() - nbr_of_indices_[s] + ind); + } + } + acceptance_probability *= gamma_factor; + + const int delta_vertices = index_.size(); + const int interaction_sign = configuration_.getSign(index_[0]); + const int non_empty_sector = sector_indices_[0].size() ? 0 : 1; + + Scalar mc_weight_ratio = acceptance_probability; + Real K = total_interaction_; + + for (int v_id = 0; v_id < delta_vertices; ++v_id) { + const auto field_type = configuration_.getSector(non_empty_sector) + .getAuxFieldType(sector_indices_[non_empty_sector][v_id]); + const auto b = + configuration_.getSector(non_empty_sector).getLeftB(sector_indices_[non_empty_sector][v_id]); + K *= beta_ * prob_const_[field_type][b] * interaction_sign; + + const Scalar weight_term = prob_const_[field_type][b] * configuration_.getStrength(index_[v_id]); + if (move_type == INSERTION) + mc_weight_ratio *= weight_term; + else + mc_weight_ratio /= weight_term; + } + + // Account for combinatorial factor and update acceptance probability. + if (move_type == INSERTION) { + if (delta_vertices == 1) { + acceptance_probability *= K / (n_ + 1); + } + else if (delta_vertices == 2) { + const Real possible_partners = configuration_.possiblePartners(index_[0]); + const Real combinatorial_factor = + (n_ + 2) * (configuration_.nPartners(index_[0]) + 1) / possible_partners; + acceptance_probability *= configuration_.getStrength(index_[1]) * K / combinatorial_factor; + } + else + throw(std::logic_error("Not implemented")); + } + else if (move_type == REMOVAL) { + if (delta_vertices == 1) { + acceptance_probability *= n_ / K; + } + else if (delta_vertices == 2) { + const Real possible_partners = configuration_.possiblePartners(index_[0]); + const Real combinatorial_factor = n_ * configuration_.nPartners(index_[0]) / possible_partners; + acceptance_probability *= combinatorial_factor / (configuration_.getStrength(index_[1]) * K); + } + else + throw(std::logic_error("Not implemented")); + } + + return std::make_tuple(acceptance_probability, mc_weight_ratio); +} + +template +void CtintWalkerSubmatrixBase::updateGammaInv(int s) { + Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); + const int delta = s_[s].nrRows(); + if (delta == 0) + return; + const int old_size = Gamma_inv_[s].nrRows(); + Gamma_inv_[s].resize(old_size + delta); + + if (old_size > 0) { + MatrixView bulk(Gamma_inv_[s], 0, 0, old_size, old_size); + MatrixView q_inv(Gamma_inv_[s], 0, old_size, old_size, delta); + MatrixView r_inv(Gamma_inv_[s], old_size, 0, delta, old_size); + MatrixView s_inv(Gamma_inv_[s], old_size, old_size, delta, delta); + + details::smallInverse(s_[s], s_inv); + + auto& Gamma_q = Gamma_q_[s]; + linalg::matrixop::gemm(Scalar(-1.), Gamma_q, s_inv, Scalar(0.), q_inv); + + auto& r_Gamma = workspace_; + r_Gamma.resizeNoCopy(r_[s].size()); + linalg::matrixop::gemm(r_[s], bulk, r_Gamma); + linalg::matrixop::gemm(Scalar(-1.), s_inv, r_Gamma, Scalar(0.), r_inv); + + // Gamma_ += Gamma_ * q_ * s_^-1 * r_ * Gamma_ + linalg::matrixop::gemm(Scalar(-1.), q_inv, r_Gamma, Scalar(1.), bulk); + } + else { + Gamma_inv_[s].resizeNoCopy(delta); + details::smallInverse(s_[s], Gamma_inv_[s]); + } +} + +template +void CtintWalkerSubmatrixBase::findSectorIndices(const int s) { + sector_indices_[s].clear(); + for (auto index : index_) { + configuration_.findIndices(sector_indices_[s], index, s); + } + + if (index_.size() > 1) + std::sort(sector_indices_[s].begin(), sector_indices_[s].end()); + nbr_of_indices_[s] = sector_indices_[s].size(); +} + +// Remove row and column of Gamma_inv with Woodbury's formula. +// Gamma <- Gamma - U.V => GammaInv <- GammaInv + GammaInv.U.(Id-V.GammaInv.U)^(-1).V.GammaInv. +template +void CtintWalkerSubmatrixBase::removeRowAndColOfGammaInv() { + Profiler profiler(__FUNCTION__, "CT-INT walker", __LINE__, thread_id_); + for (int s = 0; s < 2; ++s) { + const int delta = Gamma_indices_[s].size(); + if (delta == 0) + continue; + const int n_init = Gamma_inv_[s].nrRows(); + const int n = n_init - delta; + + if (n) { + q_[s].resizeNoCopy(std::make_pair(n, delta)); + r_[s].resizeNoCopy(std::make_pair(delta, n)); + auto& q_s = workspace_; + q_s.resizeNoCopy(q_[s].size()); + + // TODO: check if gamma indices are ordered. + for (int j = 0; j < delta; ++j) { + int idx = 0; + for (int i = 0; i < n_init; ++i) { + if (idx == delta || i != Gamma_indices_[s][idx]) + q_[s](i - idx, j) = Gamma_inv_[s](i, Gamma_indices_[s][j]); + else + ++idx; + } + } + + int idx = 0; + for (int j = 0; j < n_init; ++j) { + if (idx == delta || j != Gamma_indices_[s][idx]) + for (int i = 0; i < delta; ++i) + r_[s](i, j - idx) = Gamma_inv_[s](Gamma_indices_[s][i], j); + else + ++idx; + } + + // TODO: this could be avoided in the case of pure removal. + s_[s].resizeNoCopy(delta); + for (int j = 0; j < delta; ++j) + for (int i = 0; i < delta; ++i) + s_[s](i, j) = Gamma_inv_[s](Gamma_indices_[s][i], Gamma_indices_[s][j]); + + for (int ind = delta - 1; ind >= 0; --ind) + linalg::matrixop::removeRowAndCol(Gamma_inv_[s], Gamma_indices_[s][ind]); + + details::smallInverse(s_[s]); + + linalg::matrixop::gemm(q_[s], s_[s], q_s); + + // Gamma_inv_ -= Q*S^-1*R + linalg::matrixop::gemm(Scalar(-1.), q_s, r_[s], Scalar(1.), Gamma_inv_[s]); + } // if n + else { + Gamma_inv_[s].resizeNoCopy(0); + } + } +} + +// This method is unused and left to potentially use as a testing reference. +template +void CtintWalkerSubmatrixBase::recomputeGammaInv() { + for (int s = 0; s < 2; ++s) { + if (Gamma_inv_[s].nrRows() > 0) + linalg::matrixop::inverse(Gamma_inv_[s]); + + Gamma_inv_[s].resize(Gamma_inv_[s].nrRows() + nbr_of_indices_[s]); + + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + for (int i = 0; i < Gamma_inv_[s].nrRows(); ++i) { + Gamma_inv_[s](Gamma_inv_[s].nrRows() + ind, i) = + G_[s](sector_indices_[s][ind], move_indices_[s][i]); + Gamma_inv_[s](i, Gamma_inv_[s].nrRows() + ind) = + G_[s](move_indices_[s][i], sector_indices_[s][ind]); + } + + for (int ind_2 = 0; ind_2 < nbr_of_indices_[s]; ++ind_2) { + Gamma_inv_[s](Gamma_inv_[s].nrRows() + ind, Gamma_inv_[s].nrRows() + ind_2) = + G_[s](sector_indices_[s][ind], sector_indices_[s][ind_2]); + } + Gamma_inv_[s](Gamma_inv_[s].nrRows() + ind, Gamma_inv_[s].nrRows() + ind) -= + (1 + gamma_[s].end()[-nbr_of_indices_[s] + ind]) / + gamma_[s].end()[-nbr_of_indices_[s] + ind]; + } + + Gamma_inv_[s].nrRows() = Gamma_inv_[s].nrRows(); + + if (Gamma_inv_[s].nrRows() > 0) + details::smallInverse(Gamma_inv_[s]); + } +} + +template +void CtintWalkerSubmatrixBase::transformM() { + for (int s = 0; s < 2; ++s) { + for (int j = 0; j < M_[s].size().second; ++j) { + for (int i = 0; i < M_[s].size().first; ++i) { + const auto field_type = configuration_.getSector(s).getAuxFieldType(i); + const auto b = configuration_.getSector(s).getLeftB(i); + const Scalar f_i = -(f_[field_type][b] - 1); + M_[s](i, j) /= f_i; + } + } + } +} + +template +void CtintWalkerSubmatrixBase::computeInsertionMatrices( + const std::vector& insertion_indices, const int s) { + const int delta = insertion_indices.size(); + + s_[s].resizeNoCopy(delta); + if (delta == 0) + return; + + for (int i = 0; i < delta; ++i) + for (int j = 0; j < delta; ++j) { + s_[s](i, j) = G_[s](insertion_indices[i], insertion_indices[j]); + if (i == j) { + const Real gamma_val = gamma_[s].at(gamma_[s].size() + i - nbr_of_indices_[s]); + s_[s](i, j) -= (1 + gamma_val) / gamma_val; + } + } + + assert(Gamma_inv_[s].nrRows() == move_indices_[s].size()); + if (Gamma_inv_[s].nrRows() > 0) { + q_[s].resizeNoCopy(std::make_pair(Gamma_inv_[s].nrRows(), delta)); + r_[s].resizeNoCopy(std::make_pair(delta, Gamma_inv_[s].nrRows())); + + for (int i = 0; i < Gamma_inv_[s].nrRows(); ++i) + for (int j = 0; j < delta; ++j) { + q_[s](i, j) = G_[s](move_indices_[s][i], insertion_indices[j]); + r_[s](j, i) = G_[s](insertion_indices[j], move_indices_[s][i]); + } + + auto& Gamma_q = Gamma_q_[s]; + Gamma_q.resizeNoCopy(q_[s].size()); + linalg::matrixop::gemm(Gamma_inv_[s], q_[s], Gamma_q); + linalg::matrixop::gemm(Scalar(-1.), r_[s], Gamma_q, Scalar(1.), s_[s]); + } +} + +template +void CtintWalkerSubmatrixBase::computeRemovalMatrix(const int s) { + const int delta = Gamma_indices_[s].size(); + s_[s].resizeNoCopy(delta); + for (int j = 0; j < delta; ++j) + for (int i = 0; i < delta; ++i) + s_[s](i, j) = Gamma_inv_[s](Gamma_indices_[s][i], Gamma_indices_[s][j]); +} + +template +void CtintWalkerSubmatrixBase::computeMixedInsertionAndRemoval(int s) { + Gamma_inv_cpy_[s] = Gamma_inv_[s]; + + if (sector_indices_[s].size() == 0) { + s_[s].resizeNoCopy(0); + return; + } + + // TODO: avoid reallocation. + std::vector insertion_indices; + for (int i = 0; i < nbr_of_indices_[s]; ++i) + if (!recentlyAdded(i, s)) + insertion_indices.push_back(sector_indices_[s][i]); + assert(Gamma_indices_[s].size() + insertion_indices.size() == sector_indices_.size()); + + computeInsertionMatrices(insertion_indices, s); + det_ratio_ *= details::smallDeterminant(s_[s]); + updateGammaInv(s); + + computeRemovalMatrix(s); +} + +} // namespace ctint +} // namespace solver +} // namespace phys +} // namespace dca + +#endif diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.cpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.cpp new file mode 100644 index 000000000..ec86d92d6 --- /dev/null +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.cpp @@ -0,0 +1,221 @@ +#include "submat_impl.hpp" + +namespace dca { +namespace phys { +namespace solver { +namespace ctint { + +template +void SubmatImpl::mainSubmatrixProcess() { + for (int s = 0; s < 2; ++s) { + move_indices_[s].clear(); + insertion_list_[s].clear(); + insertion_Gamma_indices_[s].clear(); + } + + std::vector aux_spin_type, new_aux_spin_type, move_band; + + SCALAR acceptance_prob_ = 1.0; + for (int delay_ind = 0; delay_ind < delayed_moves_.size(); ++delay_ind) { + current_move_ = &delayed_moves_[delay_ind]; + const auto move_type = current_move_->move_type; + // there seems to be no need that this be persistent. + det_ratio_ = 1.; + std::array indices_array; + + if (move_type == INSERTION) { + indices_array = current_move_->indices; + } + else { // move_type == REMOVAL + indices_array = configuration_.randomRemovalCandidate(delayed_moves_[delay_ind].removal_rng); + } + + index_.clear(); + for (auto idx : indices_array) { + if (idx >= 0) + index_.push_back(idx); + } + + // This leads to a bunch of complex branchy and premature looking optimization + // The branch predictor must be weeping + bool at_least_one_recently_added = false; + bool all_recently_added = false; + if (move_type == REMOVAL) { + // Check if the vertex to remove was inserted during the current submatrix update. + const auto recently_added = [=](int id) { return id >= config_size_init_; }; + all_recently_added = math::util::all(index_, recently_added); + at_least_one_recently_added = math::util::any(index_, recently_added); + } + + for (int s = 0; s < 2; ++s) { + Gamma_indices_[s].clear(); + new_aux_spin_type.clear(); + aux_spin_type.clear(); + move_band.clear(); + + findSectorIndices(s); + + if (move_type == INSERTION) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + new_aux_spin_type.push_back( + configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); + aux_spin_type.push_back(0); + } + } + else if (move_type == REMOVAL) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + new_aux_spin_type.push_back(0); + aux_spin_type.push_back( + configuration_.getSector(s).getAuxFieldType(sector_indices_[s][ind])); + + if (recentlyAdded(ind, s)) { + // Find pre-existing Gamma_indices_. + insertion_list_it_ = std::find(insertion_list_[s].begin(), insertion_list_[s].end(), + sector_indices_[s][ind]); + Gamma_indices_[s].push_back(insertion_Gamma_indices_[s][std::distance( + insertion_list_[s].begin(), insertion_list_it_)]); + } + } + } // endif removal + for (int index : sector_indices_[s]) + move_band.push_back(configuration_.getSector(s).getLeftB(index)); + + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + if (move_type == INSERTION || !recentlyAdded(ind, s)) + gamma_[s].push_back( + gamma_values_[std::make_pair(aux_spin_type[ind], new_aux_spin_type[ind])][move_band[ind]]); + else { + // TODO: find instead of adding. + gamma_[s].push_back( + gamma_values_[std::make_pair(new_aux_spin_type[ind], aux_spin_type[ind])][move_band[ind]]); + } + } + + if (!at_least_one_recently_added) + computeInsertionMatrices(sector_indices_[s], s); + else { + if (all_recently_added) + computeRemovalMatrix(s); + else + computeMixedInsertionAndRemoval(s); + } + + det_ratio_ *= details::smallDeterminant(s_[s]); + } // s loop. + + if (n_ == 0 && move_type == REMOVAL) + continue; + + // Compute acceptance probability. + auto [acceptance_prob, mc_weight_ratio] = computeAcceptanceProbability(); + acceptance_prob_ = acceptance_prob; + + // Note: acceptance and rejection can be forced for testing with the appropriate "acceptance_rng". + const bool accepted = + delayed_moves_[delay_ind].acceptance_rng < std::min(std::abs(acceptance_prob_), Real(1.)); + + // NB: recomputeGammaInv is just a inefficient alternative to updateGammaInv. Only for testing + // or debbuging. + // recomputeGammaInv(); + + // Update + if (accepted) { + ++BaseClass::n_accepted_; + + BaseClass::mc_log_weight_ += std::log(std::abs(mc_weight_ratio)); + + // Are we capturing the avg sign properly wrt multiple delayed moves + phase_.multiply(acceptance_prob_); + + // Update GammaInv if necessary. + if (!at_least_one_recently_added) + for (int s = 0; s < 2; ++s) + updateGammaInv(s); + else { + removeRowAndColOfGammaInv(); + for (int s = 0; s < 2; ++s) { + for (int ind = sector_indices_[s].size() - 1; ind >= 0; --ind) { + if (!recentlyAdded(ind, s)) + continue; + insertion_list_[s].erase(std::remove(insertion_list_[s].begin(), + insertion_list_[s].end(), sector_indices_[s][ind]), + insertion_list_[s].end()); + } + for (int ind = Gamma_indices_[s].size() - 1; ind >= 0; --ind) { + insertion_Gamma_indices_[s].erase(std::remove(insertion_Gamma_indices_[s].begin(), + insertion_Gamma_indices_[s].end(), + Gamma_indices_[s][ind]), + insertion_Gamma_indices_[s].end()); + gamma_[s].erase(gamma_[s].begin() + Gamma_indices_[s][ind]); + + for (int i = 0; i < insertion_Gamma_indices_[s].size(); ++i) { + if (insertion_Gamma_indices_[s][i] > Gamma_indices_[s][ind]) + --insertion_Gamma_indices_[s][i]; + } + } + } + } + + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + if (move_type == INSERTION || !recentlyAdded(ind, s)) { + move_indices_[s].push_back(sector_indices_[s][ind]); + } + else { + gamma_[s].pop_back(); + move_indices_[s].erase(std::remove(move_indices_[s].begin(), move_indices_[s].end(), + sector_indices_[s][ind]), + move_indices_[s].end()); + } + } + } + + if (move_type == INSERTION) { + n_ += index_.size(); + for (auto idx : index_) + configuration_.commitInsertion(idx); + + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) { + insertion_list_[s].push_back(sector_indices_[s][ind]); + insertion_Gamma_indices_[s].push_back(Gamma_inv_[s].nrRows() - nbr_of_indices_[s] + ind); + } + } + + // TODO: cleanup + for (int idx : index_) + conf_removal_list_.erase( + std::remove(conf_removal_list_.begin(), conf_removal_list_.end(), idx), + conf_removal_list_.end()); + } + else if (move_type == REMOVAL) { + n_ -= index_.size(); + for (auto idx : index_) + configuration_.markForRemoval(idx); + + // TODO: cleanup. + for (int idx : index_) + conf_removal_list_.push_back(idx); + } + } + else { // The move is rejected: + for (int s = 0; s < 2; ++s) { + for (int ind = 0; ind < nbr_of_indices_[s]; ++ind) + gamma_[s].pop_back(); + } + if (at_least_one_recently_added && !all_recently_added) + for (int s = 0; s < 2; ++s) { + Gamma_inv_[s].swap(Gamma_inv_cpy_[s]); + } + } + } + + // This seems correct in that we count all steps + // just as is done for the non submatrix cpu version + BaseClass::n_steps_ += delayed_moves_.size(); +} + +} +} +} +} diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.hpp new file mode 100644 index 000000000..1f93b81c8 --- /dev/null +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/submat_impl.hpp @@ -0,0 +1,51 @@ +#ifndef SUBMATIMPL_HPP +#define SUBMATIMPL_HPP + +#include +#include +#include "dca/linalg/linalg.hpp" + +namespace dca { +namespace phys { +namespace solver { +namespace ctint { + +template +class SubmatImpl { +public: + struct DelayedMoveType { + Move move_type; + std::array removal_rng{1., 1., 1.}; + Real acceptance_rng; + std::array indices{-1, -1}; + }; + +private: + const DelayedMoveType* current_move_; + std::array Gamma_size_; + std::array, 2> Gamma_indices_; + std::array, 2> sector_indices_; + + std::array nbr_of_indices_; + + std::vector index_; + + std::vector delayed_moves_; + std::array, 2> move_indices; + std::array, 2> removal_list; + std::array, 2> source_list; + std::array, 2> insertion_list; + std::array, 2> insertion_Gamma_indices; + std::vector conf_removal_list; + std::vector::iterator insertion_list_it; + +public: + void mainSubmatrixProcess(); +}; + +} // namespace ctint +} // namespace solver +} // namespace phys +} // namespace dca + +#endif diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp index 6601fb439..ecec506e0 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp @@ -86,7 +86,7 @@ class DMatrixBuilder { #ifdef DCA_HAVE_GPU virtual void computeG0(linalg::Matrix& /*G0*/, const details::DeviceConfiguration& /*configuration*/, int /*n_init*/, - bool /*right_section*/, GpuStream /*stream*/) const { + bool /*right_section*/, const GpuStream& /*stream*/) const { throw(std::runtime_error("Not implemented.")); } #endif // DCA_HAVE_GPU diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp index 752c3283a..0e742d182 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp @@ -39,6 +39,7 @@ class DMatrixBuilder final : public DMatrixBuilder; using GpuStream = dca::linalg::util::GpuStream; public: + using BaseClass::setAlphas; template DMatrixBuilder(const G0Interpolation& g0, int nb, const RDmn& /*rdmn*/); diff --git a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp index 825abdcba..554b2e652 100644 --- a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp @@ -366,7 +366,7 @@ void StdThreadQmciClusterSolver::startWalker(int id) { const int walker_index = thread_task_handler_.walkerIDToRngIndex(id); auto walker_log = last_iteration_ ? BaseClass::writer_ : nullptr; - Walker walker(parameters_, data_, rng_vector_[walker_index], concurrency_.get_id(), id, + Walker walker(parameters_, data_, rng_vector_[walker_index], BaseClass::getResource(), concurrency_.get_id(), id, walker_log, BaseClass::g0_); std::unique_ptr exception_ptr; @@ -588,7 +588,7 @@ void StdThreadQmciClusterSolver::startWalkerAndAccumulator(int id, // Create and warm a walker. auto walker_log = BaseClass::writer_; - Walker walker(parameters_, data_, rng_vector_[id], concurrency_.get_id(), id, walker_log, + Walker walker(parameters_, data_, rng_vector_[id], BaseClass::getResource(), concurrency_.get_id(), id, walker_log, BaseClass::g0_); initializeAndWarmUp(walker, id, id); diff --git a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_walker.hpp b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_walker.hpp index 10fe87f99..032076278 100644 --- a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_walker.hpp +++ b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_walker.hpp @@ -35,12 +35,13 @@ class StdThreadQmciWalker final using Concurrency = typename Parameters::concurrency_type; using Rng = typename Parameters::random_number_generator; using Real = typename QmciWalker::Scalar; - + using WalkerResource = QmciWalker::Resource; + constexpr static auto device = QmciWalker::device; constexpr static int bands = Parameters::bands; public: - StdThreadQmciWalker(Parameters& parameters_ref, DATA& data_ref, Rng& rng, int concurrency_id, + StdThreadQmciWalker(Parameters& parameters_ref, DATA& data_ref, Rng& rng, WalkerResource& d_matrix_builder, int concurrency_id, int id, const std::shared_ptr>& writer, G0Interpolation& g0); @@ -80,9 +81,10 @@ class StdThreadQmciWalker final template StdThreadQmciWalker::StdThreadQmciWalker( - Parameters& parameters, DATA& data_ref, Rng& rng, int concurrency_id, int id, - const std::shared_ptr>& writer, [[maybe_unused]]G0Interpolation& g0) - : QmciWalker(parameters, data_ref, rng, id), + Parameters& parameters, DATA& data_ref, Rng& rng, WalkerResource& d_matrix_builder, + int concurrency_id, int id, const std::shared_ptr>& writer, + [[maybe_unused]] G0Interpolation& g0) + : QmciWalker(parameters, data_ref, rng, d_matrix_builder, id), // QmciAutocorrelationData(parameters, id, g0), stamping_period_(parameters.stamping_period()), concurrency_id_(concurrency_id), @@ -113,10 +115,13 @@ void StdThreadQmciWalker::doSweep() { template void StdThreadQmciWalker::logConfiguration() const { - const bool print_to_log = writer_ && static_cast(*writer_); // File exists and it is open. \todo possibly this should always be true + const bool print_to_log = + writer_ && + static_cast( + *writer_); // File exists and it is open. \todo possibly this should always be true if (print_to_log && - (writer_->isADIOS2() || - (writer_->isHDF5() && writer_->get_concurrency().id() == writer_->get_concurrency().first()))) { + (writer_->isADIOS2() || (writer_->isHDF5() && writer_->get_concurrency().id() == + writer_->get_concurrency().first()))) { if (stamping_period_ && (meas_id_ % stamping_period_) == 0) { const std::string stamp_name = "r_" + std::to_string(concurrency_id_) + "_meas_" + std::to_string(meas_id_) + "_w_" + std::to_string(thread_id_); diff --git a/include/dca/phys/models/tight_binding_model.hpp b/include/dca/phys/models/tight_binding_model.hpp index bfb51d8e3..565da7005 100644 --- a/include/dca/phys/models/tight_binding_model.hpp +++ b/include/dca/phys/models/tight_binding_model.hpp @@ -28,6 +28,7 @@ namespace models { template class TightBindingModel { public: + using model_type = TightBindingModel; using lattice_type = Lattice; using b = func::dmn_0; using s = func::dmn_0; diff --git a/src/io/hdf5/hdf5_writer.cpp b/src/io/hdf5/hdf5_writer.cpp index 855924a42..bdce48c58 100644 --- a/src/io/hdf5/hdf5_writer.cpp +++ b/src/io/hdf5/hdf5_writer.cpp @@ -27,6 +27,7 @@ void HDF5Writer::open_file(std::string file_name, bool overwrite) { if (file_) throw std::logic_error(__FUNCTION__); + try { if (overwrite) { file_ = std::make_unique(file_name.c_str(), H5F_ACC_TRUNC); } @@ -36,7 +37,9 @@ void HDF5Writer::open_file(std::string file_name, bool overwrite) { else file_ = std::make_unique(file_name.c_str(), H5F_ACC_EXCL); } - + } catch (const H5::Exception& exc) { + std::cerr << exc.getDetailMsg() << '\n'; + } file_id_ = file_->getId(); } diff --git a/test/performance/phys/ctint/ctint_walker_performance_test.cpp b/test/performance/phys/ctint/ctint_walker_performance_test.cpp index 6e62bfba5..e58793e58 100644 --- a/test/performance/phys/ctint/ctint_walker_performance_test.cpp +++ b/test/performance/phys/ctint/ctint_walker_performance_test.cpp @@ -52,18 +52,25 @@ using Real = double; using RngType = dca::math::random::StdRandomWrapper; using Lattice = dca::phys::models::bilayer_lattice; +constexpr int bands = Lattice::BANDS; + using Model = dca::phys::models::TightBindingModel; using NoThreading = dca::parallel::NoThreading; using TestConcurrency = dca::parallel::NoConcurrency; using Profiler = dca::profiling::CountingProfiler>; using Scalar = double; -using Parameters = dca::phys::params::Parameters, Scalar>>; +using Parameters = + dca::phys::params::Parameters, Scalar>>; using Data = dca::phys::DcaData; -template +template using Walker = dca::phys::solver::ctint::CtintWalkerChoice; +template +using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + using BDmn = dca::func::dmn_0; using RDmn = Parameters::RClusterDmn; using BBRDmn = dca::func::dmn_variadic; @@ -111,9 +118,6 @@ int main(int argc, char** argv) { dca::phys::solver::details::shrinkG0(data.G0_r_t)); BBRDmn bbr_dmn; - Walker::setDMatrixBuilder(g0); - Walker::setDMatrixAlpha(parameters.getAlphas(), false); - Walker::setInteractionVertices(data, parameters); auto printTime = [](const std::string& str, const auto& start, const auto& end) { dca::profiling::Duration time(end, start); @@ -138,9 +142,12 @@ int main(int argc, char** argv) { if (!test_gpu) Profiler::start(); + DMatrixBuilder d_matrix_builder(g0, bands, RDmn()); + Walker::setInteractionVertices(data, parameters); + // Do one integration step. RngType rng(0, 1, 0); - Walker walker(parameters, data, rng); + Walker walker(parameters, data, rng, d_matrix_builder); walker.initialize(0); // Timed section. @@ -164,7 +171,11 @@ int main(int argc, char** argv) { RngType::resetCounter(); RngType rng(0, 1, 0); - Walker walker_gpu(parameters, data, rng, 0); + + DMatrixBuilder d_matrix_builder_gpu(g0, bands, RDmn()); + Walker::setInteractionVertices(data, parameters); + + Walker walker_gpu(parameters, data, rng, d_matrix_builder_gpu, 0); walker_gpu.initialize(0); // Timed section. diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_multinsertion_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_multinsertion_test.cpp index 98d85a934..b8669830a 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_multinsertion_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_multinsertion_test.cpp @@ -46,23 +46,30 @@ using Walker = testing::phys::solver::ctint::WalkerWrapper; +constexpr int bands = dca::testing::LatticeHund::BANDS; + auto getVertexRng = [](int id) { const double n_vertices = 12.; return (n_vertices - id - 0.5) / n_vertices; }; +using CDA = dca::phys::ClusterDomainAliases; +using RDmn = typename CDA::RClusterDmn; + + // Compare single versus double update without a submatrix update. TEST_F(G0Setup, NoSubmatrix) { G0Setup::RngType rng(std::vector{}); G0Interpolation g0( dca::phys::solver::ctint::details::shrinkG0(data_->G0_r_t)); G0Setup::LabelDomain label_dmn; - Walker::setDMatrixBuilder(g0); - Walker::setDMatrixAlpha(parameters_.getAlphas(), false); + using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + DMatrixBuilder d_matrix_builder(g0, bands, RDmn()); + d_matrix_builder.setAlphas(parameters_.getAlphas(), false); Walker::setInteractionVertices(*data_, parameters_); parameters_.setDoubleUpdateProbability(0); - Walker walker_single(parameters_, rng); + Walker walker_single(parameters_, rng, d_matrix_builder); walker_single.setInteractionVertices(*data_, parameters_); // interaction, tau, aux, accept @@ -101,7 +108,7 @@ TEST_F(G0Setup, NoSubmatrix) { parameters_.setDoubleUpdateProbability(1); Walker::setInteractionVertices(*data_, parameters_); - Walker walker_double(parameters_, rng); + Walker walker_double(parameters_, rng, d_matrix_builder); //////////////////////////////// // interaction, partner, tau, aux, tau, aux, accept @@ -153,12 +160,13 @@ TEST_F(G0Setup, Submatrix) { G0Interpolation g0( dca::phys::solver::ctint::details::shrinkG0(data_->G0_r_t)); G0Setup::LabelDomain label_dmn; - Walker::setDMatrixBuilder(g0); - Walker::setDMatrixAlpha(parameters_.getAlphas(), false); + using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + DMatrixBuilder d_matrix_builder(g0, bands, RDmn()); + d_matrix_builder.setAlphas(parameters_.getAlphas(), false); parameters_.setDoubleUpdateProbability(1); Walker::setInteractionVertices(*data_, parameters_); - Walker walker(parameters_, rng); + Walker walker(parameters_, rng, d_matrix_builder); // interaction, partner, tau, aux, tau, aux, accept rng.setNewValues(std::vector{getVertexRng(6), 0, 0.41, 0, 0.53, 1, 0}); @@ -173,12 +181,12 @@ TEST_F(G0Setup, Submatrix) { const double prob1 = walker.getAcceptanceProbability(); ///////////////////////////// - - WalkerSubmatrix::setDMatrixBuilder(g0); - WalkerSubmatrix::setDMatrixAlpha(parameters_.getAlphas(), false); + using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + DMatrixBuilder d_matrix_cpu(g0, bands, RDmn()); + d_matrix_builder.setAlphas(parameters_.getAlphas(), false); WalkerSubmatrix::setInteractionVertices(*data_, parameters_); - WalkerSubmatrix walker_subm(parameters_, rng); + WalkerSubmatrix walker_subm(parameters_, rng, d_matrix_builder); std::vector random_vals{// (insert, first_id, partner_id, tau, aux, tau, aux, accept) x 2 0, getVertexRng(6), 0, 0.41, 0, 0.53, 1, 0, 0, getVertexRng(7), diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_complex_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_complex_test.cpp index bd2c9b5e4..157ceee38 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_complex_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_complex_test.cpp @@ -39,6 +39,13 @@ constexpr char input_name[] = template using CtintWalkerSubmatrixGpuComplexTest = typename dca::testing::G0Setup; +constexpr int bands = dca::testing::LatticeRashba::BANDS; + +template +using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + +using CDA = dca::phys::ClusterDomainAliases; +using RDmn = typename CDA::RClusterDmn; using namespace dca::phys::solver; @@ -70,15 +77,12 @@ TYPED_TEST(CtintWalkerSubmatrixGpuComplexTest, doSteps) { G0Interpolation g0_gpu(g0_func); typename TestFixture::LabelDomain label_dmn; - // TODO: improve API. - SbmWalkerCpu::setDMatrixBuilder(g0_cpu); - SbmWalkerCpu::setDMatrixAlpha(parameters.getAlphas(), false); - SbmWalkerGpu::setDMatrixBuilder(g0_gpu); - SbmWalkerGpu::setDMatrixAlpha(parameters.getAlphas(), false); - + DMatrixBuilder d_matrix_cpu(g0_cpu, bands, RDmn()); SbmWalkerCpu::setInteractionVertices(data, parameters); - SbmWalkerGpu::setInteractionVertices(data, parameters); - + d_matrix_cpu.setAlphas(parameters.getAlphas(), parameters.adjustAlphaDd()); + DMatrixBuilder d_matrix_gpu(g0_gpu, bands, RDmn()); + d_matrix_gpu.setAlphas(parameters.getAlphas(), parameters.adjustAlphaDd()); + // ************************************ // Test vertex insertion / removal **** // ************************************ @@ -105,9 +109,9 @@ TYPED_TEST(CtintWalkerSubmatrixGpuComplexTest, doSteps) { for (int steps = 1; steps <= 8; ++steps) { rng.setNewValues(setup_rngs); - SbmWalkerCpu walker_cpu(parameters, rng); + SbmWalkerCpu walker_cpu(parameters, rng, d_matrix_cpu); rng.setNewValues(setup_rngs); - SbmWalkerGpu walker_gpu(parameters, rng); + SbmWalkerGpu walker_gpu(parameters, rng, d_matrix_gpu); rng.setNewValues(rng_vals); walker_cpu.doStep(steps); diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp index 7f8478f60..1f727cd4e 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp @@ -51,11 +51,17 @@ struct CtINTWalkerSubmatrixGPUTestT : public ::testing::Test { G0Setup gpu_setup; }; +using CDA = dca::phys::ClusterDomainAliases; +using RDmn = typename CDA::RClusterDmn; + using namespace dca::phys::solver; template using CtintWalkerSubmatrixGpuTest = CtINTWalkerSubmatrixGPUTestT; +template +using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + // Currently testing float isn't really possible due to the way the Scalar type is // carried through from mc_options. See test_setup.hpp PD using ScalarTypes = ::testing::Types; //double, @@ -91,14 +97,15 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { G0Interpolation g0_gpu(g0_func_gpu); typename CtintWalkerSubmatrixGpuTest::G0Setup::LabelDomain label_dmn; - // TODO: improve API. - Walker::setDMatrixBuilder(g0_cpu); - Walker::setDMatrixAlpha(cpu_parameters.getAlphas(), false); - Walker::setInteractionVertices(cpu_data, cpu_parameters); - SbmWalkerGpu::setDMatrixBuilder(g0_gpu); - SbmWalkerGpu::setDMatrixAlpha(gpu_parameters.getAlphas(), false); - SbmWalkerGpu::setInteractionVertices(gpu_data, gpu_parameters); + constexpr int bands = dca::testing::LatticeBilayer::BANDS; + + DMatrixBuilder d_matrix_cpu(g0_cpu, bands, RDmn()); + SbmWalkerCpu::setInteractionVertices(cpu_data, cpu_parameters); + d_matrix_cpu.setAlphas(cpu_parameters.getAlphas(), false); //cpu_parameters.adjustAlphaDd()); + DMatrixBuilder d_matrix_gpu(g0_gpu, bands, RDmn()); + SbmWalkerGpu::setInteractionVertices(cpu_data, gpu_parameters); + d_matrix_gpu.setAlphas(gpu_parameters.getAlphas(), false); //gpu_parameters.adjustAlphaDd()); // ************************************ // Test vertex insertion / removal **** @@ -126,9 +133,9 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { gpu_parameters.setInitialConfigurationSize(initial_size); for (int steps = 1; steps <= 8; ++steps) { cpu_rng.setNewValues(setup_rngs); - SbmWalkerCpu walker_cpu(cpu_parameters, cpu_rng); + SbmWalkerCpu walker_cpu(cpu_parameters, cpu_rng, d_matrix_cpu); gpu_rng.setNewValues(setup_rngs); - SbmWalkerGpu walker_gpu(gpu_parameters, gpu_rng); + SbmWalkerGpu walker_gpu(gpu_parameters, gpu_rng, d_matrix_gpu); cpu_rng.setNewValues(rng_vals); walker_cpu.doStep(steps); diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp index 155e5a67a..a35bf3ce7 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp @@ -36,6 +36,10 @@ template using CtintWalkerSubmatrixTest = typename dca::testing::G0Setup; + using CDA = dca::phys::ClusterDomainAliases; + using RDmn = typename CDA::RClusterDmn; + + using namespace dca::phys::solver; // Currently testing float isn't really possible due to the way the Scalar type is @@ -55,7 +59,7 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { using MatrixPair = std::array; using SubmatrixWalker = testing::phys::solver::ctint::WalkerWrapperSubmatrix; - + std::vector setup_rngs{0., 0.00, 0.9, 0.5, 0.01, 0, 0.75, 0.02, 0, 0.6, 0.03, 1, 0.99, 0.04, 0.99}; typename TestFixture::RngType rng(setup_rngs); @@ -66,9 +70,10 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { G0Interpolation g0( dca::phys::solver::ctint::details::shrinkG0(data.G0_r_t)); typename TestFixture::LabelDomain label_dmn; - Walker::setDMatrixBuilder(g0); - Walker::setDMatrixAlpha(parameters.getAlphas(), false); - Walker::setInteractionVertices(data, parameters); + using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; + DMatrixBuilder d_matrix_builder(g0, 2, RDmn()); + d_matrix_builder.setAlphas(parameters.getAlphas(), false); + SubmatrixWalker::setInteractionVertices(data, parameters); // ************************************ // Test vertex insertion / removal **** @@ -93,7 +98,7 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { for (int steps = 1; steps <= 8; ++steps) { rng.setNewValues(setup_rngs); - SubmatrixWalker walker(parameters, rng); + SubmatrixWalker walker(parameters, rng, d_matrix_builder); MatrixPair old_M(walker.getM()); rng.setNewValues(rng_vals); @@ -116,7 +121,7 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { // Compare with non submatrix walker. rng.setNewValues(setup_rngs); - Walker walker_nosub(parameters, rng); + Walker walker_nosub(parameters, rng, d_matrix_builder); rng.setNewValues(rng_vals); for (int i = 0; i < steps; ++i) diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp index e3a49c7ec..065f651bb 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp @@ -16,7 +16,7 @@ #include "walker_wrapper.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/details/solver_methods.hpp" #include "test/unit/phys/dca_step/cluster_solver/test_setup.hpp" - +#include "dca/phys/domains/cluster/cluster_domain_aliases.hpp" #include "test/mock_mcconfig.hpp" namespace dca { namespace config { @@ -70,12 +70,16 @@ TYPED_TEST(CtintWalkerTest, InsertAndRemoveVertex) { typename CtintWalkerTest::LabelDomain label_dmn; using Parameters = typename CtintWalkerTest::Parameters; + using CDA = dca::phys::ClusterDomainAliases; + using RDmn = typename CDA::RClusterDmn; + using Walker = testing::phys::solver::ctint::WalkerWrapper; - Walker::setDMatrixBuilder(g0); - Walker::setDMatrixAlpha(parameters.getAlphas(), 0); + dca::phys::solver::ctint::DMatrixBuilder d_matrix_builder(g0, 1, RDmn()); + d_matrix_builder.setAlphas(parameters.getAlphas(), parameters.adjustAlphaDd()); + Walker::setInteractionVertices(data, parameters); - Walker walker(parameters, rng); + Walker walker(parameters, rng, d_matrix_builder); // ******************************* // Test vertex removal *********** diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp index 6e0dba929..bdeb041ef 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp @@ -33,8 +33,8 @@ class WalkerWrapper : public CtintWalker { using Real = dca::util::RealAlias; using Rng = typename Base::Rng; - WalkerWrapper(Parameters& parameters_ref, Rng& rng_ref) - : Base(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, 0) { + WalkerWrapper(Parameters& parameters_ref, Rng& rng_ref, DMatrixBuilder& d_matrix_builder) + : Base(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, d_matrix_builder, 0) { Base::initialize(0); } diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp index da20194e3..2d5dc66a8 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp @@ -57,9 +57,10 @@ struct WalkerWrapperSubmatrix : public WalkerSelector; using Rng = typename BaseClass::Rng; using Data = typename BaseClass::Data; + using BaseClass::setMFromConfig; - WalkerWrapperSubmatrix(/*const*/ Parameters& parameters_ref, Rng& rng_ref) - : BaseClass(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, 0), + WalkerWrapperSubmatrix(/*const*/ Parameters& parameters_ref, Rng& rng_ref, DMatrixBuilder& d_matrix_builder) + : BaseClass(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, d_matrix_builder, 0), streams_(3) { BaseClass::initialize(0); @@ -67,7 +68,7 @@ struct WalkerWrapperSubmatrix : public WalkerSelector; @@ -85,8 +86,6 @@ struct WalkerWrapperSubmatrix : public WalkerSelector