From 0c003b21dd090f7135a2d697ae82279414ba2136 Mon Sep 17 00:00:00 2001 From: Jiakun Yan Date: Sun, 24 Sep 2023 22:03:19 -0500 Subject: [PATCH] lci pp: add option to enable/disable in-buffer assembly of header messages --- .../include/hpx/parcelport_lci/config.hpp | 2 ++ .../include/hpx/parcelport_lci/header.hpp | 31 +++++++++++++++++++ .../hpx/parcelport_lci/parcelport_lci.hpp | 3 +- .../sendrecv/sender_connection_sendrecv.hpp | 1 + libs/full/parcelport_lci/src/config.cpp | 2 ++ .../sendrecv/sender_connection_sendrecv.cpp | 28 +++++++++++++---- .../network/pingpong_performance2.cpp | 6 ++++ 7 files changed, 66 insertions(+), 7 deletions(-) diff --git a/libs/full/parcelport_lci/include/hpx/parcelport_lci/config.hpp b/libs/full/parcelport_lci/include/hpx/parcelport_lci/config.hpp index 027e908b4d3d..9e475374704f 100644 --- a/libs/full/parcelport_lci/include/hpx/parcelport_lci/config.hpp +++ b/libs/full/parcelport_lci/include/hpx/parcelport_lci/config.hpp @@ -51,6 +51,8 @@ namespace hpx::parcelset::policies::lci { static int ndevices; // How many completion managers to use static int ncomps; + // Whether to enable in-buffer assembly for the header messages. 
+ static bool enable_in_buffer_assembly; static void init_config(util::runtime_configuration const& rtcfg); }; diff --git a/libs/full/parcelport_lci/include/hpx/parcelport_lci/header.hpp b/libs/full/parcelport_lci/include/hpx/parcelport_lci/header.hpp index d9ad22b1f534..21685508874e 100644 --- a/libs/full/parcelport_lci/include/hpx/parcelport_lci/header.hpp +++ b/libs/full/parcelport_lci/include/hpx/parcelport_lci/header.hpp @@ -49,6 +49,37 @@ namespace hpx::parcelset::policies::lci { pos_piggy_back_address = 8 * sizeof(value_type) + 2 }; + template <typename buffer_type, typename ChunkType> + static size_t get_header_size( + parcel_buffer<buffer_type, ChunkType> const& buffer, + size_t max_header_size) noexcept + { + HPX_ASSERT(max_header_size >= pos_piggy_back_address); + + size_t current_header_size = pos_piggy_back_address; + if (buffer.data_.size() <= (max_header_size - current_header_size)) + { + current_header_size += buffer.data_.size(); + } + int num_zero_copy_chunks = buffer.num_chunks_.first; + [[maybe_unused]] int num_non_zero_copy_chunks = + buffer.num_chunks_.second; + if (num_zero_copy_chunks != 0) + { + HPX_ASSERT(buffer.transmission_chunks_.size() == + size_t(num_zero_copy_chunks + num_non_zero_copy_chunks)); + int tchunk_size = + static_cast<int>(buffer.transmission_chunks_.size() * + sizeof(typename parcel_buffer<buffer_type, ChunkType>::transmission_chunk_type)); + if (tchunk_size <= int(max_header_size - current_header_size)) + { + current_header_size += tchunk_size; + } + } + return current_header_size; + } + template <typename buffer_type, typename ChunkType> header(parcel_buffer<buffer_type, ChunkType> const& buffer, char* header_buffer, size_t max_header_size) noexcept diff --git a/libs/full/parcelport_lci/include/hpx/parcelport_lci/parcelport_lci.hpp b/libs/full/parcelport_lci/include/hpx/parcelport_lci/parcelport_lci.hpp index 737af1b966e9..0fa747b67c79 100644 --- a/libs/full/parcelport_lci/include/hpx/parcelport_lci/parcelport_lci.hpp +++ b/libs/full/parcelport_lci/include/hpx/parcelport_lci/parcelport_lci.hpp @@ -266,7 +266,8 @@ namespace hpx::traits { "prepost_recv_num = 1\n" "reg_mem = 1\n" 
"ndevices = 1\n" - "ncomps = 1\n"; + "ncomps = 1\n" + "enable_in_buffer_assembly = 1\n"; } }; } // namespace hpx::traits diff --git a/libs/full/parcelport_lci/include/hpx/parcelport_lci/sendrecv/sender_connection_sendrecv.hpp b/libs/full/parcelport_lci/include/hpx/parcelport_lci/sendrecv/sender_connection_sendrecv.hpp index 4fa4c4d4ca8e..4a0ebd3c4925 100644 --- a/libs/full/parcelport_lci/include/hpx/parcelport_lci/sendrecv/sender_connection_sendrecv.hpp +++ b/libs/full/parcelport_lci/include/hpx/parcelport_lci/sendrecv/sender_connection_sendrecv.hpp @@ -58,6 +58,7 @@ namespace hpx::parcelset::policies::lci { hpx::chrono::high_resolution_timer timer_; header header_; LCI_mbuffer_t header_buffer; + std::vector<char> header_buffer_vector; bool need_send_data; bool need_send_tchunks; LCI_tag_t tag; diff --git a/libs/full/parcelport_lci/src/config.cpp b/libs/full/parcelport_lci/src/config.cpp index 28d0147cb07a..53b424e9955b 100644 --- a/libs/full/parcelport_lci/src/config.cpp +++ b/libs/full/parcelport_lci/src/config.cpp @@ -28,6 +28,7 @@ namespace hpx::parcelset::policies::lci { bool config_t::reg_mem; int config_t::ndevices; int config_t::ncomps; + bool config_t::enable_in_buffer_assembly; void config_t::init_config(util::runtime_configuration const& rtcfg) { @@ -105,6 +106,7 @@ namespace hpx::parcelset::policies::lci { reg_mem = util::get_entry_as(rtcfg, "hpx.parcel.lci.reg_mem", 1); ndevices = util::get_entry_as(rtcfg, "hpx.parcel.lci.ndevices", 1); ncomps = util::get_entry_as(rtcfg, "hpx.parcel.lci.ncomps", 1); + enable_in_buffer_assembly = util::get_entry_as(rtcfg, "hpx.parcel.lci.enable_in_buffer_assembly", 1); if (!enable_send_immediate && enable_lci_backlog_queue) { diff --git a/libs/full/parcelport_lci/src/sendrecv/sender_connection_sendrecv.cpp b/libs/full/parcelport_lci/src/sendrecv/sender_connection_sendrecv.cpp index fcccfdc84b1c..7c143582a501 100644 --- a/libs/full/parcelport_lci/src/sendrecv/sender_connection_sendrecv.cpp +++ 
b/libs/full/parcelport_lci/src/sendrecv/sender_connection_sendrecv.cpp @@ -44,12 +44,21 @@ namespace hpx::parcelset::policies::lci { postprocess_handler_ = HPX_MOVE(parcel_postprocess); // build header - while (LCI_mbuffer_alloc(device_p->device, &header_buffer) != LCI_OK) - continue; - HPX_ASSERT(header_buffer.length == (size_t) LCI_MEDIUM_SIZE); - header_ = header( - buffer_, (char*) header_buffer.address, header_buffer.length); - header_buffer.length = header_.size(); + if (config_t::enable_in_buffer_assembly) + { + while ( + LCI_mbuffer_alloc(device_p->device, &header_buffer) != LCI_OK) + continue; + HPX_ASSERT(header_buffer.length == (size_t) LCI_MEDIUM_SIZE); + header_ = header( + buffer_, (char*) header_buffer.address, header_buffer.length); + header_buffer.length = header_.size(); + } else { + header_buffer_vector.resize(header::get_header_size( + buffer_, LCI_MEDIUM_SIZE)); + header_ = header(buffer_, static_cast<char*>(header_buffer_vector.data()), + header_buffer_vector.size()); + } HPX_ASSERT((header_.num_zero_copy_chunks() == 0) == buffer_.transmission_chunks_.empty()); need_send_data = false; @@ -85,6 +94,13 @@ namespace hpx::parcelset::policies::lci { "Rank %d Wrap around!\n", LCI_RANK); header_.set_device_idx(device_p->idx); header_.set_tag(tag); + if (!config_t::enable_in_buffer_assembly) { + while ( + LCI_mbuffer_alloc(device_p->device, &header_buffer) != LCI_OK) + continue; + memcpy(header_buffer.address, header_buffer_vector.data(), header_buffer_vector.size()); + header_buffer.length = header_buffer_vector.size(); + } send_chunks_idx = 0; completion = nullptr; segment_to_use = LCI_SEGMENT_ALL; diff --git a/tests/performance/network/pingpong_performance2.cpp b/tests/performance/network/pingpong_performance2.cpp index 57f0772a3df6..2f31c690e486 100644 --- a/tests/performance/network/pingpong_performance2.cpp +++ b/tests/performance/network/pingpong_performance2.cpp @@ -111,6 +111,12 @@ int hpx_main(hpx::program_options::variables_map& b_arg) return 0; } 
+ if (window == 0) + { + std::cout << "window is 0!" << std::endl; + return 0; + } + std::vector localities = hpx::find_remote_localities(); hpx::id_type to;