From 311cf7bef08783f3641dd074e493786de456020a Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Fri, 29 Nov 2024 20:31:19 +0100 Subject: [PATCH 1/7] [onert] Share tensors memory for designated operands This commit improves the tensor memory management to handle sharing memory buffers. It means that more than one tensor can refer to the same buffer. The sharing is determined by the operand index map calculated in the previous step. Note that cases like sharing memory from constant tensors require additional checks. ONE-DCO-1.0-Signed-off-by: Mateusz Bencer m.bencer@partner.samsung.com --- .../backend/basic/BackendContextHelpers.h | 52 +++++++--- .../backend/basic/StaticTensorManager.h | 1 + .../src/backend/basic/StaticTensorManager.cc | 99 ++++++++++++++++--- 3 files changed, 125 insertions(+), 27 deletions(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 46e57e925e6..ce905ce7a46 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -177,16 +177,31 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ITensorRegistry * -genTensors(const std::shared_ptr &tensor_builder, const ir::Graph &graph, - const util::Set &external_operands, - const std::shared_ptr &tensor_registry, - const std::vector &op_order, - const ir::OperandIndexMap & /*shared_memory_operand_idx*/) +ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, + const ir::Graph &graph, + const util::Set &external_operands, + const std::shared_ptr &tensor_registry, + const std::vector &op_order, + const ir::OperandIndexMap &shared_memory_operand_idx) { + // process source tensors for shared memory at first + std::vector registered_source_ind; + for (const auto &[_, source_ind] : shared_memory_operand_idx) + { + if (external_operands.contains(source_ind)) + continue; + if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source + continue; + tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); + registered_source_ind.emplace_back(source_ind); + } + graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands.contains(ind)) return; + if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) != + std::end(registered_source_ind)) // skip tensors already registered + return; tensor_builder->registerTensorInfo(ind, obj.info()); }); @@ -219,10 +234,14 @@ template ITensorRegistry *genTensors(T_BackendContex inline void initConsts(const ir::Operands &operands, const util::Set &external_operands, ITensorRegistry *tensor_registry, - const ir::OperandIndexMap & /*shared_memory_operands_map*/) + const ir::OperandIndexMap &shared_memory_operands_map) { operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (external_operands.contains(ind) || !operand.isConstant()) + const bool has_const_shared_memory = + shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && + operands.at(shared_memory_operands_map.at(ind)).isConstant(); + const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; + if (external_operands.contains(ind) || !can_be_initialized_as_const) return; auto tensor = tensor_registry->getNativeITensor(ind); @@ -230,14 +249,23 @@ inline void initConsts(const ir::Operands &operands,
VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - auto data = operand.shareData(); - assert(data && data->base()); ExternalTensor *ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) throw std::runtime_error{"This tensor is not external tensor"}; - ext_tensor->setData(data); + if (has_const_shared_memory) + { + const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); + auto memory_source_data = source_operand_ind.shareData(); + assert(memory_source_data && memory_source_data->base()); + ext_tensor->setData(memory_source_data); + } + else + { + auto data = operand.shareData(); + assert(data && data->base()); + ext_tensor->setData(data); + } }); } diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index a92af7bd45d..f9157cb2a42 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -60,6 +60,7 @@ class StaticTensorManager ir::OperandIndexMap _as_constants; DynamicTensorManager *_dynamic_tensor_manager; ir::OperandIndexMap _shared_memory_operand_indexes; + ir::OperandIndexMap _source_operand_inds_ref_counter; }; } // namespace basic diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 2e5fadd8d37..f6f69d6af06 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include "backend/basic/StaticTensorManager.h" #include "backend/basic/DynamicTensorManager.h" @@ -54,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - if (!_as_constants[ind] && !tensor->is_dynamic()) + bool buffer_set = false; + if (!tensor->is_dynamic()) { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) - << "TENSOR " << ind << " : " << static_cast(buffer) << std::endl; + if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes)) + { + const auto &shared_memory_ind = _shared_memory_operand_indexes[ind]; + if (!_as_constants[shared_memory_ind]) + { + tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind)); + buffer_set = true; + } + } + else if (!_as_constants[ind]) + { + tensor->setBuffer(_nonconst_mgr->getBuffer(ind)); + buffer_set = true; + } + if (buffer_set) + { + VERBOSE(CPU_StaticTensorManager) + << "TENSOR " << ind << " : " << static_cast(tensor->buffer()) << std::endl; + } } } } @@ -71,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const) { assert(!_tensors->getNativeTensor(ind)); + std::unique_ptr tensor = nullptr; if (as_const) { - auto tensor = std::make_unique(tensor_info); - _tensors->setNativeTensor(ind, std::move(tensor)); + tensor = std::make_unique(tensor_info); } else { - auto tensor = - std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); + const auto source_operand_ind = _shared_memory_operand_indexes.find(ind); + if (source_operand_ind != std::end(_shared_memory_operand_indexes) && + _as_constants[source_operand_ind->second]) + { + as_const = _as_constants[source_operand_ind->second]; + auto new_tensor_info = 
tensor_info; + new_tensor_info.setAsConstant(); + tensor = std::make_unique(new_tensor_info); + } + else + { + tensor = + std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); + } } + assert(tensor); + _tensors->setNativeTensor(ind, std::move(tensor)); _as_constants[ind] = as_const; } @@ -92,8 +122,26 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); + ir::OperandIndex claim_ind; + const auto source_ind = _shared_memory_operand_indexes.find(ind); + if (source_ind == std::end(_shared_memory_operand_indexes)) + { + claim_ind = ind; + } + else + { + claim_ind = source_ind->second; + } + if (_as_constants[claim_ind]) + { + return; + } + ++_source_operand_inds_ref_counter[claim_ind]; + // notify only first usage + if (1 == _source_operand_inds_ref_counter[claim_ind]) + { + _nonconst_mgr->claimPlan(claim_ind, size); + } } void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) @@ -103,8 +151,29 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); + ir::OperandIndex release_ind; + const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind); + if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes)) + { + release_ind = ind; + } + else + { + release_ind = source_operand_ind_ind->second; + } + if (_as_constants[release_ind]) + { + return; + } + if (_source_operand_inds_ref_counter[release_ind] > 0) + { + --_source_operand_inds_ref_counter[release_ind]; + } + // notify only last usage + if (0 == _source_operand_inds_ref_counter[release_ind]) + { + _nonconst_mgr->releasePlan(release_ind); + } } void StaticTensorManager::iterate(const std::function &fn) From e2005d88c492a574a26bf105d08ea46d7d5a6625 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Mon, 2 Dec 2024 16:10:21 +0100 Subject: [PATCH 2/7] implementation refactor --- runtime/onert/backend/cpu/Backend.h | 4 +- .../backend/basic/BackendContextHelpers.h | 31 ++++++--- .../backend/basic/StaticTensorManager.h | 5 ++ .../src/backend/basic/StaticTensorManager.cc | 69 +++++++------------ 4 files changed, 51 insertions(+), 58 deletions(-) diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index 1ccff6fba94..a4199f98d37 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -20,6 +20,7 @@ #include "BackendContext.h" #include "Config.h" #include "KernelGenerator.h" +#include "SharedMemoryOperands.h" #include @@ -45,8 +46,7 @@ class Backend : public ::onert::backend::Backend auto &graph = *data.graph; auto context = std::make_unique(this, std::move(data)); auto tr = std::make_shared(); - // TODO: Use findSharedMemoryOperandIndexes method here - auto tb = std::make_shared(tr, ir::OperandIndexMap{}); + auto tb = std::make_shared(tr, findSharedMemoryOperandIndexes(graph)); context->tensor_registry = tr; context->tensor_builder = tb; context->kernel_gen = std::make_shared(graph, tb, tr, custom_kernel_builder, diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index ce905ce7a46..af069c91f20 100644 --- 
a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -177,15 +177,13 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, - const ir::Graph &graph, - const util::Set &external_operands, - const std::shared_ptr &tensor_registry, - const std::vector &op_order, - const ir::OperandIndexMap &shared_memory_operand_idx) +ir::OperandIndexSequence register_source_memory_tensors( + const std::shared_ptr &tensor_builder, const ir::Graph &graph, + const util::Set &external_operands, + const ir::OperandIndexMap &shared_memory_operand_idx) { - // process source tensors for shared memory at first - std::vector registered_source_ind; + // process source tensors that share memory at first + ir::OperandIndexSequence registered_source_ind; for (const auto &[_, source_ind] : shared_memory_operand_idx) { if (external_operands.contains(source_ind)) @@ -193,14 +191,25 @@ ITensorRegistry *genTensors(const std::shared_ptr &tensor_build if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source continue; tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); - registered_source_ind.emplace_back(source_ind); + registered_source_ind.append(source_ind); } + return registered_source_ind; +} +template +ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, + const ir::Graph &graph, + const util::Set &external_operands, + const std::shared_ptr &tensor_registry, + const std::vector &op_order, + const ir::OperandIndexMap &shared_memory_operand_idx) +{ + const auto registered_source_ind = register_source_memory_tensors( + tensor_builder, graph, external_operands, shared_memory_operand_idx); graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands.contains(ind)) return; - if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) != - std::end(registered_source_ind)) // skip tensors already registered + if (registered_source_ind.contains(ind)) // skip tensors already registered return; tensor_builder->registerTensorInfo(ind, obj.info()); }); diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index f9157cb2a42..e08d17b25bb 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -54,6 +54,11 @@ class StaticTensorManager void iterate(const std::function &fn); +private: + // Update source operand index if source memory operand exist. + // Otherwise, return unchanged. 
+ ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind); + private: std::unique_ptr _nonconst_mgr; const std::shared_ptr _tensors; diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index f6f69d6af06..1564865e77f 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -56,28 +56,14 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - bool buffer_set = false; - if (!tensor->is_dynamic()) + const auto adjusted_ind = adjust_with_memory_source_operand(ind); + if (!_as_constants[adjusted_ind] && !tensor->is_dynamic()) { - if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes)) - { - const auto &shared_memory_ind = _shared_memory_operand_indexes[ind]; - if (!_as_constants[shared_memory_ind]) - { - tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind)); - buffer_set = true; - } - } - else if (!_as_constants[ind]) - { - tensor->setBuffer(_nonconst_mgr->getBuffer(ind)); - buffer_set = true; - } - if (buffer_set) - { - VERBOSE(CPU_StaticTensorManager) - << "TENSOR " << ind << " : " << static_cast(tensor->buffer()) << std::endl; - } + auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind); + tensor->setBuffer(buffer); + + VERBOSE(CPU_StaticTensorManager) + << "TENSOR " << ind << " : " << static_cast(buffer) << std::endl; } } } @@ -95,14 +81,14 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, } else { - const auto source_operand_ind = _shared_memory_operand_indexes.find(ind); - if (source_operand_ind != std::end(_shared_memory_operand_indexes) && - _as_constants[source_operand_ind->second]) + const auto source_operand_ind = adjust_with_memory_source_operand(ind); + if (_as_constants[source_operand_ind]) { - as_const = _as_constants[source_operand_ind->second]; auto new_tensor_info = tensor_info; new_tensor_info.setAsConstant(); + // source memory tensor is a constant tensor = std::make_unique(new_tensor_info); + as_const = true; } else { @@ -122,16 +108,7 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - ir::OperandIndex claim_ind; - const auto source_ind = _shared_memory_operand_indexes.find(ind); - if (source_ind == std::end(_shared_memory_operand_indexes)) - { - claim_ind = ind; - } - else - { - claim_ind = source_ind->second; - } + const auto claim_ind = adjust_with_memory_source_operand(ind); if (_as_constants[claim_ind]) { return; @@ -151,16 +128,7 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - ir::OperandIndex release_ind; - const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind); - if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes)) - { - release_ind = ind; - } - else - { - release_ind = source_operand_ind_ind->second; - } + const auto release_ind = adjust_with_memory_source_operand(ind); if (_as_constants[release_ind]) { return; @@ -182,6 +150,17 @@ void StaticTensorManager::iterate(const std::functionsecond; + } + // source memory operand not found + return ind; +} + } // namespace basic } // namespace backend } // namespace onert From 
4171e2ed380607a76f230406bc3154cd5b3669f5 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 5 Dec 2024 14:50:15 +0100 Subject: [PATCH 3/7] fixed handling of const tensor --- runtime/onert/backend/cpu/BackendContext.cc | 6 ++--- .../backend/basic/BackendContextHelpers.h | 16 +++++++++---- .../src/backend/basic/StaticTensorManager.cc | 23 ++++--------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index 3a39df917c4..95635152a9a 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -41,15 +41,15 @@ FunctionMap BackendContext::genKernels() { FunctionMap ret; + basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), + tensor_builder->getSharedMemoryOperandIndexes()); + for (auto &&op_ind : _data.op_order) { auto fn_seq = kernel_gen->generate(op_ind); ret.emplace(op_ind, std::move(fn_seq)); } - basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), - tensor_builder->getSharedMemoryOperandIndexes()); - // NOTE For memory optimization, we want to free some operand data const_cast(*_data.graph) .operands() diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index af069c91f20..aae4404620b 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -258,21 +258,27 @@ inline void initConsts(const ir::Operands &operands, VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - ExternalTensor *ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) - throw std::runtime_error{"This tensor is not external tensor"}; - if (has_const_shared_memory) { const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); auto memory_source_data = source_operand_ind.shareData(); assert(memory_source_data && memory_source_data->base()); - ext_tensor->setData(memory_source_data); + auto shared_mem_tensor = dynamic_cast(tensor); + if (nullptr == shared_mem_tensor) + { + throw std::runtime_error{"Incorrect type of tensor to support sharing memory"}; + } + shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); } else { auto data = operand.shareData(); assert(data && data->base()); + auto ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) + { + throw std::runtime_error{"This tensor is not external tensor"}; + } ext_tensor->setData(data); } }); diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 1564865e77f..1b5f807e16f 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -74,30 +74,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - std::unique_ptr tensor = nullptr; if (as_const) { - tensor = std::make_unique(tensor_info); + auto tensor = std::make_unique(tensor_info); + _tensors->setNativeTensor(ind, std::move(tensor)); } else { - const auto source_operand_ind = adjust_with_memory_source_operand(ind); - if (_as_constants[source_operand_ind]) - { - auto new_tensor_info = tensor_info; - new_tensor_info.setAsConstant(); - // source memory tensor is a constant - 
tensor = std::make_unique(new_tensor_info); - as_const = true; - } - else - { - tensor = - std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); - } + auto tensor = + std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); } - assert(tensor); - _tensors->setNativeTensor(ind, std::move(tensor)); _as_constants[ind] = as_const; } From f6e7bcd1bab5aa803a1d57a918031cab2a62e756 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 11:14:22 +0100 Subject: [PATCH 4/7] review remarks --- .../backend/basic/BackendContextHelpers.h | 27 +++++---- .../backend/basic/StaticTensorManager.h | 5 +- .../src/backend/basic/StaticTensorManager.cc | 55 +++++++++++++------ 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index aae4404620b..5030600ca8c 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -249,8 +249,11 @@ inline void initConsts(const ir::Operands &operands, const bool has_const_shared_memory = shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && operands.at(shared_memory_operands_map.at(ind)).isConstant(); + if (external_operands.contains(ind)) + return; const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; - if (external_operands.contains(ind) || !can_be_initialized_as_const) + if (!can_be_initialized_as_const) + // tensor currently processed not a const and source memory tensor (if exists) also not a const return; auto tensor = tensor_registry->getNativeITensor(ind); @@ -264,23 +267,19 @@ inline void initConsts(const ir::Operands &operands, auto memory_source_data = source_operand_ind.shareData(); assert(memory_source_data && memory_source_data->base()); auto shared_mem_tensor = dynamic_cast(tensor); - if (nullptr == shared_mem_tensor) - { - throw std::runtime_error{"Incorrect type of tensor to support sharing memory"}; - } + assert(shared_mem_tensor != nullptr); shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); + return; } - else + // the default flow for constant initialization + auto data = operand.shareData(); + assert(data && data->base()); + auto ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) { - auto data = operand.shareData(); - assert(data && data->base()); - auto ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) - { - throw std::runtime_error{"This tensor is not external tensor"}; - } - ext_tensor->setData(data); + throw std::runtime_error{"This tensor is not external tensor"}; } + ext_tensor->setData(data); }); } diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index e08d17b25bb..e377756f08f 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -57,7 +57,10 @@ class StaticTensorManager private: // Update source operand index if source memory operand exist. // Otherwise, return unchanged. 
- ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind); + ir::OperandIndex adjustWithMemorySourceOperand(const ir::OperandIndex &ind) const; + // Return true if given ind is shared index or source index of shared memory operands map. + // Otherwise, return false. + bool isSharedMemoryOperand(const ir::OperandIndex &ind) const; private: std::unique_ptr _nonconst_mgr; diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 1b5f807e16f..3f5d063e980 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -56,7 +56,7 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - const auto adjusted_ind = adjust_with_memory_source_operand(ind); + const auto adjusted_ind = adjustWithMemorySourceOperand(ind); if (!_as_constants[adjusted_ind] && !tensor->is_dynamic()) { auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind); @@ -95,17 +95,20 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - const auto claim_ind = adjust_with_memory_source_operand(ind); + const auto claim_ind = adjustWithMemorySourceOperand(ind); if (_as_constants[claim_ind]) { return; } - ++_source_operand_inds_ref_counter[claim_ind]; - // notify only first usage - if (1 == _source_operand_inds_ref_counter[claim_ind]) + if (isSharedMemoryOperand(claim_ind)) { - _nonconst_mgr->claimPlan(claim_ind, size); + ++_source_operand_inds_ref_counter[claim_ind]; + if (_source_operand_inds_ref_counter[claim_ind] > 1) + { + return; // claimPlan should be called only for the first usage + } } + _nonconst_mgr->claimPlan(claim_ind, size); } void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) @@ -115,20 +118,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - const auto release_ind = adjust_with_memory_source_operand(ind); + const auto release_ind = adjustWithMemorySourceOperand(ind); if (_as_constants[release_ind]) { return; } - if (_source_operand_inds_ref_counter[release_ind] > 0) - { - --_source_operand_inds_ref_counter[release_ind]; - } - // notify only last usage - if (0 == _source_operand_inds_ref_counter[release_ind]) + if (isSharedMemoryOperand(release_ind)) { - _nonconst_mgr->releasePlan(release_ind); + if (_source_operand_inds_ref_counter[release_ind] > 0) // sanity check + { + --_source_operand_inds_ref_counter[release_ind]; + } + if (_source_operand_inds_ref_counter[release_ind] > 0) + { + return; // releasePlan should be called only for the first usage + } } + _nonconst_mgr->releasePlan(release_ind); } void StaticTensorManager::iterate(const std::function &fn) @@ -137,17 +143,30 @@ void StaticTensorManager::iterate(const std::functionsecond; + return shared_operand_ind->second; } // source memory operand not found return ind; } +bool StaticTensorManager::isSharedMemoryOperand(const ir::OperandIndex &ind) const +{ + for (const auto &[shared_ind, source_ind] : _shared_memory_operand_indexes) + { + if (shared_ind == ind || source_ind == ind) + { + return true; + } + } + return false; +} + } // namespace basic } // namespace backend } // namespace onert From 
1cc0ad9e37a5b3ecbea9ad32b6627d2501ef6996 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 11:16:51 +0100 Subject: [PATCH 5/7] adjust with styles --- .../onert/core/include/backend/basic/BackendContextHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 5030600ca8c..c95dd9b7277 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -253,7 +253,7 @@ inline void initConsts(const ir::Operands &operands, return; const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; if (!can_be_initialized_as_const) - // tensor currently processed not a const and source memory tensor (if exists) also not a const + // tensor currently processed not a const and source memory tensor (if exists) not a const too return; auto tensor = tensor_registry->getNativeITensor(ind); From 577a67fef146d770ced09ae93d271ab1ed344c89 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 15:54:45 +0100 Subject: [PATCH 6/7] init consts refactor --- runtime/onert/backend/cpu/BackendContext.cc | 7 +++ .../backend/basic/BackendContextHelpers.h | 56 ++++++++++++------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index 95635152a9a..cf199a7fe32 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -23,6 +23,7 @@ #include "ir/OperandIndexMap.h" #include "ir/OperandIndexSequence.h" #include "backend/basic/BackendContextHelpers.h" +#include "backend/basic/TensorRegistry.h" namespace onert { @@ -44,6 +45,12 @@ FunctionMap BackendContext::genKernels() basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), tensor_builder->getSharedMemoryOperandIndexes()); + // TODO: Change type of tensor_registry field to TensorRegistry + auto tensor_registry_concreted = dynamic_cast(tensor_registry.get()); + assert(tensor_registry_concreted); + basic::initSharedMemoryConsts(graph()->operands(), external_operands(), tensor_registry_concreted, + tensor_builder->getSharedMemoryOperandIndexes()); + for (auto &&op_ind : _data.op_order) { auto fn_seq = kernel_gen->generate(op_ind); diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index c95dd9b7277..4fec186fd8c 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -24,6 +24,7 @@ #include "util/logging.h" #include "backend/ITensorRegistry.h" #include "backend/BackendContext.h" +#include "backend/basic/TensorRegistry.h" #include "Tensor.h" namespace onert @@ -240,45 +241,58 @@ template ITensorRegistry *genTensors(T_BackendContex ctx.data().op_order, {}); } +inline void +initSharedMemoryConsts(const ir::Operands &operands, + const util::Set &external_operands, + TensorRegistry *tensor_registry, + const ir::OperandIndexMap &shared_memory_operands_map) +{ + operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (external_operands.contains(ind)) + return; + const auto shared_mem_it = shared_memory_operands_map.find(ind); + if (shared_mem_it == std::end(shared_memory_operands_map)) + return; // no 
shared memory source + if (!operands.at(shared_mem_it->second).isConstant()) + return; // source operand not a constant + + VERBOSE(FillOperandData) << "Fill shared data for " << ind << std::endl; + + const auto &source_operand_ind = operands.at(shared_mem_it->second); + auto memory_source_data = source_operand_ind.shareData(); + assert(memory_source_data && memory_source_data->base()); + auto tensor = tensor_registry->getNativeTensor(ind); + assert(tensor != nullptr); + tensor->setBuffer(const_cast(memory_source_data->base())); + }); +} + inline void initConsts(const ir::Operands &operands, const util::Set &external_operands, ITensorRegistry *tensor_registry, const ir::OperandIndexMap &shared_memory_operands_map) { operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - const bool has_const_shared_memory = + if (external_operands.contains(ind) || !operand.isConstant()) + return; + const bool has_const_shared_source = shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && operands.at(shared_memory_operands_map.at(ind)).isConstant(); - if (external_operands.contains(ind)) - return; - const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; - if (!can_be_initialized_as_const) - // tensor currently processed not a const and source memory tensor (if exists) not a const too - return; + if (has_const_shared_source) + return; // tensors with shared memory are processed in initSharedMemoryConsts auto tensor = tensor_registry->getNativeITensor(ind); assert(tensor != nullptr); VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - if (has_const_shared_memory) - { - const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); - auto memory_source_data = source_operand_ind.shareData(); - assert(memory_source_data && memory_source_data->base()); - auto shared_mem_tensor = dynamic_cast(tensor); - assert(shared_mem_tensor != nullptr); - shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); - return; - } - // the default flow for constant initialization auto data = operand.shareData(); assert(data && data->base()); - auto ext_tensor = dynamic_cast(tensor); + ExternalTensor *ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) - { throw std::runtime_error{"This tensor is not external tensor"}; - } + ext_tensor->setData(data); }); } From 155497bdbbaf6674b2e2c8fc3c89e1b2cff21db0 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 12 Dec 2024 09:17:22 +0100 Subject: [PATCH 7/7] review remarks --- runtime/onert/backend/cpu/BackendContext.cc | 6 ++++-- .../core/include/backend/basic/BackendContextHelpers.h | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index cf199a7fe32..16904d86a86 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -25,6 +25,8 @@ #include "backend/basic/BackendContextHelpers.h" #include "backend/basic/TensorRegistry.h" +#include + namespace onert { namespace backend @@ -46,8 +48,8 @@ FunctionMap BackendContext::genKernels() tensor_builder->getSharedMemoryOperandIndexes()); // TODO: Change type of tensor_registry field to TensorRegistry - auto tensor_registry_concreted = dynamic_cast(tensor_registry.get()); - assert(tensor_registry_concreted); + auto tensor_registry_concreted = + nnfw::misc::polymorphic_downcast(tensor_registry.get()); 
basic::initSharedMemoryConsts(graph()->operands(), external_operands(), tensor_registry_concreted, tensor_builder->getSharedMemoryOperandIndexes()); diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 4fec186fd8c..2a0777a4a82 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -178,13 +178,13 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ir::OperandIndexSequence register_source_memory_tensors( +util::Set register_source_memory_tensors( const std::shared_ptr &tensor_builder, const ir::Graph &graph, const util::Set &external_operands, const ir::OperandIndexMap &shared_memory_operand_idx) { // process source tensors that share memory at first - ir::OperandIndexSequence registered_source_ind; + util::Set registered_source_ind; for (const auto &[_, source_ind] : shared_memory_operand_idx) { if (external_operands.contains(source_ind)) @@ -192,7 +192,7 @@ ir::OperandIndexSequence register_source_memory_tensors( if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source continue; tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); - registered_source_ind.append(source_ind); + registered_source_ind.add(source_ind); } return registered_source_ind; }
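To summarize the mechanism this series converges on: every operand that shares memory is redirected to its source operand (adjustWithMemorySourceOperand), and the users of that source are reference-counted so that the memory planner sees only the first claimPlan and the last releasePlan. Below is a minimal, self-contained sketch of that idea; OperandIndex, FakePlanner and SharedPlanDriver are illustrative stand-ins rather than the actual onert types, and the counting is simplified to track every user instead of only shared operands.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <unordered_map>
#include <utility>

// Stand-in for onert's operand index type.
using OperandIndex = uint32_t;

// Stand-in for the memory planner (MemoryManager in the real code).
class FakePlanner
{
public:
  void claimPlan(OperandIndex ind, uint32_t size)
  {
    std::cout << "claim " << ind << " (" << size << " bytes)\n";
  }
  void releasePlan(OperandIndex ind) { std::cout << "release " << ind << "\n"; }
};

// Redirects shared operands to their memory source and notifies the planner
// only on the first claim and the last release of that source.
class SharedPlanDriver
{
public:
  explicit SharedPlanDriver(std::map<OperandIndex, OperandIndex> shared_to_source)
    : _shared_to_source(std::move(shared_to_source))
  {
  }

  void claim(OperandIndex ind, uint32_t size)
  {
    const auto source = adjust(ind);
    if (++_ref_counter[source] == 1) // first user of the shared buffer
      _planner.claimPlan(source, size);
  }

  void release(OperandIndex ind)
  {
    const auto source = adjust(ind);
    assert(_ref_counter[source] > 0);
    if (--_ref_counter[source] == 0) // last user of the shared buffer
      _planner.releasePlan(source);
  }

private:
  // Returns the memory source operand if ind shares memory, otherwise ind itself.
  OperandIndex adjust(OperandIndex ind) const
  {
    const auto it = _shared_to_source.find(ind);
    return it != _shared_to_source.end() ? it->second : ind;
  }

  std::map<OperandIndex, OperandIndex> _shared_to_source;
  std::unordered_map<OperandIndex, uint32_t> _ref_counter;
  FakePlanner _planner;
};

int main()
{
  // Hypothetical graph: operand 5 shares memory with source operand 2.
  SharedPlanDriver driver({{5, 2}});
  driver.claim(2, 64); // planner claims a buffer for operand 2
  driver.claim(5, 64); // no new claim: operand 5 aliases operand 2
  driver.release(5);   // buffer stays alive, operand 2 still uses it
  driver.release(2);   // last user gone: planner releases operand 2
  return 0;
}

With this shape, constant source operands can simply be skipped before the counter is touched, which is what the _as_constants check in claimPlan/releasePlan does in the patches above.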
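The constant path added in patch 6 (initSharedMemoryConsts) follows the same idea at initialization time: when the memory source operand is a constant, the dependent tensor's buffer is pointed directly at the constant's data, so it needs neither planned memory nor a copy. A minimal sketch with the same caveat (simplified stand-in types, not the real onert API):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

// A constant operand owns its data; a tensor only holds a (possibly aliased) buffer pointer.
struct ConstOperand
{
  std::vector<uint8_t> data;
};

struct Tensor
{
  const uint8_t *buffer = nullptr;
};

// For every tensor whose memory source is a constant operand, alias the constant's data.
void initSharedMemoryConsts(const std::map<uint32_t, ConstOperand> &constants,
                            const std::map<uint32_t, uint32_t> &shared_to_source,
                            std::map<uint32_t, Tensor> &tensors)
{
  for (auto &[ind, tensor] : tensors)
  {
    const auto shared_it = shared_to_source.find(ind);
    if (shared_it == shared_to_source.end())
      continue; // this tensor does not share memory with anything
    const auto const_it = constants.find(shared_it->second);
    if (const_it == constants.end())
      continue; // the source operand is not a constant
    tensor.buffer = const_it->second.data.data(); // reuse the constant's storage
  }
}

int main()
{
  std::map<uint32_t, ConstOperand> constants{{2, ConstOperand{{1, 2, 3, 4}}}};
  std::map<uint32_t, uint32_t> shared_to_source{{5, 2}}; // operand 5 aliases constant 2
  std::map<uint32_t, Tensor> tensors{{5, Tensor{}}};

  initSharedMemoryConsts(constants, shared_to_source, tensors);
  assert(tensors.at(5).buffer == constants.at(2).data.data());
  std::cout << "tensor 5 reuses constant 2, first byte = " << int(tensors.at(5).buffer[0]) << "\n";
  return 0;
}

The assumed beneficiaries are operations such as Reshape, whose output is just a different view of the input bytes; which operands are actually designated for sharing is decided by findSharedMemoryOperandIndexes, which this series only consumes.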