From a5e770ae1b548aacb0ba0f8b9a7943c13147f12d Mon Sep 17 00:00:00 2001 From: mbencer Date: Fri, 29 Nov 2024 05:52:59 +0100 Subject: [PATCH] [onert] Propagate shared memory operand indexes to cpu backend (#14230) This commit adds propagation of shared memory operand indexes to cpu backend. Note that the propagated indexes map is not filled yet. ONE-DCO-1.0-Signed-off-by: Mateusz Bencer m.bencer@partner.samsung.com --- runtime/onert/backend/cpu/Backend.h | 3 +- runtime/onert/backend/cpu/BackendContext.cc | 9 +++- .../backend/basic/BackendContextHelpers.h | 47 +++++++++++-------- .../backend/basic/StaticTensorManager.h | 7 ++- .../include/backend/basic/TensorBuilder.h | 9 +++- .../src/backend/basic/StaticTensorManager.cc | 18 ++++--- .../core/src/backend/basic/TensorBuilder.cc | 21 +++++++-- .../core/src/backend/builtin/TensorBuilder.cc | 4 +- 8 files changed, 78 insertions(+), 40 deletions(-) diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index 398c188a897..1ccff6fba94 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -45,7 +45,8 @@ class Backend : public ::onert::backend::Backend auto &graph = *data.graph; auto context = std::make_unique(this, std::move(data)); auto tr = std::make_shared(); - auto tb = std::make_shared(tr); + // TODO: Use findSharedMemoryOperandIndexes method here + auto tb = std::make_shared(tr, ir::OperandIndexMap{}); context->tensor_registry = tr; context->tensor_builder = tb; context->kernel_gen = std::make_shared(graph, tb, tr, custom_kernel_builder, diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index 17a121a6a71..3a39df917c4 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -31,7 +31,11 @@ namespace backend namespace cpu { -ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); } +ITensorRegistry 
*BackendContext::genTensors() +{ + return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry, + data().op_order, tensor_builder->getSharedMemoryOperandIndexes()); +} FunctionMap BackendContext::genKernels() { @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels() ret.emplace(op_ind, std::move(fn_seq)); } - basic::initConsts(*this); + basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), + tensor_builder->getSharedMemoryOperandIndexes()); // NOTE For memory optimization, we want to free some operand data const_cast(*_data.graph) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 79a535559e1..46e57e925e6 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -34,19 +34,18 @@ namespace basic { // TODO Remove the template param BackendContext once unification of cpu backend context is done -template void planTensors(const T_BackendContext &ctx) +template +void planTensors(const std::shared_ptr &tensor_builder, const ir::Graph &graph, + const util::Set &external_operands, + const std::vector &op_order) { - const ir::Graph &graph = *ctx.graph(); - const auto &order = ctx.data().op_order; - auto tensor_builder = ctx.tensor_builder; - ir::OperandIndexMap uses_map; ir::OperandIndexMap def_map; ir::OperandIndexSequence constants; // Prepare scanning graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (ctx.external_operands().contains(ind)) + if (external_operands.contains(ind)) return; // TODO Check if we need to handle unused tensors @@ -95,7 +94,7 @@ template void planTensors(const T_BackendContext &ct // 1. Scan DEF of outputs. If the DEF, allocate it // 2. Scan DEF of inputs. If variable tensor, allocate it // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - for (const auto &op_ind : order) + for (const auto &op_ind : op_order) { const auto &op = graph.operations().at(op_ind); auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; @@ -104,7 +103,7 @@ template void planTensors(const T_BackendContext &ct // Define outputs for (const auto &ind : op_outputs) { - if (ctx.external_operands().contains(ind)) + if (external_operands.contains(ind)) continue; if (!tensor_builder->isRegistered(ind)) continue; @@ -121,7 +120,7 @@ template void planTensors(const T_BackendContext &ct // non-constant because of less memory usage by memory planning in here for (const auto &ind : op_inputs) { - if (ctx.external_operands().contains(ind)) + if (external_operands.contains(ind)) continue; if (!tensor_builder->isRegistered(ind)) continue; @@ -138,7 +137,7 @@ template void planTensors(const T_BackendContext &ct for (const auto &ind : op_inputs) { - if (ctx.external_operands().contains(ind)) + if (external_operands.contains(ind)) continue; if (!tensor_builder->isRegistered(ind)) continue; @@ -177,13 +176,16 @@ template void planTensors(const T_BackendContext &ct [](std::pair it) { return it.second == 0; })); } -template ITensorRegistry *genTensors(T_BackendContext &ctx) +template +ITensorRegistry * +genTensors(const std::shared_ptr &tensor_builder, const ir::Graph &graph, + const util::Set &external_operands, + const std::shared_ptr &tensor_registry, + const std::vector &op_order, + const ir::OperandIndexMap & /*shared_memory_operand_idx*/) { - const ir::Graph &graph = *ctx.graph(); - auto tensor_builder = ctx.tensor_builder; - graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (ctx.external_operands().contains(ind)) + if (external_operands.contains(ind)) return; tensor_builder->registerTensorInfo(ind, obj.info()); }); @@ -191,7 +193,7 @@ template ITensorRegistry *genTensors(T_BackendContex // TODO Get compiler options from 
compiler, and use it rather than getting it from Env if (util::getConfigString(util::config::EXECUTOR) == "Linear") { - basic::planTensors(ctx); + basic::planTensors(tensor_builder, graph, external_operands, op_order); } else { @@ -205,12 +207,19 @@ template ITensorRegistry *genTensors(T_BackendContex tensor_builder->allocate(); - return ctx.tensor_registry.get(); + return tensor_registry.get(); +} + +template ITensorRegistry *genTensors(T_BackendContext &ctx) +{ + return genTensors(ctx.tensor_builder, *ctx.graph(), ctx.external_operands(), ctx.tensor_registry, + ctx.data().op_order, {}); } inline void initConsts(const ir::Operands &operands, const util::Set &external_operands, - ITensorRegistry *tensor_registry) + ITensorRegistry *tensor_registry, + const ir::OperandIndexMap & /*shared_memory_operands_map*/) { operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { if (external_operands.contains(ind) || !operand.isConstant()) @@ -234,7 +243,7 @@ inline void initConsts(const ir::Operands &operands, inline void initConsts(BackendContext &ctx) { - initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get()); + initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(), {}); } } // namespace basic diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index 2aab4303131..a92af7bd45d 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -37,9 +37,11 @@ class StaticTensorManager { public: StaticTensorManager(const std::shared_ptr &reg, - DynamicTensorManager *dynamic_tensor_manager); + DynamicTensorManager *dynamic_tensor_manager, + const ir::OperandIndexMap &shared_memory_operand_indexes); StaticTensorManager(const std::shared_ptr &reg, const std::string planner_id, - DynamicTensorManager *dynamic_tensor_manager); +
DynamicTensorManager *dynamic_tensor_manager, + const ir::OperandIndexMap &shared_memory_operand_indexes); virtual ~StaticTensorManager() = default; void allocateNonconsts(void); @@ -57,6 +59,7 @@ class StaticTensorManager const std::shared_ptr _tensors; ir::OperandIndexMap _as_constants; DynamicTensorManager *_dynamic_tensor_manager; + ir::OperandIndexMap _shared_memory_operand_indexes; }; } // namespace basic diff --git a/runtime/onert/core/include/backend/basic/TensorBuilder.h b/runtime/onert/core/include/backend/basic/TensorBuilder.h index 5d5b521ae7b..43659572a08 100644 --- a/runtime/onert/core/include/backend/basic/TensorBuilder.h +++ b/runtime/onert/core/include/backend/basic/TensorBuilder.h @@ -37,8 +37,10 @@ namespace basic class TensorBuilder { public: - TensorBuilder(const std::shared_ptr &tensor_reg); - TensorBuilder(const std::shared_ptr &tensor_reg, const std::string planner_id); + TensorBuilder(const std::shared_ptr &tensor_reg, + const ir::OperandIndexMap &shared_memory_operand_indexes = {}); + TensorBuilder(const std::shared_ptr &tensor_reg, const std::string planner_id, + const ir::OperandIndexMap &shared_memory_operand_indexes = {}); /** * @brief Register tensor information to allocate on CPU backend @@ -54,6 +56,8 @@ class TensorBuilder void allocate(void); + const ir::OperandIndexMap &getSharedMemoryOperandIndexes() const; + DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: @@ -61,6 +65,7 @@ class TensorBuilder std::unique_ptr _dynamic_tensor_mgr; std::unique_ptr _static_tensor_mgr; ir::OperandIndexMap _tensor_info_map; + ir::OperandIndexMap _shared_memory_operand_indexes; }; } // namespace basic diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 92b3f286b2c..2e5fadd8d37 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ 
-27,19 +27,23 @@ namespace backend namespace basic { -StaticTensorManager::StaticTensorManager(const std::shared_ptr &reg, - DynamicTensorManager *dynamic_tensor_manager) +StaticTensorManager::StaticTensorManager( + const std::shared_ptr &reg, DynamicTensorManager *dynamic_tensor_manager, + const ir::OperandIndexMap &shared_memory_operand_indexes) : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} + _dynamic_tensor_manager{dynamic_tensor_manager}, + _shared_memory_operand_indexes{shared_memory_operand_indexes} { // DO NOTHING } -StaticTensorManager::StaticTensorManager(const std::shared_ptr &reg, - const std::string planner_id, - DynamicTensorManager *dynamic_tensor_manager) +StaticTensorManager::StaticTensorManager( + const std::shared_ptr &reg, const std::string planner_id, + DynamicTensorManager *dynamic_tensor_manager, + const ir::OperandIndexMap &shared_memory_operand_indexes) : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} + _dynamic_tensor_manager{dynamic_tensor_manager}, + _shared_memory_operand_indexes{shared_memory_operand_indexes} { // DO NOTHING } diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc index c94076dfbf1..a1c21bcfeb6 100644 --- a/runtime/onert/core/src/backend/basic/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc @@ -27,17 +27,24 @@ namespace backend namespace basic { -TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg) +TensorBuilder::TensorBuilder( + const std::shared_ptr &tensor_reg, + const ir::OperandIndexMap &shared_memory_operand_indexes) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, - _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(), +
shared_memory_operand_indexes)}, + _shared_memory_operand_indexes{shared_memory_operand_indexes} { /* empty */ } -TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg, - const std::string planner_id) +TensorBuilder::TensorBuilder( + const std::shared_ptr &tensor_reg, const std::string planner_id, + const ir::OperandIndexMap &shared_memory_operand_indexes) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, - _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())} + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(), + shared_memory_operand_indexes)}, + _shared_memory_operand_indexes{shared_memory_operand_indexes} { /* empty */ } @@ -83,6 +90,11 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); } +const ir::OperandIndexMap &TensorBuilder::getSharedMemoryOperandIndexes() const +{ + return _shared_memory_operand_indexes; +} + } // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/TensorBuilder.cc b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc index ca1c0179439..b6781061f2f 100644 --- a/runtime/onert/core/src/backend/builtin/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc @@ -29,8 +29,8 @@ namespace builtin TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())}, - _static_tensor_mgr{ - new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} + _static_tensor_mgr{new basic::StaticTensorManager( + _tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), ir::OperandIndexMap{})} { /* empty */ }