From 311cf7bef08783f3641dd074e493786de456020a Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Fri, 29 Nov 2024 20:31:19 +0100 Subject: [PATCH 1/7] [onert] Share tensors memory for designated operands This commit improves the tensor memory management to handle sharing memory buffers. It means that more than one tensor can refer to the same buffer. The sharing is determined by the operand index map calculated in the previous step. Note that cases like sharing memory from constant tensors require additional checks. ONE-DCO-1.0-Signed-off-by: Mateusz Bencer m.bencer@partner.samsung.com --- .../backend/basic/BackendContextHelpers.h | 52 +++++++--- .../backend/basic/StaticTensorManager.h | 1 + .../src/backend/basic/StaticTensorManager.cc | 99 ++++++++++++++++--- 3 files changed, 125 insertions(+), 27 deletions(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 46e57e925e6..ce905ce7a46 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -177,16 +177,31 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ITensorRegistry * -genTensors(const std::shared_ptr &tensor_builder, const ir::Graph &graph, - const util::Set &external_operands, - const std::shared_ptr &tensor_registry, - const std::vector &op_order, - const ir::OperandIndexMap & /*shared_memory_operand_idx*/) +ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, + const ir::Graph &graph, + const util::Set &external_operands, + const std::shared_ptr &tensor_registry, + const std::vector &op_order, + const ir::OperandIndexMap &shared_memory_operand_idx) { + // process source tensors for shared memory at first + std::vector registered_source_ind; + for (const auto &[_, source_ind] : shared_memory_operand_idx) + { + if (external_operands.contains(source_ind)) + continue; + if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source + continue; + tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); + registered_source_ind.emplace_back(source_ind); + } + graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands.contains(ind)) return; + if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) != + std::end(registered_source_ind)) // skip tensors already registered + return; tensor_builder->registerTensorInfo(ind, obj.info()); }); @@ -219,10 +234,14 @@ template ITensorRegistry *genTensors(T_BackendContex inline void initConsts(const ir::Operands &operands, const util::Set &external_operands, ITensorRegistry *tensor_registry, - const ir::OperandIndexMap & /*shared_memory_operands_map*/) + const ir::OperandIndexMap &shared_memory_operands_map) { operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (external_operands.contains(ind) || !operand.isConstant()) + const bool has_const_shared_memory = + shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && + operands.at(shared_memory_operands_map.at(ind)).isConstant(); + const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; + if (external_operands.contains(ind) || !can_be_initialized_as_const) return; auto tensor = tensor_registry->getNativeITensor(ind); @@ -230,14 +249,23 @@ inline void initConsts(const ir::Operands &operands,
VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - auto data = operand.shareData(); - assert(data && data->base()); ExternalTensor *ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) throw std::runtime_error{"This tensor is not external tensor"}; - ext_tensor->setData(data); + if (has_const_shared_memory) + { + const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); + auto memory_source_data = source_operand_ind.shareData(); + assert(memory_source_data && memory_source_data->base()); + ext_tensor->setData(memory_source_data); + } + else + { + auto data = operand.shareData(); + assert(data && data->base()); + ext_tensor->setData(data); + } }); } diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index a92af7bd45d..f9157cb2a42 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -60,6 +60,7 @@ class StaticTensorManager ir::OperandIndexMap _as_constants; DynamicTensorManager *_dynamic_tensor_manager; ir::OperandIndexMap _shared_memory_operand_indexes; + ir::OperandIndexMap _source_operand_inds_ref_counter; }; } // namespace basic diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 2e5fadd8d37..f6f69d6af06 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include "backend/basic/StaticTensorManager.h" #include "backend/basic/DynamicTensorManager.h" @@ -54,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - if (!_as_constants[ind] && !tensor->is_dynamic()) + bool buffer_set = false; + if (!tensor->is_dynamic()) { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) - << "TENSOR " << ind << " : " << static_cast(buffer) << std::endl; + if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes)) + { + const auto &shared_memory_ind = _shared_memory_operand_indexes[ind]; + if (!_as_constants[shared_memory_ind]) + { + tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind)); + buffer_set = true; + } + } + else if (!_as_constants[ind]) + { + tensor->setBuffer(_nonconst_mgr->getBuffer(ind)); + buffer_set = true; + } + if (buffer_set) + { + VERBOSE(CPU_StaticTensorManager) + << "TENSOR " << ind << " : " << static_cast(tensor->buffer()) << std::endl; + } } } } @@ -71,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const) { assert(!_tensors->getNativeTensor(ind)); + std::unique_ptr tensor = nullptr; if (as_const) { - auto tensor = std::make_unique(tensor_info); - _tensors->setNativeTensor(ind, std::move(tensor)); + tensor = std::make_unique(tensor_info); } else { - auto tensor = - std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); + const auto source_operand_ind = _shared_memory_operand_indexes.find(ind); + if (source_operand_ind != std::end(_shared_memory_operand_indexes) && + _as_constants[source_operand_ind->second]) + { + as_const = _as_constants[source_operand_ind->second]; + auto new_tensor_info = 
tensor_info; + new_tensor_info.setAsConstant(); + tensor = std::make_unique(new_tensor_info); + } + else + { + tensor = + std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); + } } + assert(tensor); + _tensors->setNativeTensor(ind, std::move(tensor)); _as_constants[ind] = as_const; } @@ -92,8 +122,26 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); + ir::OperandIndex claim_ind; + const auto source_ind = _shared_memory_operand_indexes.find(ind); + if (source_ind == std::end(_shared_memory_operand_indexes)) + { + claim_ind = ind; + } + else + { + claim_ind = source_ind->second; + } + if (_as_constants[claim_ind]) + { + return; + } + ++_source_operand_inds_ref_counter[claim_ind]; + // notify only first usage + if (1 == _source_operand_inds_ref_counter[claim_ind]) + { + _nonconst_mgr->claimPlan(claim_ind, size); + } } void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) @@ -103,8 +151,29 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); + ir::OperandIndex release_ind; + const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind); + if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes)) + { + release_ind = ind; + } + else + { + release_ind = source_operand_ind_ind->second; + } + if (_as_constants[release_ind]) + { + return; + } + if (_source_operand_inds_ref_counter[release_ind] > 0) + { + --_source_operand_inds_ref_counter[release_ind]; + } + // notify only last usage + if (0 == _source_operand_inds_ref_counter[release_ind]) + { + _nonconst_mgr->releasePlan(release_ind); + } } void StaticTensorManager::iterate(const std::function &fn) From e2005d88c492a574a26bf105d08ea46d7d5a6625 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Mon, 2 Dec 2024 16:10:21 +0100 Subject: [PATCH 2/7] implementation refactor --- runtime/onert/backend/cpu/Backend.h | 4 +- .../backend/basic/BackendContextHelpers.h | 31 ++++++--- .../backend/basic/StaticTensorManager.h | 5 ++ .../src/backend/basic/StaticTensorManager.cc | 69 +++++++------------ 4 files changed, 51 insertions(+), 58 deletions(-) diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index 1ccff6fba94..a4199f98d37 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -20,6 +20,7 @@ #include "BackendContext.h" #include "Config.h" #include "KernelGenerator.h" +#include "SharedMemoryOperands.h" #include @@ -45,8 +46,7 @@ class Backend : public ::onert::backend::Backend auto &graph = *data.graph; auto context = std::make_unique(this, std::move(data)); auto tr = std::make_shared(); - // TODO: Use findSharedMemoryOperandIndexes method here - auto tb = std::make_shared(tr, ir::OperandIndexMap{}); + auto tb = std::make_shared(tr, findSharedMemoryOperandIndexes(graph)); context->tensor_registry = tr; context->tensor_builder = tb; context->kernel_gen = std::make_shared(graph, tb, tr, custom_kernel_builder, diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index ce905ce7a46..af069c91f20 100644 --- 
a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -177,15 +177,13 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, - const ir::Graph &graph, - const util::Set &external_operands, - const std::shared_ptr &tensor_registry, - const std::vector &op_order, - const ir::OperandIndexMap &shared_memory_operand_idx) +ir::OperandIndexSequence register_source_memory_tensors( + const std::shared_ptr &tensor_builder, const ir::Graph &graph, + const util::Set &external_operands, + const ir::OperandIndexMap &shared_memory_operand_idx) { - // process source tensors for shared memory at first - std::vector registered_source_ind; + // process source tensors that share memory at first + ir::OperandIndexSequence registered_source_ind; for (const auto &[_, source_ind] : shared_memory_operand_idx) { if (external_operands.contains(source_ind)) @@ -193,14 +191,25 @@ ITensorRegistry *genTensors(const std::shared_ptr &tensor_build if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source continue; tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); - registered_source_ind.emplace_back(source_ind); + registered_source_ind.append(source_ind); } + return registered_source_ind; +} +template +ITensorRegistry *genTensors(const std::shared_ptr &tensor_builder, + const ir::Graph &graph, + const util::Set &external_operands, + const std::shared_ptr &tensor_registry, + const std::vector &op_order, + const ir::OperandIndexMap &shared_memory_operand_idx) +{ + const auto registered_source_ind = register_source_memory_tensors( + tensor_builder, graph, external_operands, shared_memory_operand_idx); graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands.contains(ind)) return; - if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) != - std::end(registered_source_ind)) // skip tensors already registered + if (registered_source_ind.contains(ind)) // skip tensors already registered return; tensor_builder->registerTensorInfo(ind, obj.info()); }); diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index f9157cb2a42..e08d17b25bb 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -54,6 +54,11 @@ class StaticTensorManager void iterate(const std::function &fn); +private: + // Update source operand index if source memory operand exist. + // Otherwise, return unchanged. 
+ ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind); + private: std::unique_ptr _nonconst_mgr; const std::shared_ptr _tensors; diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index f6f69d6af06..1564865e77f 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -56,28 +56,14 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - bool buffer_set = false; - if (!tensor->is_dynamic()) + const auto adjusted_ind = adjust_with_memory_source_operand(ind); + if (!_as_constants[adjusted_ind] && !tensor->is_dynamic()) { - if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes)) - { - const auto &shared_memory_ind = _shared_memory_operand_indexes[ind]; - if (!_as_constants[shared_memory_ind]) - { - tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind)); - buffer_set = true; - } - } - else if (!_as_constants[ind]) - { - tensor->setBuffer(_nonconst_mgr->getBuffer(ind)); - buffer_set = true; - } - if (buffer_set) - { - VERBOSE(CPU_StaticTensorManager) - << "TENSOR " << ind << " : " << static_cast(tensor->buffer()) << std::endl; - } + auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind); + tensor->setBuffer(buffer); + + VERBOSE(CPU_StaticTensorManager) + << "TENSOR " << ind << " : " << static_cast(buffer) << std::endl; } } } @@ -95,14 +81,14 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, } else { - const auto source_operand_ind = _shared_memory_operand_indexes.find(ind); - if (source_operand_ind != std::end(_shared_memory_operand_indexes) && - _as_constants[source_operand_ind->second]) + const auto source_operand_ind = adjust_with_memory_source_operand(ind); + if (_as_constants[source_operand_ind]) { - as_const = _as_constants[source_operand_ind->second]; auto new_tensor_info = tensor_info; new_tensor_info.setAsConstant(); + // source memory tensor is a constant tensor = std::make_unique(new_tensor_info); + as_const = true; } else { @@ -122,16 +108,7 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - ir::OperandIndex claim_ind; - const auto source_ind = _shared_memory_operand_indexes.find(ind); - if (source_ind == std::end(_shared_memory_operand_indexes)) - { - claim_ind = ind; - } - else - { - claim_ind = source_ind->second; - } + const auto claim_ind = adjust_with_memory_source_operand(ind); if (_as_constants[claim_ind]) { return; @@ -151,16 +128,7 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - ir::OperandIndex release_ind; - const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind); - if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes)) - { - release_ind = ind; - } - else - { - release_ind = source_operand_ind_ind->second; - } + const auto release_ind = adjust_with_memory_source_operand(ind); if (_as_constants[release_ind]) { return; @@ -182,6 +150,17 @@ void StaticTensorManager::iterate(const std::functionsecond; + } + // source memory operand not found + return ind; +} + } // namespace basic } // namespace backend } // namespace onert From 
4171e2ed380607a76f230406bc3154cd5b3669f5 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 5 Dec 2024 14:50:15 +0100 Subject: [PATCH 3/7] fixed handling of const tensor --- runtime/onert/backend/cpu/BackendContext.cc | 6 ++--- .../backend/basic/BackendContextHelpers.h | 16 +++++++++---- .../src/backend/basic/StaticTensorManager.cc | 23 ++++--------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index 3a39df917c4..95635152a9a 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -41,15 +41,15 @@ FunctionMap BackendContext::genKernels() { FunctionMap ret; + basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), + tensor_builder->getSharedMemoryOperandIndexes()); + for (auto &&op_ind : _data.op_order) { auto fn_seq = kernel_gen->generate(op_ind); ret.emplace(op_ind, std::move(fn_seq)); } - basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), - tensor_builder->getSharedMemoryOperandIndexes()); - // NOTE For memory optimization, we want to free some operand data const_cast(*_data.graph) .operands() diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index af069c91f20..aae4404620b 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -258,21 +258,27 @@ inline void initConsts(const ir::Operands &operands, VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - ExternalTensor *ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) - throw std::runtime_error{"This tensor is not external tensor"}; - if (has_const_shared_memory) { const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); auto memory_source_data = source_operand_ind.shareData(); assert(memory_source_data && memory_source_data->base()); - ext_tensor->setData(memory_source_data); + auto shared_mem_tensor = dynamic_cast(tensor); + if (nullptr == shared_mem_tensor) + { + throw std::runtime_error{"Incorrect type of tensor to support sharing memory"}; + } + shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); } else { auto data = operand.shareData(); assert(data && data->base()); + auto ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) + { + throw std::runtime_error{"This tensor is not external tensor"}; + } ext_tensor->setData(data); } }); diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 1564865e77f..1b5f807e16f 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -74,30 +74,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - std::unique_ptr tensor = nullptr; if (as_const) { - tensor = std::make_unique(tensor_info); + auto tensor = std::make_unique(tensor_info); + _tensors->setNativeTensor(ind, std::move(tensor)); } else { - const auto source_operand_ind = adjust_with_memory_source_operand(ind); - if (_as_constants[source_operand_ind]) - { - auto new_tensor_info = tensor_info; - new_tensor_info.setAsConstant(); - // source memory tensor is a constant - 
tensor = std::make_unique(new_tensor_info); - as_const = true; - } - else - { - tensor = - std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); - } + auto tensor = + std::make_unique(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); } - assert(tensor); - _tensors->setNativeTensor(ind, std::move(tensor)); _as_constants[ind] = as_const; } From f6e7bcd1bab5aa803a1d57a918031cab2a62e756 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 11:14:22 +0100 Subject: [PATCH 4/7] review remarks --- .../backend/basic/BackendContextHelpers.h | 27 +++++---- .../backend/basic/StaticTensorManager.h | 5 +- .../src/backend/basic/StaticTensorManager.cc | 55 +++++++++++++------ 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index aae4404620b..5030600ca8c 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -249,8 +249,11 @@ inline void initConsts(const ir::Operands &operands, const bool has_const_shared_memory = shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && operands.at(shared_memory_operands_map.at(ind)).isConstant(); + if (external_operands.contains(ind)) + return; const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; - if (external_operands.contains(ind) || !can_be_initialized_as_const) + if (!can_be_initialized_as_const) + // tensor currently processed not a const and source memory tensor (if exists) also not a const return; auto tensor = tensor_registry->getNativeITensor(ind); @@ -264,23 +267,19 @@ inline void initConsts(const ir::Operands &operands, auto memory_source_data = source_operand_ind.shareData(); assert(memory_source_data && memory_source_data->base()); auto shared_mem_tensor = dynamic_cast(tensor); - if (nullptr == shared_mem_tensor) - { - throw std::runtime_error{"Incorrect type of tensor to support sharing memory"}; - } + assert(shared_mem_tensor != nullptr); shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); + return; } - else + // the default flow for constant initialization + auto data = operand.shareData(); + assert(data && data->base()); + auto ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) { - auto data = operand.shareData(); - assert(data && data->base()); - auto ext_tensor = dynamic_cast(tensor); - if (ext_tensor == nullptr) - { - throw std::runtime_error{"This tensor is not external tensor"}; - } - ext_tensor->setData(data); + throw std::runtime_error{"This tensor is not external tensor"}; } + ext_tensor->setData(data); }); } diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h index e08d17b25bb..e377756f08f 100644 --- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h @@ -57,7 +57,10 @@ class StaticTensorManager private: // Update source operand index if source memory operand exist. // Otherwise, return unchanged. 
- ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind); + ir::OperandIndex adjustWithMemorySourceOperand(const ir::OperandIndex &ind) const; + // Return true if given ind is shared index or source index of shared memory operands map. + // Otherwise, return false. + bool isSharedMemoryOperand(const ir::OperandIndex &ind) const; private: std::unique_ptr _nonconst_mgr; diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 1b5f807e16f..3f5d063e980 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -56,7 +56,7 @@ void StaticTensorManager::allocateNonconsts(void) for (auto &&[ind, tensor] : _tensors->native_tensors()) { - const auto adjusted_ind = adjust_with_memory_source_operand(ind); + const auto adjusted_ind = adjustWithMemorySourceOperand(ind); if (!_as_constants[adjusted_ind] && !tensor->is_dynamic()) { auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind); @@ -95,17 +95,20 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - const auto claim_ind = adjust_with_memory_source_operand(ind); + const auto claim_ind = adjustWithMemorySourceOperand(ind); if (_as_constants[claim_ind]) { return; } - ++_source_operand_inds_ref_counter[claim_ind]; - // notify only first usage - if (1 == _source_operand_inds_ref_counter[claim_ind]) + if (isSharedMemoryOperand(claim_ind)) { - _nonconst_mgr->claimPlan(claim_ind, size); + ++_source_operand_inds_ref_counter[claim_ind]; + if (_source_operand_inds_ref_counter[claim_ind] > 1) + { + return; // claimPlan should be called only for the first usage + } } + _nonconst_mgr->claimPlan(claim_ind, size); } void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) @@ -115,20 +118,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) // This method is called only when a tensor has proper shape assert(!_tensors->getNativeTensor(ind)->is_dynamic()); - const auto release_ind = adjust_with_memory_source_operand(ind); + const auto release_ind = adjustWithMemorySourceOperand(ind); if (_as_constants[release_ind]) { return; } - if (_source_operand_inds_ref_counter[release_ind] > 0) - { - --_source_operand_inds_ref_counter[release_ind]; - } - // notify only last usage - if (0 == _source_operand_inds_ref_counter[release_ind]) + if (isSharedMemoryOperand(release_ind)) { - _nonconst_mgr->releasePlan(release_ind); + if (_source_operand_inds_ref_counter[release_ind] > 0) // sanity check + { + --_source_operand_inds_ref_counter[release_ind]; + } + if (_source_operand_inds_ref_counter[release_ind] > 0) + { + return; // releasePlan should be called only for the first usage + } } + _nonconst_mgr->releasePlan(release_ind); } void StaticTensorManager::iterate(const std::function &fn) @@ -137,17 +143,30 @@ void StaticTensorManager::iterate(const std::functionsecond; + return shared_operand_ind->second; } // source memory operand not found return ind; } +bool StaticTensorManager::isSharedMemoryOperand(const ir::OperandIndex &ind) const +{ + for (const auto &[shared_ind, source_ind] : _shared_memory_operand_indexes) + { + if (shared_ind == ind || source_ind == ind) + { + return true; + } + } + return false; +} + } // namespace basic } // namespace backend } // namespace onert From 
1cc0ad9e37a5b3ecbea9ad32b6627d2501ef6996 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 11:16:51 +0100 Subject: [PATCH 5/7] adjust with styles --- .../onert/core/include/backend/basic/BackendContextHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 5030600ca8c..c95dd9b7277 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -253,7 +253,7 @@ inline void initConsts(const ir::Operands &operands, return; const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; if (!can_be_initialized_as_const) - // tensor currently processed not a const and source memory tensor (if exists) also not a const + // tensor currently processed not a const and source memory tensor (if exists) not a const too return; auto tensor = tensor_registry->getNativeITensor(ind); From 577a67fef146d770ced09ae93d271ab1ed344c89 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Wed, 11 Dec 2024 15:54:45 +0100 Subject: [PATCH 6/7] init consts refactor --- runtime/onert/backend/cpu/BackendContext.cc | 7 +++ .../backend/basic/BackendContextHelpers.h | 56 ++++++++++++------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index 95635152a9a..cf199a7fe32 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -23,6 +23,7 @@ #include "ir/OperandIndexMap.h" #include "ir/OperandIndexSequence.h" #include "backend/basic/BackendContextHelpers.h" +#include "backend/basic/TensorRegistry.h" namespace onert { @@ -44,6 +45,12 @@ FunctionMap BackendContext::genKernels() basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(), tensor_builder->getSharedMemoryOperandIndexes()); + // TODO: Change type of tensor_registry field to TensorRegistry + auto tensor_registry_concreted = dynamic_cast(tensor_registry.get()); + assert(tensor_registry_concreted); + basic::initSharedMemoryConsts(graph()->operands(), external_operands(), tensor_registry_concreted, + tensor_builder->getSharedMemoryOperandIndexes()); + for (auto &&op_ind : _data.op_order) { auto fn_seq = kernel_gen->generate(op_ind); diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index c95dd9b7277..4fec186fd8c 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -24,6 +24,7 @@ #include "util/logging.h" #include "backend/ITensorRegistry.h" #include "backend/BackendContext.h" +#include "backend/basic/TensorRegistry.h" #include "Tensor.h" namespace onert @@ -240,45 +241,58 @@ template ITensorRegistry *genTensors(T_BackendContex ctx.data().op_order, {}); } +inline void +initSharedMemoryConsts(const ir::Operands &operands, + const util::Set &external_operands, + TensorRegistry *tensor_registry, + const ir::OperandIndexMap &shared_memory_operands_map) +{ + operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (external_operands.contains(ind)) + return; + const auto shared_mem_it = shared_memory_operands_map.find(ind); + if (shared_mem_it == std::end(shared_memory_operands_map)) + return; // no 
shared memory source + if (!operands.at(shared_mem_it->second).isConstant()) + return; // source operand not a constant + + VERBOSE(FillOperandData) << "Fill shared data for " << ind << std::endl; + + const auto &source_operand_ind = operands.at(shared_mem_it->second); + auto memory_source_data = source_operand_ind.shareData(); + assert(memory_source_data && memory_source_data->base()); + auto tensor = tensor_registry->getNativeTensor(ind); + assert(tensor != nullptr); + tensor->setBuffer(const_cast(memory_source_data->base())); + }); +} + inline void initConsts(const ir::Operands &operands, const util::Set &external_operands, ITensorRegistry *tensor_registry, const ir::OperandIndexMap &shared_memory_operands_map) { operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - const bool has_const_shared_memory = + if (external_operands.contains(ind) || !operand.isConstant()) + return; + const bool has_const_shared_source = shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) && operands.at(shared_memory_operands_map.at(ind)).isConstant(); - if (external_operands.contains(ind)) - return; - const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory; - if (!can_be_initialized_as_const) - // tensor currently processed not a const and source memory tensor (if exists) not a const too - return; + if (has_const_shared_source) + return; // tensors with shared memory are processed in initSharedMemoryConsts auto tensor = tensor_registry->getNativeITensor(ind); assert(tensor != nullptr); VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl; - if (has_const_shared_memory) - { - const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind)); - auto memory_source_data = source_operand_ind.shareData(); - assert(memory_source_data && memory_source_data->base()); - auto shared_mem_tensor = dynamic_cast(tensor); - assert(shared_mem_tensor != nullptr); - shared_mem_tensor->setBuffer(const_cast(memory_source_data->base())); - return; - } - // the default flow for constant initialization auto data = operand.shareData(); assert(data && data->base()); - auto ext_tensor = dynamic_cast(tensor); + ExternalTensor *ext_tensor = dynamic_cast(tensor); + if (ext_tensor == nullptr) - { throw std::runtime_error{"This tensor is not external tensor"}; - } + ext_tensor->setData(data); }); } From 155497bdbbaf6674b2e2c8fc3c89e1b2cff21db0 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 12 Dec 2024 09:17:22 +0100 Subject: [PATCH 7/7] review remarks --- runtime/onert/backend/cpu/BackendContext.cc | 6 ++++-- .../core/include/backend/basic/BackendContextHelpers.h | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc index cf199a7fe32..16904d86a86 100644 --- a/runtime/onert/backend/cpu/BackendContext.cc +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -25,6 +25,8 @@ #include "backend/basic/BackendContextHelpers.h" #include "backend/basic/TensorRegistry.h" +#include + namespace onert { namespace backend @@ -46,8 +48,8 @@ FunctionMap BackendContext::genKernels() tensor_builder->getSharedMemoryOperandIndexes()); // TODO: Change type of tensor_registry field to TensorRegistry - auto tensor_registry_concreted = dynamic_cast(tensor_registry.get()); - assert(tensor_registry_concreted); + auto tensor_registry_concreted = + nnfw::misc::polymorphic_downcast(tensor_registry.get()); 
basic::initSharedMemoryConsts(graph()->operands(), external_operands(), tensor_registry_concreted, tensor_builder->getSharedMemoryOperandIndexes()); diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 4fec186fd8c..2a0777a4a82 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -178,13 +178,13 @@ void planTensors(const std::shared_ptr &tensor_builder, const i } template -ir::OperandIndexSequence register_source_memory_tensors( +util::Set register_source_memory_tensors( const std::shared_ptr &tensor_builder, const ir::Graph &graph, const util::Set &external_operands, const ir::OperandIndexMap &shared_memory_operand_idx) { // process source tensors that share memory at first - ir::OperandIndexSequence registered_source_ind; + util::Set registered_source_ind; for (const auto &[_, source_ind] : shared_memory_operand_idx) { if (external_operands.contains(source_ind)) @@ -192,7 +192,7 @@ ir::OperandIndexSequence register_source_memory_tensors( if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source continue; tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info()); - registered_source_ind.append(source_ind); + registered_source_ind.add(source_ind); } return registered_source_ind; }
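To summarize the mechanism this series converges on: every operand that shares memory is redirected to its source operand (adjustWithMemorySourceOperand), and the users of that source are reference-counted so that the memory planner sees only the first claimPlan and the last releasePlan. Below is a minimal, self-contained sketch of that idea; OperandIndex, FakePlanner and SharedPlanDriver are illustrative stand-ins rather than the actual onert types, and the counting is simplified to track every user instead of only shared operands.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <unordered_map>
#include <utility>

// Stand-in for onert's operand index type.
using OperandIndex = uint32_t;

// Stand-in for the memory planner (MemoryManager in the real code).
class FakePlanner
{
public:
  void claimPlan(OperandIndex ind, uint32_t size)
  {
    std::cout << "claim " << ind << " (" << size << " bytes)\n";
  }
  void releasePlan(OperandIndex ind) { std::cout << "release " << ind << "\n"; }
};

// Redirects shared operands to their memory source and notifies the planner
// only on the first claim and the last release of that source.
class SharedPlanDriver
{
public:
  explicit SharedPlanDriver(std::map<OperandIndex, OperandIndex> shared_to_source)
    : _shared_to_source(std::move(shared_to_source))
  {
  }

  void claim(OperandIndex ind, uint32_t size)
  {
    const auto source = adjust(ind);
    if (++_ref_counter[source] == 1) // first user of the shared buffer
      _planner.claimPlan(source, size);
  }

  void release(OperandIndex ind)
  {
    const auto source = adjust(ind);
    assert(_ref_counter[source] > 0);
    if (--_ref_counter[source] == 0) // last user of the shared buffer
      _planner.releasePlan(source);
  }

private:
  // Returns the memory source operand if ind shares memory, otherwise ind itself.
  OperandIndex adjust(OperandIndex ind) const
  {
    const auto it = _shared_to_source.find(ind);
    return it != _shared_to_source.end() ? it->second : ind;
  }

  std::map<OperandIndex, OperandIndex> _shared_to_source;
  std::unordered_map<OperandIndex, uint32_t> _ref_counter;
  FakePlanner _planner;
};

int main()
{
  // Hypothetical graph: operand 5 shares memory with source operand 2.
  SharedPlanDriver driver({{5, 2}});
  driver.claim(2, 64); // planner claims a buffer for operand 2
  driver.claim(5, 64); // no new claim: operand 5 aliases operand 2
  driver.release(5);   // buffer stays alive, operand 2 still uses it
  driver.release(2);   // last user gone: planner releases operand 2
  return 0;
}

With this shape, constant source operands can simply be skipped before the counter is touched, which is what the _as_constants check in claimPlan/releasePlan does in the patches above.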
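The constant path added in patch 6 (initSharedMemoryConsts) follows the same idea at initialization time: when the memory source operand is a constant, the dependent tensor's buffer is pointed directly at the constant's data, so it needs neither planned memory nor a copy. A minimal sketch with the same caveat (simplified stand-in types, not the real onert API):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

// A constant operand owns its data; a tensor only holds a (possibly aliased) buffer pointer.
struct ConstOperand
{
  std::vector<uint8_t> data;
};

struct Tensor
{
  const uint8_t *buffer = nullptr;
};

// For every tensor whose memory source is a constant operand, alias the constant's data.
void initSharedMemoryConsts(const std::map<uint32_t, ConstOperand> &constants,
                            const std::map<uint32_t, uint32_t> &shared_to_source,
                            std::map<uint32_t, Tensor> &tensors)
{
  for (auto &[ind, tensor] : tensors)
  {
    const auto shared_it = shared_to_source.find(ind);
    if (shared_it == shared_to_source.end())
      continue; // this tensor does not share memory with anything
    const auto const_it = constants.find(shared_it->second);
    if (const_it == constants.end())
      continue; // the source operand is not a constant
    tensor.buffer = const_it->second.data.data(); // reuse the constant's storage
  }
}

int main()
{
  std::map<uint32_t, ConstOperand> constants{{2, ConstOperand{{1, 2, 3, 4}}}};
  std::map<uint32_t, uint32_t> shared_to_source{{5, 2}}; // operand 5 aliases constant 2
  std::map<uint32_t, Tensor> tensors{{5, Tensor{}}};

  initSharedMemoryConsts(constants, shared_to_source, tensors);
  assert(tensors.at(5).buffer == constants.at(2).data.data());
  std::cout << "tensor 5 reuses constant 2, first byte = " << int(tensors.at(5).buffer[0]) << "\n";
  return 0;
}

The assumed beneficiaries are operations such as Reshape, whose output is just a different view of the input bytes; which operands are actually designated for sharing is decided by findSharedMemoryOperandIndexes, which this series only consumes.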