[onert] Share tensors memory for designated operands #14384

Merged
merged 8 commits into from Dec 13, 2024
Changes from 4 commits
4 changes: 2 additions & 2 deletions runtime/onert/backend/cpu/Backend.h
@@ -20,6 +20,7 @@
#include "BackendContext.h"
#include "Config.h"
#include "KernelGenerator.h"
#include "SharedMemoryOperands.h"

#include <backend/Backend.h>

@@ -45,8 +46,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
auto tb = std::make_shared<TensorBuilder>(tr, findSharedMemoryOperandIndexes(graph));
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
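Note on the new TensorBuilder argument: instead of an empty map, the builder now receives the result of findSharedMemoryOperandIndexes(graph), a map from an operand that may alias memory to the operand that owns the buffer. The snippet below only sketches the shape of that map with made-up operand indices; findSharedMemoryOperandIndexes itself is declared in SharedMemoryOperands.h and is not part of this diff.

// Hypothetical contents of the shared-memory map consumed by TensorBuilder.
// Each entry maps an aliasing operand to its memory-source operand,
// e.g. a Reshape output that can reuse its input's buffer.
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operands;
shared_memory_operands[ir::OperandIndex{3}] = ir::OperandIndex{1}; // operand 3 reuses operand 1
shared_memory_operands[ir::OperandIndex{5}] = ir::OperandIndex{1}; // several operands may share one source
auto tb = std::make_shared<TensorBuilder>(tr, shared_memory_operands);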
6 changes: 3 additions & 3 deletions runtime/onert/backend/cpu/BackendContext.cc
@@ -41,15 +41,15 @@ FunctionMap BackendContext::genKernels()
{
FunctionMap ret;

basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

for (auto &&op_ind : _data.op_order)
{
auto fn_seq = kernel_gen->generate(op_ind);
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
.operands()
75 changes: 59 additions & 16 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -177,16 +177,40 @@ void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const i
}

template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
ir::OperandIndexSequence register_source_memory_tensors(
const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
{
// process source tensors that share memory first
ir::OperandIndexSequence registered_source_ind;
for (const auto &[_, source_ind] : shared_memory_operand_idx)
{
if (external_operands.contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.append(source_ind);
}
return registered_source_ind;
}

template <typename T_TensorBuilder>
ITensorRegistry *genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder,
const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
{
const auto registered_source_ind = register_source_memory_tensors(
tensor_builder, graph, external_operands, shared_memory_operand_idx);
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (external_operands.contains(ind))
return;
if (registered_source_ind.contains(ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

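To make the two-pass registration concrete, here is a hypothetical walk-through using the made-up map from the earlier sketch; the indices and map contents are illustrative only.

// shared_memory_operand_idx = { {3, 1}, {5, 1} }  (operands 3 and 5 alias operand 1)
// Pass 1: register_source_memory_tensors() registers operand 1 once
//         (the second entry hits the isRegistered() check and is skipped)
//         and returns registered_source_ind = {1}.
// Pass 2: graph.operands().iterate() registers the remaining operands,
//         including 3 and 5, but skips 1 because it is already registered.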
@@ -219,25 +243,44 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = source_operand_ind.shareData();
assert(memory_source_data && memory_source_data->base());
auto shared_mem_tensor = dynamic_cast<Tensor *>(tensor);
if (nullptr == shared_mem_tensor)
{
throw std::runtime_error{"Incorrect type of tensor to support sharing memory"};
}
shared_mem_tensor->setBuffer(const_cast<uint8_t *>(memory_source_data->base()));
}
else
{
auto data = operand.shareData();
assert(data && data->base());
auto ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
if (ext_tensor == nullptr)
{
throw std::runtime_error{"This tensor is not external tensor"};
}
ext_tensor->setData(data);
}
});
}

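The practical effect of the new branch in initConsts is that a tensor which is not constant itself, but whose memory source is a constant operand, is initialized by aliasing rather than by owning data. A rough sketch, assuming a hypothetical pair where operand 7 shares memory with constant operand 2:

// Illustration only: operand 7 is non-constant, shared_memory_operands_map maps 7 -> 2,
// and operand 2 is constant, so tensor 7 just points at operand 2's data.
auto source_data = operands.at(ir::OperandIndex{2}).shareData();
auto *shared_tensor = dynamic_cast<Tensor *>(tensor_registry->getNativeITensor(ir::OperandIndex{7}));
shared_tensor->setBuffer(const_cast<uint8_t *>(source_data->base())); // alias, no copy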
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -54,12 +54,18 @@ class StaticTensorManager

void iterate(const std::function<void(const ir::OperandIndex &)> &fn);

private:
// Update source operand index if source memory operand exists.
// Otherwise, return unchanged.
ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind);

private:
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
ir::OperandIndexMap<uint32_t> _source_operand_inds_ref_counter;
};

} // namespace basic
47 changes: 41 additions & 6 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -54,9 +56,10 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
const auto adjusted_ind = adjust_with_memory_source_operand(ind);
if (!_as_constants[adjusted_ind] && !tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
@@ -92,8 +95,17 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto claim_ind = adjust_with_memory_source_operand(ind);
if (_as_constants[claim_ind])
{
return;
}
++_source_operand_inds_ref_counter[claim_ind];
// notify only first usage
if (1 == _source_operand_inds_ref_counter[claim_ind])
{
_nonconst_mgr->claimPlan(claim_ind, size);
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -103,8 +115,20 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto release_ind = adjust_with_memory_source_operand(ind);
if (_as_constants[release_ind])
{
return;
}
if (_source_operand_inds_ref_counter[release_ind] > 0)
{
--_source_operand_inds_ref_counter[release_ind];
}
// notify only last usage
if (0 == _source_operand_inds_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
@@ -113,6 +137,17 @@ void StaticTensorManager::iterate(const std::function<void(const ir::OperandInde
fn(it.first);
}

ir::OperandIndex StaticTensorManager::adjust_with_memory_source_operand(const ir::OperandIndex &ind)
{
const auto source_operand_ind = _shared_memory_operand_indexes.find(ind);
if (source_operand_ind != std::end(_shared_memory_operand_indexes))
{
return source_operand_ind->second;
}
// source memory operand not found
return ind;
}

} // namespace basic
} // namespace backend
} // namespace onert
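Taken together, claimPlan and releasePlan now reference-count the shared source operand: the memory manager sees a claim only when the first sharing tensor claims its plan and a release only after the last one releases it, so the shared buffer stays alive for the whole overlapping lifetime. A standalone sketch of just that counting logic (not onert API, plain C++ for illustration):

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Minimal model of the first-claim / last-release scheme used above.
struct SharedPlanRefCounter
{
  std::unordered_map<uint32_t, uint32_t> ref_count; // source operand index -> active users

  // Returns true only for the first claim, i.e. when the real buffer must be claimed.
  bool claim(uint32_t source_ind) { return ++ref_count[source_ind] == 1; }

  // Returns true only for the last release, i.e. when the real buffer can be freed.
  bool release(uint32_t source_ind)
  {
    assert(ref_count[source_ind] > 0);
    return --ref_count[source_ind] == 0;
  }
};

With two tensors sharing source operand 1, claim(1) reports true for the first caller only, and release(1) reports true for the second caller only, mirroring the "notify only first usage" / "notify only last usage" comments in the diff.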