Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[onert] Share memory for Reshape, ExpandDims and Squeeze #14057

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c8d8a75
[onert] Share memory for Reshape, ExapndDims and Squeeze
mbencer Oct 2, 2024
7e3bfae
fixed trix and xnpack build
mbencer Oct 2, 2024
e2283b1
skip for other backend
mbencer Oct 3, 2024
ae54f84
fix skipping not supported backends
mbencer Oct 3, 2024
b901aae
first unit tests
mbencer Oct 4, 2024
2cbe01d
handling additional const input case + more tests
mbencer Oct 5, 2024
9ded99d
styles applied
mbencer Oct 5, 2024
0152e12
remove unnecessary file
mbencer Oct 5, 2024
0b2c4ee
added reassign_indexes_to_single_sources
mbencer Oct 9, 2024
b634fbf
more tests
mbencer Oct 9, 2024
132e237
test names refactor
mbencer Oct 10, 2024
fe371a6
styles applied
mbencer Oct 10, 2024
1ea3bac
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 10, 2024
12d33dc
claim and release memory improvements
mbencer Oct 11, 2024
b4e655f
styles applied
mbencer Oct 11, 2024
14a4c61
extract findSharedMemoryOperandsIndexes
mbencer Oct 15, 2024
c3199da
styles applied
mbencer Oct 15, 2024
b19064b
added SharedMemoryOperands tests
mbencer Oct 15, 2024
f0afe39
test name refactor
mbencer Oct 16, 2024
828cd72
styles applied
mbencer Oct 16, 2024
44244d1
last names refactor
mbencer Oct 16, 2024
fc4b1c9
names refactor
mbencer Oct 16, 2024
50036d0
styles applied
mbencer Oct 16, 2024
c790a5e
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 16, 2024
732e33b
BackendContext refactor
mbencer Nov 4, 2024
848e272
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Nov 29, 2024
c7af462
revert irrelevant changes
mbencer Nov 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "BackendContext.h"
#include "Config.h"
#include "KernelGenerator.h"
#include "SharedMemoryOperands.h"

#include <backend/Backend.h>

Expand All @@ -45,7 +46,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, findSharedMemoryOperandIndexes(graph));
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
ITensorRegistry *BackendContext::genTensors()
{
return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
data().op_order, tensor_builder->getSharedMemoryOperandIndexes());
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
24 changes: 24 additions & 0 deletions runtime/onert/backend/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
nnfw_find_package(Ruy REQUIRED)

file(GLOB_RECURSE SOURCES "*.cc")
file(GLOB_RECURSE TESTS "*.test.cc")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})

Expand All @@ -21,3 +23,25 @@ set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES
INSTALL_RPATH "$ORIGIN:$ORIGIN/..")

install(TARGETS ${LIB_ONERT_BACKEND_CPU} DESTINATION lib/nnfw/backend)

if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)

# Unit Tests
set(TEST_ONERT_CPU_BACKEND test_onert_cpu_backend)

add_executable(${TEST_ONERT_CPU_BACKEND} ${TESTS})

target_link_libraries(${TEST_ONERT_CPU_BACKEND} ${LIB_ONERT_BACKEND_CPU})
# Requires linking nnfw_coverage: check header coverage
target_link_libraries(${TEST_ONERT_CPU_BACKEND} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CPU_BACKEND} onert_core)
target_link_libraries(${TEST_ONERT_CPU_BACKEND} gtest gtest_main dl ${LIB_PTHREAD})

# Set install rpath to find onert_core, onert_backend_cpu, etc
set_target_properties(${TEST_ONERT_CPU_BACKEND} PROPERTIES
INSTALL_RPATH "$ORIGIN/../lib/nnfw:$ORIGIN/../lib/nnfw/backend")

add_test(${TEST_ONERT_CPU_BACKEND} ${TEST_ONERT_CPU_BACKEND})
install(TARGETS ${TEST_ONERT_CPU_BACKEND} DESTINATION unittest)
93 changes: 93 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "SharedMemoryOperands.h"

#include <algorithm>
#include <iterator>
#include <unordered_set>

namespace onert
{
namespace backend
{
namespace cpu
{

namespace
{
// To handle cases like Reshape->Reshape->Reshape... chain where the memory is shared.
// In such a case we should re-assign indexes to the first Reshape input.
void reassign_indexes_to_single_sources(
ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_map)
{
for (auto [shared_ind, source_ind] : shared_memory_operand_map)
{
bool other_source_found = false;
auto it = std::end(shared_memory_operand_map);
while ((it = shared_memory_operand_map.find(source_ind)) != std::end(shared_memory_operand_map))
{
source_ind = shared_memory_operand_map[source_ind];
other_source_found = true;
}
if (other_source_found)
{
shared_memory_operand_map[shared_ind] = source_ind;
}
}
}

bool is_memory_sharing_allowed(const ir::IGraph &graph, const ir::IOperation &op)
{

const std::unordered_set<ir::OpCode> ops_with_possible_memory_sharing = {
ir::OpCode::Reshape, ir::OpCode::ExpandDims, ir::OpCode::Squeeze};

if (ops_with_possible_memory_sharing.find(op.opcode()) ==
std::end(ops_with_possible_memory_sharing))
{
return false;
}
if (graph.operands().at(op.getInputs().at(0)).info().isDynamic())
{
return false;
}
if (graph.operands().at(op.getOutputs().at(0)).info().isDynamic())
{
return false;
}
Copy link
Contributor

@hseok-oh hseok-oh Nov 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mbencer Is there any reason to not allow on dynamic shape?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hseok-oh In general I believe that it's possible to handle but my plan was to implement it separately to limit the scope of this feature. Note that dynamic tensor have a separate path of building - DynamicTensorManager::buildTensor. Dyn shapes handling requires additional branch here to handle a case where source memory tensor is a constant (has ExternalTensor type). To research is also a case where source memory tensor has static shape - in such a case DynamicMemoryManager shouldn't be pass to a tensor ctor. The rest should be even simpler because dyn tensors don't re-use common memory (controlled by [static]MemoryManager).

To sum-up:
I can handle it also as a part of this feature or create a separate issue ;)

const auto op_input_output = {op.getInputs().at(0), op.getOutputs().at(0)};
const bool is_model_input_output = std::any_of(
std::begin(op_input_output), std::end(op_input_output), [&graph](const ir::OperandIndex &ind) {
return graph.getInputs().contains(ind) || graph.getOutputs().contains(ind);
});
return !is_model_input_output;
};

} // namespace

ir::OperandIndexMap<ir::OperandIndex> findSharedMemoryOperandIndexes(const ir::IGraph &graph)
{
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
if (is_memory_sharing_allowed(graph, op))
{
shared_memory_operand_map[op.getOutputs().at(0)] = op.getInputs().at(0);
}
});
reassign_indexes_to_single_sources(shared_memory_operand_map);
return shared_memory_operand_map;
}

} // namespace cpu
} // namespace backend
} // namespace onert
39 changes: 39 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
#define __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__

#include "ir/IGraph.h"
#include "ir/OperandIndexMap.h"

namespace onert
{
namespace backend
{
namespace cpu
{
/*
 * Finds indexes of operands assigned to tensors which can share memory (indicate the same buffer).
* Note that it's applicable for operations that do NOT change data but only shape like Reshape.
*/
ir::OperandIndexMap<ir::OperandIndex> findSharedMemoryOperandIndexes(const ir::IGraph &graph);

} // namespace cpu
} // namespace backend
} // namespace onert

#endif // __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
185 changes: 185 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#include <memory>

#include "SharedMemoryOperands.h"

#include "ir/Graph.h"
#include "ir/operation/Permute.h"
#include "ir/operation/Squeeze.h"
#include "ir/operation/Reshape.h"

using namespace onert::backend::cpu;
using namespace onert::ir;

TEST(SharedMemoryOperands, no_shared_memory_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto perm_output = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, perm_output));
graph->addInput(perm_input);
graph->addOutput(perm_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, single_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape_input));
const auto reshape_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 1);
EXPECT_EQ(indexes_map.begin()->first, 2);
EXPECT_EQ(indexes_map.begin()->second, 1);
}

TEST(SharedMemoryOperands, double_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape1_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape1_input));
const auto reshape1_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_input, reshape_shape},
OperandIndexSequence{reshape1_output}, shape));
const auto reshape2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_output, reshape_shape},
OperandIndexSequence{reshape2_output}, shape));
const auto perm2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape2_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 2);
auto map_it = indexes_map.begin();
EXPECT_EQ(map_it->first, 2);
EXPECT_EQ(map_it->second, 1);
++map_it;
EXPECT_EQ(map_it->first, 4);
EXPECT_EQ(map_it->second, 1);
}

TEST(SharedMemoryOperands, dyn_output_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape_input));
const auto reshape_output = graph->addOperand({}, data_type);
graph->operands().at(reshape_output).info().setDynamic();
operation::Reshape::Param shape;
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm2_output = graph->addOperand({}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, model_input_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto reshape_input = graph->addOperand({4}, data_type);
const auto reshape_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm_output));
graph->addInput(reshape_input);
graph->addOutput(perm_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, single_squeeze_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4, 1}, data_type);
const auto squeeze_input = graph->addOperand({4, 1}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, squeeze_input));
const auto squeeze_output = graph->addOperand({4}, data_type);
operation::Squeeze::Param axes;
axes.dims[0] = 1;
axes.ndim = 1;
graph->addOperation(std::make_unique<operation::Squeeze>(
OperandIndexSequence{squeeze_input}, OperandIndexSequence{squeeze_output}, axes));
const auto perm2_output = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(squeeze_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 1);
EXPECT_EQ(indexes_map.begin()->first, 2);
EXPECT_EQ(indexes_map.begin()->second, 1);
}
Loading