Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[onert] Share memory for Reshape, ExpandDims and Squeeze #14057

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c8d8a75
[onert] Share memory for Reshape, ExapndDims and Squeeze
mbencer Oct 2, 2024
7e3bfae
fixed trix and xnpack build
mbencer Oct 2, 2024
e2283b1
skip for other backend
mbencer Oct 3, 2024
ae54f84
fix skipping not supported backends
mbencer Oct 3, 2024
b901aae
first unit tests
mbencer Oct 4, 2024
2cbe01d
handling additional const input case + more tests
mbencer Oct 5, 2024
9ded99d
styles applied
mbencer Oct 5, 2024
0152e12
remove unnecessary file
mbencer Oct 5, 2024
0b2c4ee
added reassign_indexes_to_single_sources
mbencer Oct 9, 2024
b634fbf
more tests
mbencer Oct 9, 2024
132e237
test names refactor
mbencer Oct 10, 2024
fe371a6
styles applied
mbencer Oct 10, 2024
1ea3bac
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 10, 2024
12d33dc
claim and release memory improvements
mbencer Oct 11, 2024
b4e655f
styles applied
mbencer Oct 11, 2024
14a4c61
extract findSharedMemoryOperandsIndexes
mbencer Oct 15, 2024
c3199da
styles applied
mbencer Oct 15, 2024
b19064b
added SharedMemoryOperands tests
mbencer Oct 15, 2024
f0afe39
test name refactor
mbencer Oct 16, 2024
828cd72
styles applied
mbencer Oct 16, 2024
44244d1
last names refactor
mbencer Oct 16, 2024
fc4b1c9
names refactor
mbencer Oct 16, 2024
50036d0
styles applied
mbencer Oct 16, 2024
c790a5e
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 16, 2024
732e33b
BackendContext refactor
mbencer Nov 4, 2024
848e272
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Nov 29, 2024
c7af462
revert irrelevant changes
mbencer Nov 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "BackendContext.h"
#include "Config.h"
#include "KernelGenerator.h"
#include "SharedMemoryOperands.h"

#include <backend/Backend.h>

Expand All @@ -45,7 +46,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, findSharedMemoryOperandIndexes(graph));
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
ITensorRegistry *BackendContext::genTensors()
{
return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
data().op_order, tensor_builder->getSharedMemoryOperandIndexes());
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
24 changes: 24 additions & 0 deletions runtime/onert/backend/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
nnfw_find_package(Ruy REQUIRED)

file(GLOB_RECURSE SOURCES "*.cc")
file(GLOB_RECURSE TESTS "*.test.cc")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})

Expand All @@ -21,3 +23,25 @@ set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES
INSTALL_RPATH "$ORIGIN:$ORIGIN/..")

install(TARGETS ${LIB_ONERT_BACKEND_CPU} DESTINATION lib/nnfw/backend)

if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)

# Unit Tests
set(TEST_ONERT_CPU_BACKEND test_onert_cpu_backend)

add_executable(${TEST_ONERT_CPU_BACKEND} ${TESTS})

target_link_libraries(${TEST_ONERT_CPU_BACKEND} ${LIB_ONERT_BACKEND_CPU})
# Requires linking nnfw_coverage: check header coverage
target_link_libraries(${TEST_ONERT_CPU_BACKEND} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CPU_BACKEND} onert_core)
target_link_libraries(${TEST_ONERT_CPU_BACKEND} gtest gtest_main dl ${LIB_PTHREAD})

# Set install rpath to find onert_core, onert_backend_cpu, etc
set_target_properties(${TEST_ONERT_CPU_BACKEND} PROPERTIES
INSTALL_RPATH "$ORIGIN/../lib/nnfw:$ORIGIN/../lib/nnfw/backend")

add_test(${TEST_ONERT_CPU_BACKEND} ${TEST_ONERT_CPU_BACKEND})
install(TARGETS ${TEST_ONERT_CPU_BACKEND} DESTINATION unittest)
93 changes: 93 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "SharedMemoryOperands.h"

#include <algorithm>
#include <iterator>
#include <unordered_set>

namespace onert
{
namespace backend
{
namespace cpu
{

namespace
{
// To handle cases like Reshape->Reshape->Reshape... chain where the memory is shared.
// In such a case we should re-assign indexes to the first Reshape input.
void reassign_indexes_to_single_sources(
ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_map)
{
for (auto [shared_ind, source_ind] : shared_memory_operand_map)
{
bool other_source_found = false;
auto it = std::end(shared_memory_operand_map);
while ((it = shared_memory_operand_map.find(source_ind)) != std::end(shared_memory_operand_map))
{
source_ind = shared_memory_operand_map[source_ind];
other_source_found = true;
}
if (other_source_found)
{
shared_memory_operand_map[shared_ind] = source_ind;
}
}
}

bool is_memory_sharing_allowed(const ir::IGraph &graph, const ir::IOperation &op)
{

const std::unordered_set<ir::OpCode> ops_with_possible_memory_sharing = {
ir::OpCode::Reshape, ir::OpCode::ExpandDims, ir::OpCode::Squeeze};

if (ops_with_possible_memory_sharing.find(op.opcode()) ==
std::end(ops_with_possible_memory_sharing))
{
return false;
}
if (graph.operands().at(op.getInputs().at(0)).info().isDynamic())
{
return false;
}
if (graph.operands().at(op.getOutputs().at(0)).info().isDynamic())
{
return false;
}
Copy link
Contributor

@hseok-oh hseok-oh Nov 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mbencer Is there any reason to not allow on dynamic shape?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hseok-oh In general I believe that it's possible to handle but my plan was to implement it separately to limit the scope of this feature. Note that dynamic tensor have a separate path of building - DynamicTensorManager::buildTensor. Dyn shapes handling requires additional branch here to handle a case where source memory tensor is a constant (has ExternalTensor type). To research is also a case where source memory tensor has static shape - in such a case DynamicMemoryManager shouldn't be pass to a tensor ctor. The rest should be even simpler because dyn tensors don't re-use common memory (controlled by [static]MemoryManager).

To sum-up:
I can handle it also as a part of this feature or create a separate issue ;)

const auto op_input_output = {op.getInputs().at(0), op.getOutputs().at(0)};
const bool is_model_input_output = std::any_of(
std::begin(op_input_output), std::end(op_input_output), [&graph](const ir::OperandIndex &ind) {
return graph.getInputs().contains(ind) || graph.getOutputs().contains(ind);
});
return !is_model_input_output;
};

} // namespace

ir::OperandIndexMap<ir::OperandIndex> findSharedMemoryOperandIndexes(const ir::IGraph &graph)
{
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
if (is_memory_sharing_allowed(graph, op))
{
shared_memory_operand_map[op.getOutputs().at(0)] = op.getInputs().at(0);
}
});
reassign_indexes_to_single_sources(shared_memory_operand_map);
return shared_memory_operand_map;
}

} // namespace cpu
} // namespace backend
} // namespace onert
39 changes: 39 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
#define __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__

#include "ir/IGraph.h"
#include "ir/OperandIndexMap.h"

namespace onert
{
namespace backend
{
namespace cpu
{
/*
 * Finds indexes of operands assigned to tensors which can share memory (indicate the same buffer).
* Note that it's applicable for operations that do NOT change data but only shape like Reshape.
*/
ir::OperandIndexMap<ir::OperandIndex> findSharedMemoryOperandIndexes(const ir::IGraph &graph);

} // namespace cpu
} // namespace backend
} // namespace onert

#endif // __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
185 changes: 185 additions & 0 deletions runtime/onert/backend/cpu/SharedMemoryOperands.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#include <memory>

#include "SharedMemoryOperands.h"

#include "ir/Graph.h"
#include "ir/operation/Permute.h"
#include "ir/operation/Squeeze.h"
#include "ir/operation/Reshape.h"

using namespace onert::backend::cpu;
using namespace onert::ir;

TEST(SharedMemoryOperands, no_shared_memory_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto perm_output = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, perm_output));
graph->addInput(perm_input);
graph->addOutput(perm_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, single_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape_input));
const auto reshape_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 1);
EXPECT_EQ(indexes_map.begin()->first, 2);
EXPECT_EQ(indexes_map.begin()->second, 1);
}

TEST(SharedMemoryOperands, double_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape1_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape1_input));
const auto reshape1_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_input, reshape_shape},
OperandIndexSequence{reshape1_output}, shape));
const auto reshape2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_output, reshape_shape},
OperandIndexSequence{reshape2_output}, shape));
const auto perm2_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape2_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 2);
auto map_it = indexes_map.begin();
EXPECT_EQ(map_it->first, 2);
EXPECT_EQ(map_it->second, 1);
++map_it;
EXPECT_EQ(map_it->first, 4);
EXPECT_EQ(map_it->second, 1);
}

TEST(SharedMemoryOperands, dyn_output_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4}, data_type);
const auto reshape_input = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, reshape_input));
const auto reshape_output = graph->addOperand({}, data_type);
graph->operands().at(reshape_output).info().setDynamic();
operation::Reshape::Param shape;
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm2_output = graph->addOperand({}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, model_input_reshape_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto reshape_input = graph->addOperand({4}, data_type);
const auto reshape_output = graph->addOperand({2, 2}, data_type);
operation::Reshape::Param shape;
shape.new_shape = {2, 2};
TypeInfo shape_type{DataType::INT32};
const auto reshape_shape = graph->addOperand({2}, shape_type);
graph->addOperation(
std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
OperandIndexSequence{reshape_output}, shape));
const auto perm_output = graph->addOperand({2, 2}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(reshape_output, perm_output));
graph->addInput(reshape_input);
graph->addOutput(perm_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 0);
}

TEST(SharedMemoryOperands, single_squeeze_graph)
{
auto graph = std::make_unique<Graph>();
TypeInfo data_type{DataType::FLOAT32};
const auto perm_input = graph->addOperand({4, 1}, data_type);
const auto squeeze_input = graph->addOperand({4, 1}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(perm_input, squeeze_input));
const auto squeeze_output = graph->addOperand({4}, data_type);
operation::Squeeze::Param axes;
axes.dims[0] = 1;
axes.ndim = 1;
graph->addOperation(std::make_unique<operation::Squeeze>(
OperandIndexSequence{squeeze_input}, OperandIndexSequence{squeeze_output}, axes));
const auto perm2_output = graph->addOperand({4}, data_type);
graph->addOperation(std::make_unique<operation::Permute>(squeeze_output, perm2_output));
graph->addInput(perm_input);
graph->addOutput(perm2_output);
graph->verify();

const auto indexes_map = findSharedMemoryOperandIndexes(*graph);

ASSERT_EQ(indexes_map.size(), 1);
EXPECT_EQ(indexes_map.begin()->first, 2);
EXPECT_EQ(indexes_map.begin()->second, 1);
}
Loading