[onert] Share memory for Reshape, ExpandDims and Squeeze #14057

Closed
Wants to merge 27 commits.
Changes from 12 commits
Commits (27)
c8d8a75
[onert] Share memory for Reshape, ExapndDims and Squeeze
mbencer Oct 2, 2024
7e3bfae
fixed trix and xnpack build
mbencer Oct 2, 2024
e2283b1
skip for other backend
mbencer Oct 3, 2024
ae54f84
fix skipping not supported backends
mbencer Oct 3, 2024
b901aae
first unit tests
mbencer Oct 4, 2024
2cbe01d
handling additional const input case + more tests
mbencer Oct 5, 2024
9ded99d
styles applied
mbencer Oct 5, 2024
0152e12
remove unnecessary file
mbencer Oct 5, 2024
0b2c4ee
added reassign_indexes_to_single_sources
mbencer Oct 9, 2024
b634fbf
more tests
mbencer Oct 9, 2024
132e237
test names refactor
mbencer Oct 10, 2024
fe371a6
styles applied
mbencer Oct 10, 2024
1ea3bac
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 10, 2024
12d33dc
claim and release memory improvements
mbencer Oct 11, 2024
b4e655f
styles applied
mbencer Oct 11, 2024
14a4c61
extract findSharedMemoryOperandsIndexes
mbencer Oct 15, 2024
c3199da
styles applied
mbencer Oct 15, 2024
b19064b
added SharedMemoryOperands tests
mbencer Oct 15, 2024
f0afe39
test name refactor
mbencer Oct 16, 2024
828cd72
styles applied
mbencer Oct 16, 2024
44244d1
last names refactor
mbencer Oct 16, 2024
fc4b1c9
names refactor
mbencer Oct 16, 2024
50036d0
styles applied
mbencer Oct 16, 2024
c790a5e
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Oct 16, 2024
732e33b
BackendContext refactor
mbencer Nov 4, 2024
848e272
Merge remote-tracking branch 'upstream/master' into mbencer/ReshapeAv…
mbencer Nov 29, 2024
c7af462
revert irrelevant changes
mbencer Nov 29, 2024
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

void ExpandDimsLayer::run()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// If the output buffer is the same as the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

} // namespace ops
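Besides saving a redundant copy, the new guard also avoids calling memcpy with a source and destination that are the same (and therefore fully overlapping) region, which is undefined behavior. A minimal standalone sketch of the pattern (editor's illustration, not code from this PR):

#include <cstddef>
#include <cstring>

// Copy only when the two buffers are distinct; with shared memory the
// output may alias the input, in which case the copy is skipped entirely.
inline void copyIfNeeded(void *dst, const void *src, std::size_t size)
{
  if (dst != src)
    std::memcpy(dst, src, size);
}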
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ReshapeLayer.cc
@@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

void ReshapeLayer::reshapeGeneric()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// If the output buffer is the same as the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
2 changes: 1 addition & 1 deletion runtime/onert/backend/ruy/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 1 addition & 1 deletion runtime/onert/backend/trix/Backend.h
@@ -44,7 +44,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
2 changes: 1 addition & 1 deletion runtime/onert/backend/xnnpack/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 2 additions & 0 deletions runtime/onert/core/include/backend/BackendContext.h
@@ -46,6 +46,8 @@ struct ContextData
std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
/* Is linear executor or not */
bool is_linear_executor;
/* Maps each operand that shares memory to the operand that is the source of that memory */
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
};

class BackendContext
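The new shared_memory_operand_map records, for each operand that can reuse another operand's memory, the index of the operand that actually owns the buffer. A rough sketch of how such a map could be built for Reshape, ExpandDims and Squeeze is shown below (editor's illustration with simplified, hypothetical types; the PR's actual findSharedMemoryOperandsIndexes pass works on onert IR):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for a layout-only operation (Reshape/ExpandDims/Squeeze);
// operand indexes are plain integers here instead of ir::OperandIndex.
struct LayoutOnlyOp
{
  uint32_t input;  // data input operand
  uint32_t output; // output operand
};

// Map every output of a layout-only op to the ultimate source operand,
// collapsing chains such as Reshape -> ExpandDims so both outputs point
// at the original input. Assumes ops are visited in topological order.
std::unordered_map<uint32_t, uint32_t>
findSharedMemoryOperands(const std::vector<LayoutOnlyOp> &ops)
{
  std::unordered_map<uint32_t, uint32_t> shared; // output -> memory source
  for (const auto &op : ops)
  {
    uint32_t source = op.input;
    const auto it = shared.find(source);
    if (it != shared.end())
      source = it->second; // the input itself reuses another operand's memory
    shared[op.output] = source;
  }
  return shared;
}

With such a map in hand, each backend's TensorBuilder (see the Backend.h changes above) can decide which operands need their own allocation and which simply alias a source operand's buffer.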
44 changes: 37 additions & 7 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -182,9 +182,24 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

// Process the source tensors of shared memory first
std::vector<ir::OperandIndex> registered_source_ind;
for (const auto &[_, source_ind] : tensor_builder->getOperandsWithSharedMemory())
{
if (ctx.external_operands().contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.emplace_back(source_ind);
}

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
return;
if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) !=
std::end(registered_source_ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

@@ -210,31 +225,46 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = memory_source_operand.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
ctx.data().shared_memory_operand_map);
}

} // namespace basic
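The updated initConsts() treats an operand as constant-initializable not only when it is constant itself but also when the operand it shares memory with is constant; in that case its ExternalTensor is pointed at the source operand's data instead of receiving its own copy. A toy model of that decision (editor's sketch with simplified types, not onert API):

#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

using Data = std::shared_ptr<std::vector<uint8_t>>;

struct ToyOperand
{
  bool is_constant = false;
  Data data;
};

// For every operand, pick the data that should back its tensor: its own data
// when it is constant, the source operand's data when it shares memory with a
// constant, and nothing (a planned buffer later) otherwise.
std::unordered_map<uint32_t, Data>
assignConstData(const std::unordered_map<uint32_t, ToyOperand> &operands,
                const std::unordered_map<uint32_t, uint32_t> &shared_memory_map)
{
  std::unordered_map<uint32_t, Data> tensor_data;
  for (const auto &[ind, operand] : operands)
  {
    const auto it = shared_memory_map.find(ind);
    const bool has_const_source =
      it != shared_memory_map.end() && operands.at(it->second).is_constant;
    if (operand.is_constant)
      tensor_data[ind] = operand.data;
    else if (has_const_source)
      tensor_data[ind] = operands.at(it->second).data; // alias, no copy
  }
  return tensor_data;
}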
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
@@ -57,6 +59,8 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
};

} // namespace basic
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
@@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getOperandsWithSharedMemory() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
};

} // namespace basic
107 changes: 85 additions & 22 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -27,19 +29,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}
@@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
{
const auto &shared_memory_ind = _operands_with_shared_memory[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
@@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand = _operands_with_shared_memory.find(ind);
if (source_operand != std::end(_operands_with_shared_memory) &&
_as_constants[source_operand->second])
{
as_const = _as_constants[source_operand->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}

@@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto source_ind = _operands_with_shared_memory.find(ind);
if (source_ind == std::end(_operands_with_shared_memory))
{
if (!_as_constants[ind])
{
_nonconst_mgr->claimPlan(ind, size);
++_source_operands_ref_counter[ind];
}
}
else
{
if (!_as_constants[source_ind->second])
{
++_source_operands_ref_counter[source_ind->second];
}
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto source_operand_ind =
std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
[&ind](const auto &op) { return op.second == ind; });

ir::OperandIndex release_ind;
if (source_operand_ind == std::end(_operands_with_shared_memory))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind->second;
}
if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
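The claimPlan/releasePlan changes add _source_operands_ref_counter so that a buffer shared by several operands is planned once and kept alive while any of them still needs it. The sketch below shows the general reference-counting idea only; the PR's actual bookkeeping (which operand indexes are counted and when the memory manager is consulted) differs in detail:

#include <cstdint>
#include <unordered_map>

// Generic illustration: a source operand's allocation is claimed on first use
// and released only after the last operand sharing it has been released.
// Assumes claim/release calls are balanced per source operand.
class SharedPlanCounter
{
public:
  // Returns true when the underlying buffer should actually be claimed.
  bool claim(uint32_t source_ind) { return _refs[source_ind]++ == 0; }

  // Returns true when the underlying buffer should actually be released.
  bool release(uint32_t source_ind) { return --_refs[source_ind] == 0; }

private:
  std::unordered_map<uint32_t, uint32_t> _refs;
};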