[onert] Share tensors memory for designated operands #14384

Merged
merged 8 commits into from Dec 13, 2024
Changes from 4 commits
4 changes: 2 additions & 2 deletions runtime/onert/backend/cpu/Backend.h
@@ -20,6 +20,7 @@
#include "BackendContext.h"
#include "Config.h"
#include "KernelGenerator.h"
#include "SharedMemoryOperands.h"

#include <backend/Backend.h>

@@ -45,8 +46,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
auto tb = std::make_shared<TensorBuilder>(tr, findSharedMemoryOperandIndexes(graph));
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
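Note on the new TensorBuilder argument: instead of an empty map, the builder now receives the result of findSharedMemoryOperandIndexes(graph), a map from an operand that may alias memory to the operand that owns the buffer. The snippet below only sketches the shape of that map with made-up operand indices; findSharedMemoryOperandIndexes itself is declared in SharedMemoryOperands.h and is not part of this diff.

// Hypothetical contents of the shared-memory map consumed by TensorBuilder.
// Each entry maps an aliasing operand to its memory-source operand,
// e.g. a Reshape output that can reuse its input's buffer.
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operands;
shared_memory_operands[ir::OperandIndex{3}] = ir::OperandIndex{1}; // operand 3 reuses operand 1
shared_memory_operands[ir::OperandIndex{5}] = ir::OperandIndex{1}; // several operands may share one source
auto tb = std::make_shared<TensorBuilder>(tr, shared_memory_operands);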
6 changes: 3 additions & 3 deletions runtime/onert/backend/cpu/BackendContext.cc
@@ -41,15 +41,15 @@ FunctionMap BackendContext::genKernels()
{
FunctionMap ret;

basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

for (auto &&op_ind : _data.op_order)
{
auto fn_seq = kernel_gen->generate(op_ind);
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
.operands()
75 changes: 59 additions & 16 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -177,16 +177,40 @@ void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const i
}

template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
ir::OperandIndexSequence register_source_memory_tensors(
const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
{
// process source tensors that share memory first
ir::OperandIndexSequence registered_source_ind;
for (const auto &[_, source_ind] : shared_memory_operand_idx)
{
if (external_operands.contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.append(source_ind);
}
return registered_source_ind;
}

template <typename T_TensorBuilder>
ITensorRegistry *genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder,
const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
{
const auto registered_source_ind = register_source_memory_tensors(
tensor_builder, graph, external_operands, shared_memory_operand_idx);
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (external_operands.contains(ind))
return;
if (registered_source_ind.contains(ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

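To make the two-pass registration concrete, here is a hypothetical walk-through using the made-up map from the earlier sketch; the indices and map contents are illustrative only.

// shared_memory_operand_idx = { {3, 1}, {5, 1} }  (operands 3 and 5 alias operand 1)
// Pass 1: register_source_memory_tensors() registers operand 1 once
//         (the second entry hits the isRegistered() check and is skipped)
//         and returns registered_source_ind = {1}.
// Pass 2: graph.operands().iterate() registers the remaining operands,
//         including 3 and 5, but skips 1 because it is already registered.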
@@ -219,25 +243,44 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = source_operand_ind.shareData();
assert(memory_source_data && memory_source_data->base());
auto shared_mem_tensor = dynamic_cast<Tensor *>(tensor);
if (nullptr == shared_mem_tensor)
{
throw std::runtime_error{"Incorrect type of tensor to support sharing memory"};
}
shared_mem_tensor->setBuffer(const_cast<uint8_t *>(memory_source_data->base()));
}
else
{
auto data = operand.shareData();
assert(data && data->base());
auto ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
if (ext_tensor == nullptr)
{
throw std::runtime_error{"This tensor is not external tensor"};
}
ext_tensor->setData(data);
}
});
}

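The practical effect of the new branch in initConsts is that a tensor which is not constant itself, but whose memory source is a constant operand, is initialized by aliasing rather than by owning data. A rough sketch, assuming a hypothetical pair where operand 7 shares memory with constant operand 2:

// Illustration only: operand 7 is non-constant, shared_memory_operands_map maps 7 -> 2,
// and operand 2 is constant, so tensor 7 just points at operand 2's data.
auto source_data = operands.at(ir::OperandIndex{2}).shareData();
auto *shared_tensor = dynamic_cast<Tensor *>(tensor_registry->getNativeITensor(ir::OperandIndex{7}));
shared_tensor->setBuffer(const_cast<uint8_t *>(source_data->base())); // alias, no copy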
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -54,12 +54,18 @@ class StaticTensorManager

void iterate(const std::function<void(const ir::OperandIndex &)> &fn);

private:
// Update source operand index if source memory operand exists.
// Otherwise, return unchanged.
ir::OperandIndex adjust_with_memory_source_operand(const ir::OperandIndex &ind);

private:
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
ir::OperandIndexMap<uint32_t> _source_operand_inds_ref_counter;
};

} // namespace basic
47 changes: 41 additions & 6 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -54,9 +56,10 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
const auto adjusted_ind = adjust_with_memory_source_operand(ind);
if (!_as_constants[adjusted_ind] && !tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
auto *buffer = _nonconst_mgr->getBuffer(adjusted_ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
@@ -92,8 +95,17 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto claim_ind = adjust_with_memory_source_operand(ind);
if (_as_constants[claim_ind])
{
return;
}
++_source_operand_inds_ref_counter[claim_ind];
// notify only first usage
if (1 == _source_operand_inds_ref_counter[claim_ind])
{
_nonconst_mgr->claimPlan(claim_ind, size);
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -103,8 +115,20 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto release_ind = adjust_with_memory_source_operand(ind);
if (_as_constants[release_ind])
{
return;
}
if (_source_operand_inds_ref_counter[release_ind] > 0)
{
--_source_operand_inds_ref_counter[release_ind];
}
// notify only last usage
if (0 == _source_operand_inds_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
@@ -113,6 +137,17 @@ void StaticTensorManager::iterate(const std::function<void(const ir::OperandInde
fn(it.first);
}

ir::OperandIndex StaticTensorManager::adjust_with_memory_source_operand(const ir::OperandIndex &ind)
{
const auto source_operand_ind = _shared_memory_operand_indexes.find(ind);
if (source_operand_ind != std::end(_shared_memory_operand_indexes))
{
return source_operand_ind->second;
}
// source memory operand not found
return ind;
}

} // namespace basic
} // namespace backend
} // namespace onert
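Taken together, claimPlan and releasePlan now reference-count the shared source operand: the memory manager sees a claim only when the first sharing tensor claims its plan and a release only after the last one releases it, so the shared buffer stays alive for the whole overlapping lifetime. A standalone sketch of just that counting logic (not onert API, plain C++ for illustration):

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Minimal model of the first-claim / last-release scheme used above.
struct SharedPlanRefCounter
{
  std::unordered_map<uint32_t, uint32_t> ref_count; // source operand index -> active users

  // Returns true only for the first claim, i.e. when the real buffer must be claimed.
  bool claim(uint32_t source_ind) { return ++ref_count[source_ind] == 1; }

  // Returns true only for the last release, i.e. when the real buffer can be freed.
  bool release(uint32_t source_ind)
  {
    assert(ref_count[source_ind] > 0);
    return --ref_count[source_ind] == 0;
  }
};

With two tensors sharing source operand 1, claim(1) reports true for the first caller only, and release(1) reports true for the second caller only, mirroring the "notify only first usage" / "notify only last usage" comments in the diff.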