Skip to content

Commit

Permalink
[onert] Share memory for Reshape, ExpandDims and Squeeze
Browse files Browse the repository at this point in the history
This commit extends the current tensor memory management infrastructure to allow tensors to share memory where possible.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer [email protected]
  • Loading branch information
mbencer committed Oct 2, 2024
1 parent 8753418 commit c8d8a75
Show file tree
Hide file tree
Showing 14 changed files with 186 additions and 49 deletions.
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

void ExpandDimsLayer::run()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// output buffer equals to input buffer means that copy is not needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

} // namespace ops
Expand Down
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ReshapeLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

void ReshapeLayer::reshapeGeneric()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// output buffer equals to input buffer means that copy is not needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
Expand Down
2 changes: 1 addition & 1 deletion runtime/onert/backend/ruy/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
2 changes: 2 additions & 0 deletions runtime/onert/core/include/backend/BackendContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ struct ContextData
std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
/* Is linear executor or not */
bool is_linear_executor;
/* Map of operands which share memory where the values are sources of memory */
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
};

class BackendContext
Expand Down
29 changes: 22 additions & 7 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,31 +210,46 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = memory_source_operand.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
ctx.data().shared_memory_operand_map);
}

} // namespace basic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
Expand All @@ -57,6 +59,8 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
};

} // namespace basic
Expand Down
6 changes: 4 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
Expand Down
107 changes: 85 additions & 22 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
Expand All @@ -27,19 +29,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}
Expand All @@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
{
const auto &shared_memory_ind = _operands_with_shared_memory[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
Expand All @@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand = _operands_with_shared_memory.find(ind);
if (source_operand != std::end(_operands_with_shared_memory) &&
_as_constants[source_operand->second])
{
as_const = _as_constants[source_operand->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}

Expand All @@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto source_ind = _operands_with_shared_memory.find(ind);
if (source_ind == std::end(_operands_with_shared_memory))
{
if (!_as_constants[ind])
{
_nonconst_mgr->claimPlan(ind, size);
++_source_operands_ref_counter[ind];
}
}
else
{
if (!_as_constants[source_ind->second])
{
++_source_operands_ref_counter[source_ind->second];
}
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
Expand All @@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto source_operand_ind =
std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
[&ind](const auto &op) { return op.second == ind; });

ir::OperandIndex release_ind;
if (source_operand_ind == std::end(_operands_with_shared_memory))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind->second;
}
if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
Expand Down
15 changes: 10 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,22 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{
new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
{
/* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
operands_with_shared_memory)}
{
/* empty */
}
Expand Down
2 changes: 1 addition & 1 deletion runtime/onert/core/src/backend/builtin/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Backend : public ::onert::backend::Backend, public backend::train::ITraina
// TODO Remove TensorBuilder and ConstantInitializer
// TODO Support Consecutive controflow operation's intermediate tensor
auto tr = std::make_shared<TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(
Expand Down
8 changes: 5 additions & 3 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ namespace backend
namespace builtin
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
_static_tensor_mgr{
new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new basic::StaticTensorManager(
_tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
{
/* empty */
}
Expand Down
3 changes: 2 additions & 1 deletion runtime/onert/core/src/backend/builtin/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ namespace builtin
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
Expand Down
Loading

0 comments on commit c8d8a75

Please sign in to comment.