Skip to content

Commit

Permalink
[onert] Propagate shared memory operand indexes to cpu backend (#14230)
Browse files Browse the repository at this point in the history
This commit adds propagation of shared memory operand indexes to cpu backend.
Note that the propagated indexes map is not filled yet.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer [email protected]
  • Loading branch information
mbencer authored Nov 29, 2024
1 parent 4f30a00 commit a5e770a
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 40 deletions.
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
ITensorRegistry *BackendContext::genTensors()
{
return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
data().op_order, tensor_builder->getSharedMemoryOperandIndexes());
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
47 changes: 28 additions & 19 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,18 @@ namespace basic
{

// TODO Remove the template param BackendContext once unification of cpu backend context is done
template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
template <typename T_TensorBuilder>
void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::vector<onert::ir::OperationIndex> &op_order)
{
const ir::Graph &graph = *ctx.graph();
const auto &order = ctx.data().op_order;
auto tensor_builder = ctx.tensor_builder;

ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;

// Prepare scanning
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;

// TODO Check if we need to handle unused tensors
Expand Down Expand Up @@ -95,7 +94,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
for (const auto &op_ind : order)
for (const auto &op_ind : op_order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
Expand All @@ -104,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// Define outputs
for (const auto &ind : op_outputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -121,7 +120,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// non-constant because of less memory usage by memory planning in here
for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -138,7 +137,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct

for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand Down Expand Up @@ -177,21 +176,24 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
[](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
{
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
basic::planTensors(ctx);
basic::planTensors(tensor_builder, graph, external_operands, op_order);
}
else
{
Expand All @@ -205,12 +207,19 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

tensor_builder->allocate();

return ctx.tensor_registry.get();
return tensor_registry.get();
}

template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
{
return genTensors(ctx.tensor_builder, *ctx.graph(), ctx.external_operands(), ctx.tensor_registry,
ctx.data().op_order, {});
}

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
Expand All @@ -234,7 +243,7 @@ inline void initConsts(const ir::Operands &operands,

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(), {});
}

} // namespace basic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
Expand All @@ -57,6 +59,7 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});

/**
* @brief Register tensor information to allocate on CPU backend
Expand All @@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getSharedMemoryOperandIndexes() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
18 changes: 11 additions & 7 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
// DO NOTHING
}
Expand Down
21 changes: 16 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(),
shared_memory_operand_indexes)},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
/* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
shared_memory_operand_indexes)}
{
/* empty */
}
Expand Down Expand Up @@ -83,6 +89,11 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const

void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }

const ir::OperandIndexMap<ir::OperandIndex> &TensorBuilder::getSharedMemoryOperandIndexes() const
{
return _shared_memory_operand_indexes;
}

} // namespace basic
} // namespace backend
} // namespace onert
4 changes: 2 additions & 2 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ namespace builtin

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
_static_tensor_mgr{
new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new basic::StaticTensorManager(
_tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), ir::OperandIndexMap<ir::OperandIndex>{})}
{
/* empty */
}
Expand Down

0 comments on commit a5e770a

Please sign in to comment.