From b89d3e304d7b4e1119004db0adfd00d4d520d973 Mon Sep 17 00:00:00 2001 From: sseung Date: Thu, 10 Oct 2024 16:18:09 +0900 Subject: [PATCH 1/4] add ops --- .../train/ops/BinaryArithmeticLayer.cc | 15 +++++++-- .../backend/train/ops/BinaryArithmeticLayer.h | 3 +- .../backend/train/ops/ConvolutionLayer.cc | 33 ++++++++++--------- .../backend/train/ops/ConvolutionLayer.h | 9 ++--- .../backend/train/ops/FullyConnectedLayer.cc | 23 ++++++++----- .../backend/train/ops/FullyConnectedLayer.h | 10 +++--- runtime/onert/backend/train/ops/PoolLayer.cc | 33 ++++++++++++++----- runtime/onert/backend/train/ops/PoolLayer.h | 3 ++ 8 files changed, 84 insertions(+), 45 deletions(-) diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc index 3c4ce2f7ce1..a1a19fe56fb 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc @@ -55,11 +55,22 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs, if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_output->get_info()); - _act_back_prop_output->setBuffer(std::make_shared(_output->total_size())); + _act_back_prop_output = std::make_shared(_back_prop_output->get_info()); } } +std::optional BinaryArithmeticLayer::registerExtraTensors() +{ + ExtraTensors tensors; + + if (_act_back_prop_output != nullptr) + { + tensors.push_back(_act_back_prop_output); + } + + return std::optional(tensors); +} + void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); } void BinaryArithmeticLayer::backward() diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h index 60d6e8be1cc..598cd3327ca 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h @@ -50,6 +50,7 @@ class 
BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs, const IPortableTensor *back_prop_output, const ir::Activation activation, const ArithmeticType arithmetic_type); + std::optional registerExtraTensors() override; void forward(bool training) override; void backward() override; @@ -60,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, ArithmeticType _arithmetic_type; ir::Activation _activation; - std::unique_ptr _act_back_prop_output; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc index 41ff7fd1c43..28c66c13737 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc @@ -31,7 +31,7 @@ namespace using namespace onert; template -std::unique_ptr createTransposedWeights(const backend::IPortableTensor *origin_weights) +std::shared_ptr createTransposedWeights(const backend::IPortableTensor *origin_weights) { const auto &origin_shape = origin_weights->getShape(); assert(origin_shape.rank() == 4); @@ -42,7 +42,7 @@ std::unique_ptr createTransposedWeights(const backend::IPortableTensor * ir::Shape{origin_shape.dim(1), origin_shape.dim(2), origin_shape.dim(3), origin_shape.dim(0)}; transposed_info.shape(transposed_shape); - return std::make_unique(transposed_info); + return std::make_shared(transposed_info); } } // namespace @@ -79,27 +79,30 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights, if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1) throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet"); - // TODO Optimize transposed tensors - _transposed_weights = createTransposedWeights(weights); - _transposed_weights->setBuffer( - std::make_shared(_transposed_weights->total_size())); 
+ _transposed_weights = createTransposedWeights(weights); - _conv_back_prop_output = std::make_unique(back_prop_output->get_info()); - _conv_back_prop_output->setBuffer( - std::make_shared(_conv_back_prop_output->total_size())); + _conv_back_prop_output = std::make_shared(back_prop_output->get_info()); - _transposed_grad_weights = createTransposedWeights(weights); - _transposed_grad_weights->setBuffer( - std::make_shared(_transposed_grad_weights->total_size())); + _transposed_grad_weights = createTransposedWeights(weights); if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); - _act_back_prop_output->setBuffer( - std::make_shared(_act_back_prop_output->total_size())); + _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); } } +std::optional ConvolutionLayer::registerExtraTensors() +{ + ExtraTensors tensors = {_transposed_weights, _conv_back_prop_output, _transposed_grad_weights}; + + if (_act_back_prop_output != nullptr) + { + tensors.push_back(_act_back_prop_output); + } + + return std::optional(tensors); +} + void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); } void ConvolutionLayer::backward() { diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h index ef11f68bf57..6df64a26b41 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h @@ -41,6 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input, IPortableTensor *grad_weights, IPortableTensor *grad_bias, const IPortableTensor *back_prop_output, const ir::Activation activation); + std::optional registerExtraTensors() override; void forward(bool training) override; void backward() override; @@ -54,10 +55,10 @@ class ConvolutionLayer : public 
::onert::exec::train::ITrainableFunction, const IPortableTensor *_back_prop_output; // TODO Consider if these tensors should be built in TensorBuilder - std::unique_ptr _transposed_weights; - std::unique_ptr _conv_back_prop_output; - std::unique_ptr _act_back_prop_output; - std::unique_ptr _transposed_grad_weights; + std::shared_ptr _transposed_weights; + std::shared_ptr _conv_back_prop_output; + std::shared_ptr _transposed_grad_weights; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc index 9d35655b26f..1e57f795228 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc @@ -28,7 +28,7 @@ namespace using namespace onert; -std::unique_ptr +std::shared_ptr createTransposedTensor(const backend::IPortableTensor *origin_tensor) { const auto &origin_shape = origin_tensor->getShape(); @@ -38,7 +38,7 @@ createTransposedTensor(const backend::IPortableTensor *origin_tensor) auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)}; transposed_info.shape(transposed_shape); - return std::make_unique(transposed_info); + return std::make_shared(transposed_info); } } // namespace @@ -86,23 +86,28 @@ void FullyConnectedLayer::configureBackward( "train FullyConnectedLayer: Input other ranks than 2 are not supported."}; _transposed_weights = createTransposedTensor(weights); - _transposed_weights->setBuffer(std::make_shared(weights->total_size())); _transposed_input = createTransposedTensor(input); - _transposed_input->setBuffer(std::make_shared(input->total_size())); _transposed_back_prop_output = createTransposedTensor(back_prop_output); - _transposed_back_prop_output->setBuffer( - std::make_shared(back_prop_output->total_size())); if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); - 
_act_back_prop_output->setBuffer( - std::make_shared(_back_prop_output->total_size())); + _act_back_prop_output = std::make_shared(_back_prop_output->get_info()); } } +std::optional FullyConnectedLayer::registerExtraTensors() +{ + ExtraTensors tensors = {_transposed_weights, _transposed_input, _transposed_back_prop_output}; + if (_act_back_prop_output != nullptr) + { + tensors.push_back(_act_back_prop_output); + } + + return tensors; +} + void FullyConnectedLayer::forward(bool) { cpu::ops::FullyConnectedLayer::run(); } void FullyConnectedLayer::backward() diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h b/runtime/onert/backend/train/ops/FullyConnectedLayer.h index 190bfbffe42..5a691cb176b 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.h +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h @@ -46,6 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, const IPortableTensor *back_prop_output, ir::Activation activation, ir::FullyConnectedWeightsFormat weights_format); + std::optional registerExtraTensors() override; void forward(bool training) override; void backward() override; @@ -58,11 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, IPortableTensor *_back_prop_input; const IPortableTensor *_back_prop_output; - // TODO Optimize memory - std::unique_ptr _transposed_weights; - std::unique_ptr _transposed_input; - std::unique_ptr _transposed_back_prop_output; - std::unique_ptr _act_back_prop_output; + std::shared_ptr _transposed_weights; + std::shared_ptr _transposed_input; + std::shared_ptr _transposed_back_prop_output; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc index 098389d8f10..6196549ce94 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.cc +++ b/runtime/onert/backend/train/ops/PoolLayer.cc @@ -43,8 +43,8 @@ class MaxPool2D final : public 
TrainingKernelRegistry const IPortableTensor *_output; nnfw::cker::PoolParams _op_params; - std::unique_ptr _act_back_prop_output; - std::unique_ptr _arg_max_index; + std::shared_ptr _act_back_prop_output; + std::shared_ptr _arg_max_index; public: MaxPool2D(const uint32_t paddingLeft, const uint32_t, const uint32_t paddingTop, const uint32_t, @@ -66,20 +66,30 @@ class MaxPool2D final : public TrainingKernelRegistry &_op_params.float_activation_max); } - _arg_max_index = std::make_unique(_output->get_info()); - _arg_max_index->setBuffer(std::make_shared(_output->total_size())); + _arg_max_index = std::make_shared(_output->get_info()); if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_output->get_info()); - _act_back_prop_output->setBuffer(std::make_shared(_output->total_size())); + _act_back_prop_output = std::make_shared(_output->get_info()); } }; ~MaxPool2D() {} public: - void forward(const IPortableTensor *in, IPortableTensor *out) + std::optional registerExtraTensors() override + { + ExtraTensors tensors = {_arg_max_index}; + if (_act_back_prop_output != nullptr) + { + tensors.push_back(_act_back_prop_output); + } + + return std::optional(tensors); + } + +public: + void forward(const IPortableTensor *in, IPortableTensor *out) override { auto out_shape = getShape(out); auto out_data = getBuffer(out); @@ -90,7 +100,7 @@ class MaxPool2D final : public TrainingKernelRegistry out_data, getBuffer(arg_max_index)); } - void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) + void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) override { // activation backward try @@ -110,7 +120,7 @@ class MaxPool2D final : public TrainingKernelRegistry getBuffer(arg_max_index), getShape(back_prop_in), getBuffer(back_prop_in)); } -}; +}; // namespace ops class AveragePool2D final : public TrainingKernelRegistry { @@ -225,6 +235,11 @@ void PoolLayer::configureBackward(const uint32_t paddingLeft, 
const uint32_t pad } } +std::optional PoolLayer::registerExtraTensors() +{ + return _kernel->registerExtraTensors(); +} + void PoolLayer::forward(bool training) { if (training) diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h index 2b0c9e2a00b..60cea49111b 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.h +++ b/runtime/onert/backend/train/ops/PoolLayer.h @@ -38,6 +38,8 @@ class TrainingKernelRegistry public: virtual void forward(const IPortableTensor *in, IPortableTensor *out) = 0; virtual void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) = 0; + virtual std::optional registerExtraTensors() = 0; + TrainingKernelRegistry() = default; virtual ~TrainingKernelRegistry() = default; }; @@ -62,6 +64,7 @@ class PoolLayer : public ::onert::exec::train::ITrainableFunction, public cpu::o IPortableTensor *output, IPortableTensor *back_prop_input, const IPortableTensor *back_prop_output); + std::optional registerExtraTensors() override; void forward(bool training) override; void backward() override; From 5412be57d91feeefd8084bca5c21804a3a224621 Mon Sep 17 00:00:00 2001 From: sseung Date: Thu, 10 Oct 2024 16:32:13 +0900 Subject: [PATCH 2/4] update backend --- runtime/onert/backend/train/BackendContext.cc | 51 ++++++++++++++++++ runtime/onert/backend/train/BackendContext.h | 1 + runtime/onert/backend/train/TensorBuilder.cc | 54 +++++++++++++++++++ runtime/onert/backend/train/TensorBuilder.h | 14 +++++ runtime/onert/backend/train/TensorPlanner.cc | 42 +++++++++++++++ runtime/onert/backend/train/TensorPlanner.h | 1 + 6 files changed, 163 insertions(+) diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc index 59fee712247..28c2fe118b2 100644 --- a/runtime/onert/backend/train/BackendContext.cc +++ b/runtime/onert/backend/train/BackendContext.cc @@ -179,9 +179,60 @@ FunctionMap BackendContext::gen() // fn_seq->iterate([&](exec::IFunction 
&ifunc) { ifunc.prepare(); }); // } + planLayerScopeTensors(fn_map); + _tensor_builder->allocateLayerScope(); + return fn_map; } +void BackendContext::planLayerScopeTensors(const FunctionMap &fn_map) +{ + + const auto &ops = trainable_graph()->operations(); + + auto register_tensors = [this, &ops](ir::OperationIndex op_idx, + std::optional &&tensors) { + if (not tensors.has_value()) + return; + + auto ls_tensors = tensors.value(); + for (auto i = 0u; i < ls_tensors.size(); ++i) + { + LayerScopeTensorIndex tensor_idx(op_idx, i); + _tensor_builder->registerLayerScopeTensor(tensor_idx, ls_tensors[i]); + + std::stringstream info; + info << op_idx << "_" << ops.at(op_idx).name(); + VERBOSE() << "register (idx:" << tensor_idx << ") requested from " << info.str() << std::endl; + } + return; + }; + + for (auto &pair : fn_map) + { + auto &op_idx = pair.first; + auto &fn_seq = pair.second; + + const ir::IOperation *op = &ops.at(op_idx); + const auto trainable_op = dynamic_cast(op); + assert(trainable_op != nullptr); + + if (not trainable_op->isRequiredForBackward()) + continue; + + VERBOSE(ExtraTensor) << "register tensor for " << trainable_op->name() << std::endl; + + fn_seq->iterate([&](exec::train::ITrainableFunction &fn) { + register_tensors(op_idx, (&fn)->registerLayerScopeTensors()); + }); + } + + const auto ctx_data = data(); + TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands}; + tensor_planner.planLayerScopeTensors(_tensor_builder.get()); + return; +} + void BackendContext::planForwardTensors() { const auto &tgraph = *trainable_graph(); diff --git a/runtime/onert/backend/train/BackendContext.h b/runtime/onert/backend/train/BackendContext.h index 8e343aee403..7017d7e3175 100644 --- a/runtime/onert/backend/train/BackendContext.h +++ b/runtime/onert/backend/train/BackendContext.h @@ -73,6 +73,7 @@ class BackendContext : public onert::backend::train::TrainableBackendContext private: void planForwardTensors(); void planBackwardTensors(); + 
void planLayerScopeTensors(const FunctionMap &fn_map); public: std::shared_ptr external_context() { return _external_context; } diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc index ee737222be2..9a278738c23 100644 --- a/runtime/onert/backend/train/TensorBuilder.cc +++ b/runtime/onert/backend/train/TensorBuilder.cc @@ -95,6 +95,25 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorI _disposable_backprops.add(index); } +void TensorBuilder::registerLayerScopeTensor(const LayerScopeTensorIndex &index, + std::shared_ptr &tensor) +{ + auto pair = _layerscope_map.find(index.op_index()); + if (pair == _layerscope_map.end()) + { + util::Set indices; + indices.add(index); + _layerscope_map[index.op_index()] = indices; + } + else + { + assert(!pair->second.contains(index)); + pair->second.add(index); + } + + _tensor_reg->setLayerScopeTensor(index, tensor); +} + void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index) { // TODO Support momory plan @@ -155,6 +174,16 @@ void TensorBuilder::notifyDisposableBackPropLastUse(const DisposableTensorIndex _tensor_mgr->releaseDisposableBackPropPlan(index); } +void TensorBuilder::notifyLayerScopeFirstUse(const LayerScopeTensorIndex &index) +{ + _tensor_mgr->claimLayerScopePlan(index); +} + +void TensorBuilder::notifyLayerScopeLastUse(const LayerScopeTensorIndex &index) +{ + _tensor_mgr->releaseLayerScopePlan(index); +} + bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const { return _tensor_info_map.find(index) != _tensor_info_map.end(); @@ -170,6 +199,29 @@ bool TensorBuilder::isRegisteredDisposableBackwardTensor(const DisposableTensorI return _disposable_backprops.contains(index); } +bool TensorBuilder::isRegisteredLayerScopeTensor(const ir::OperationIndex &index) const +{ + const auto pair = _layerscope_map.find(index); + return (pair != _layerscope_map.end()); +} + +util::Set 
+TensorBuilder::getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &index) const +{ + const auto pair = _layerscope_map.find(index); + assert(pair != _layerscope_map.end()); + + return pair->second; +} + +LayerScopeTensorLifeTime +TensorBuilder::getLayerScopeTensorLifeTime(const LayerScopeTensorIndex &index) const +{ + const auto &ls_tensors = _tensor_reg->layerscope_tensors(); + const auto &tensor = ls_tensors.at(index); + return tensor->lifetime(); +} + void TensorBuilder::allocate(void) { _tensor_mgr->allocateNonConstTensors(); @@ -183,6 +235,8 @@ void TensorBuilder::allocateBackward(void) _tensor_mgr->allocateDisposableBackPropTensors(); } +void TensorBuilder::allocateLayerScope(void) { _tensor_mgr->allocateLayerScopeTensors(); } + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h index 1fa46855142..3b0bd7ae688 100644 --- a/runtime/onert/backend/train/TensorBuilder.h +++ b/runtime/onert/backend/train/TensorBuilder.h @@ -18,10 +18,12 @@ #define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__ #include "DisposableTensorIndex.h" +#include "LayerScopeTensorIndex.h" #include "TensorManager.h" #include "TensorRegistry.h" #include "util/Set.h" +#include #include namespace onert @@ -55,6 +57,9 @@ class TensorBuilder void registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index, const ir::OperandInfo &info); + void registerLayerScopeTensor(const LayerScopeTensorIndex &index, + std::shared_ptr &info); + // TODO Support memory plan of all tensors void notifyFirstUse(const ir::OperandIndex &); void notifyLastUse(const ir::OperandIndex &); @@ -62,13 +67,21 @@ class TensorBuilder void notifyBackwardLastUse(const ir::OperandIndex &); void notifyDisposableBackPropFirstUse(const DisposableTensorIndex &); void notifyDisposableBackPropLastUse(const DisposableTensorIndex &); + void notifyLayerScopeFirstUse(const LayerScopeTensorIndex &); + void 
notifyLayerScopeLastUse(const LayerScopeTensorIndex &);
 
   bool isRegistered(const ir::OperandIndex &) const;
   bool isRegisteredBackward(const ir::OperandIndex &) const;
   bool isRegisteredDisposableBackwardTensor(const DisposableTensorIndex &index) const;
+  bool isRegisteredLayerScopeTensor(const ir::OperationIndex &) const;
+
+  util::Set
+  getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &) const;
+  LayerScopeTensorLifeTime getLayerScopeTensorLifeTime(const LayerScopeTensorIndex &) const;
 
   void allocate(void);
   void allocateBackward(void);
+  void allocateLayerScope(void); // NOTE: must be called after planLayerScopeTensors() has planned all layer-scope tensors
 
 private:
   const std::shared_ptr _tensor_reg;
@@ -77,6 +90,7 @@ class TensorBuilder
   ir::OperandIndexMap _backward_tensor_info_map;
   ir::OperandIndexMap _as_constants;
   util::Set _disposable_backprops;
+  ir::OperationIndexMap> _layerscope_map;
   const exec::train::optimizer::Optimizer *_optimizer;
 };
 
diff --git a/runtime/onert/backend/train/TensorPlanner.cc b/runtime/onert/backend/train/TensorPlanner.cc
index 724eab7d171..33d4048dce4 100644
--- a/runtime/onert/backend/train/TensorPlanner.cc
+++ b/runtime/onert/backend/train/TensorPlanner.cc
@@ -519,6 +519,48 @@ ir::OperandIndexSequence TensorPlanner::getOutgoingBackPropSeq(const ir::Operati
   return ret;
 }
 
+void TensorPlanner::planLayerScopeTensors(TensorBuilder *tensor_builder)
+{
+  // forwarding order
+  const auto f_order = _tgraph.topolSortOperations();
+  for (const auto &op_index : f_order)
+  {
+    if (not tensor_builder->isRegisteredLayerScopeTensor(op_index))
+      continue;
+
+    auto indices = tensor_builder->getRegisteredLayerScopeTensorIndex(op_index);
+    for (const auto &idx : indices)
+    {
+      const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx);
+      if (lt == LayerScopeTensorLifeTime::FORWARD_TO_BACKWARD)
+        tensor_builder->notifyLayerScopeFirstUse(idx);
+    }
+  }
+
+  // backwarding order
+  const auto b_order = _tgraph.essentialBackwardOrder();
+  for (const auto &op_index : b_order)
+  {
+    if (not 
tensor_builder->isRegisteredLayerScopeTensor(op_index)) + continue; + + auto indices = tensor_builder->getRegisteredLayerScopeTensorIndex(op_index); + for (const auto &idx : indices) + { + const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx); + if (lt == LayerScopeTensorLifeTime::BACKWARD) + tensor_builder->notifyLayerScopeFirstUse(idx); + } + for (const auto &idx : indices) + { + const auto lt = tensor_builder->getLayerScopeTensorLifeTime(idx); + if (lt == LayerScopeTensorLifeTime::FORWARD_TO_BACKWARD || + lt == LayerScopeTensorLifeTime::BACKWARD) + tensor_builder->notifyLayerScopeLastUse(idx); + } + } +} + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/TensorPlanner.h b/runtime/onert/backend/train/TensorPlanner.h index 61af802fda9..5bdb0d70803 100644 --- a/runtime/onert/backend/train/TensorPlanner.h +++ b/runtime/onert/backend/train/TensorPlanner.h @@ -45,6 +45,7 @@ class TensorPlanner void planBackPropTensors(TensorBuilder *tensor_builder); void planGradientTensors(TensorBuilder *tensor_builder); void planDisposableBackPropTensors(TensorBuilder *tensor_builder); + void planLayerScopeTensors(TensorBuilder *tensor_builder); private: ir::OperandIndexSequence getOutgoingBackPropSeq(const ir::OperationIndex &op_index, From 1f26bc4d1e84f539ff5c4db1b3b09971bd36d8da Mon Sep 17 00:00:00 2001 From: sseung Date: Thu, 10 Oct 2024 16:34:54 +0900 Subject: [PATCH 3/4] rename extra to layerscope --- runtime/onert/backend/train/BackendContext.cc | 2 +- runtime/onert/backend/train/TensorBuilder.cc | 22 +++++++++-------- runtime/onert/backend/train/TensorBuilder.h | 4 ++-- .../train/ops/BinaryArithmeticLayer.cc | 8 +++---- .../backend/train/ops/BinaryArithmeticLayer.h | 4 ++-- .../backend/train/ops/ConvolutionLayer.cc | 15 ++++++------ .../backend/train/ops/ConvolutionLayer.h | 10 ++++---- .../backend/train/ops/FullyConnectedLayer.cc | 11 +++++---- .../backend/train/ops/FullyConnectedLayer.h | 10 ++++---- 
runtime/onert/backend/train/ops/PoolLayer.cc | 24 ++++++++++++------- runtime/onert/backend/train/ops/PoolLayer.h | 4 ++-- 11 files changed, 62 insertions(+), 52 deletions(-) diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc index 28c2fe118b2..cba4eac8c31 100644 --- a/runtime/onert/backend/train/BackendContext.cc +++ b/runtime/onert/backend/train/BackendContext.cc @@ -220,7 +220,7 @@ void BackendContext::planLayerScopeTensors(const FunctionMap &fn_map) if (not trainable_op->isRequiredForBackward()) continue; - VERBOSE(ExtraTensor) << "register tensor for " << trainable_op->name() << std::endl; + VERBOSE(LayerScopeTensor) << "register tensor for " << trainable_op->name() << std::endl; fn_seq->iterate([&](exec::train::ITrainableFunction &fn) { register_tensors(op_idx, (&fn)->registerLayerScopeTensors()); diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc index 9a278738c23..5b58ed7b593 100644 --- a/runtime/onert/backend/train/TensorBuilder.cc +++ b/runtime/onert/backend/train/TensorBuilder.cc @@ -98,12 +98,14 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorI void TensorBuilder::registerLayerScopeTensor(const LayerScopeTensorIndex &index, std::shared_ptr &tensor) { - auto pair = _layerscope_map.find(index.op_index()); - if (pair == _layerscope_map.end()) + const auto op_idx = index.op_index(); + + const auto pair = _operation_to_layerscope.find(op_idx); + if (pair == _operation_to_layerscope.end()) { - util::Set indices; - indices.add(index); - _layerscope_map[index.op_index()] = indices; + util::Set tensor_indices; + tensor_indices.add(index); + _operation_to_layerscope[op_idx] = tensor_indices; } else { @@ -201,15 +203,15 @@ bool TensorBuilder::isRegisteredDisposableBackwardTensor(const DisposableTensorI bool TensorBuilder::isRegisteredLayerScopeTensor(const ir::OperationIndex &index) const { - const auto pair = 
_layerscope_map.find(index); - return (pair != _layerscope_map.end()); + const auto pair = _operation_to_layerscope.find(index); + return (pair != _operation_to_layerscope.end()); } -util::Set +const util::Set & TensorBuilder::getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &index) const { - const auto pair = _layerscope_map.find(index); - assert(pair != _layerscope_map.end()); + const auto pair = _operation_to_layerscope.find(index); + assert(pair != _operation_to_layerscope.end()); return pair->second; } diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h index 3b0bd7ae688..c53861ec519 100644 --- a/runtime/onert/backend/train/TensorBuilder.h +++ b/runtime/onert/backend/train/TensorBuilder.h @@ -75,7 +75,7 @@ class TensorBuilder bool isRegisteredDisposableBackwardTensor(const DisposableTensorIndex &index) const; bool isRegisteredLayerScopeTensor(const ir::OperationIndex &) const; - util::Set + const util::Set & getRegisteredLayerScopeTensorIndex(const ir::OperationIndex &) const; LayerScopeTensorLifeTime getLayerScopeTensorLifeTime(const LayerScopeTensorIndex &) const; @@ -90,7 +90,7 @@ class TensorBuilder ir::OperandIndexMap _backward_tensor_info_map; ir::OperandIndexMap _as_constants; util::Set _disposable_backprops; - ir::OperationIndexMap> _layerscope_map; + ir::OperationIndexMap> _operation_to_layerscope; const exec::train::optimizer::Optimizer *_optimizer; }; diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc index a1a19fe56fb..cd3dc2e7f52 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc @@ -55,20 +55,20 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs, if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_shared(_back_prop_output->get_info()); + _act_back_prop_output = 
std::make_shared(_back_prop_output->get_info()); } } -std::optional BinaryArithmeticLayer::registerExtraTensors() +std::optional BinaryArithmeticLayer::registerLayerScopeTensors() { - ExtraTensors tensors; + LayerScopeTensors tensors; if (_act_back_prop_output != nullptr) { tensors.push_back(_act_back_prop_output); } - return std::optional(tensors); + return std::optional(tensors); } void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); } diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h index 598cd3327ca..0a3e8ae58d6 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h @@ -50,7 +50,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs, const IPortableTensor *back_prop_output, const ir::Activation activation, const ArithmeticType arithmetic_type); - std::optional registerExtraTensors() override; + std::optional registerLayerScopeTensors() override; void forward(bool training) override; void backward() override; @@ -61,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, ArithmeticType _arithmetic_type; ir::Activation _activation; - std::shared_ptr _act_back_prop_output; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc index 28c66c13737..f53a9932a00 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc @@ -79,28 +79,29 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights, if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1) throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet"); - 
_transposed_weights = createTransposedWeights(weights); + _transposed_weights = createTransposedWeights(weights); - _conv_back_prop_output = std::make_shared(back_prop_output->get_info()); + _conv_back_prop_output = std::make_shared(back_prop_output->get_info()); - _transposed_grad_weights = createTransposedWeights(weights); + _transposed_grad_weights = createTransposedWeights(weights); if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); + _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); } } -std::optional ConvolutionLayer::registerExtraTensors() +std::optional ConvolutionLayer::registerLayerScopeTensors() { - ExtraTensors tensors = {_transposed_weights, _conv_back_prop_output, _transposed_grad_weights}; + LayerScopeTensors tensors = {_transposed_weights, _conv_back_prop_output, + _transposed_grad_weights}; if (_act_back_prop_output != nullptr) { tensors.push_back(_act_back_prop_output); } - return std::optional(tensors); + return std::optional(tensors); } void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); } diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h index 6df64a26b41..1177fb26f1f 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h @@ -41,7 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input, IPortableTensor *grad_weights, IPortableTensor *grad_bias, const IPortableTensor *back_prop_output, const ir::Activation activation); - std::optional registerExtraTensors() override; + std::optional registerLayerScopeTensors() override; void forward(bool training) override; void backward() override; @@ -55,10 +55,10 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction, const 
IPortableTensor *_back_prop_output; // TODO Consider if these tensors should be built in TensorBuilder - std::shared_ptr _transposed_weights; - std::shared_ptr _conv_back_prop_output; - std::shared_ptr _transposed_grad_weights; - std::shared_ptr _act_back_prop_output; + std::shared_ptr _transposed_weights; + std::shared_ptr _conv_back_prop_output; + std::shared_ptr _transposed_grad_weights; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc index 1e57f795228..cf1407923de 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc @@ -28,7 +28,7 @@ namespace using namespace onert; -std::shared_ptr +std::shared_ptr createTransposedTensor(const backend::IPortableTensor *origin_tensor) { const auto &origin_shape = origin_tensor->getShape(); @@ -38,7 +38,7 @@ createTransposedTensor(const backend::IPortableTensor *origin_tensor) auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)}; transposed_info.shape(transposed_shape); - return std::make_shared(transposed_info); + return std::make_shared(transposed_info); } } // namespace @@ -93,13 +93,14 @@ void FullyConnectedLayer::configureBackward( if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_shared(_back_prop_output->get_info()); + _act_back_prop_output = std::make_shared(_back_prop_output->get_info()); } } -std::optional FullyConnectedLayer::registerExtraTensors() +std::optional FullyConnectedLayer::registerLayerScopeTensors() { - ExtraTensors tensors = {_transposed_weights, _transposed_input, _transposed_back_prop_output}; + LayerScopeTensors tensors = {_transposed_weights, _transposed_input, + _transposed_back_prop_output}; if (_act_back_prop_output != nullptr) { tensors.push_back(_act_back_prop_output); diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h 
b/runtime/onert/backend/train/ops/FullyConnectedLayer.h index 5a691cb176b..44fe5ab7c88 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.h +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h @@ -46,7 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, const IPortableTensor *back_prop_output, ir::Activation activation, ir::FullyConnectedWeightsFormat weights_format); - std::optional registerExtraTensors() override; + std::optional registerLayerScopeTensors() override; void forward(bool training) override; void backward() override; @@ -59,10 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, IPortableTensor *_back_prop_input; const IPortableTensor *_back_prop_output; - std::shared_ptr _transposed_weights; - std::shared_ptr _transposed_input; - std::shared_ptr _transposed_back_prop_output; - std::shared_ptr _act_back_prop_output; + std::shared_ptr _transposed_weights; + std::shared_ptr _transposed_input; + std::shared_ptr _transposed_back_prop_output; + std::shared_ptr _act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc index 6196549ce94..7604f80bffb 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.cc +++ b/runtime/onert/backend/train/ops/PoolLayer.cc @@ -24,6 +24,8 @@ #include #include +#include + namespace onert { namespace backend @@ -43,8 +45,8 @@ class MaxPool2D final : public TrainingKernelRegistry const IPortableTensor *_output; nnfw::cker::PoolParams _op_params; - std::shared_ptr _act_back_prop_output; - std::shared_ptr _arg_max_index; + std::shared_ptr _act_back_prop_output; + std::shared_ptr _arg_max_index; public: MaxPool2D(const uint32_t paddingLeft, const uint32_t, const uint32_t paddingTop, const uint32_t, @@ -66,26 +68,27 @@ class MaxPool2D final : public TrainingKernelRegistry &_op_params.float_activation_max); } - _arg_max_index = std::make_shared(_output->get_info()); + 
_arg_max_index = std::make_shared( + _output->get_info(), LayerScopeTensorLifeTime::FORWARD_TO_BACKWARD); if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_shared(_output->get_info()); + _act_back_prop_output = std::make_shared(_output->get_info()); } }; ~MaxPool2D() {} public: - std::optional registerExtraTensors() override + std::optional registerLayerScopeTensors() override { - ExtraTensors tensors = {_arg_max_index}; + LayerScopeTensors tensors = {_arg_max_index}; if (_act_back_prop_output != nullptr) { tensors.push_back(_act_back_prop_output); } - return std::optional(tensors); + return std::optional(tensors); } public: @@ -191,6 +194,9 @@ class AveragePool2D final : public TrainingKernelRegistry getBuffer(back_prop_out), getShape(back_prop_in), getBuffer(back_prop_in)); } + +public: + std::optional registerLayerScopeTensors() override { return std::nullopt; } }; } // namespace @@ -235,9 +241,9 @@ void PoolLayer::configureBackward(const uint32_t paddingLeft, const uint32_t pad } } -std::optional PoolLayer::registerExtraTensors() +std::optional PoolLayer::registerLayerScopeTensors() { - return _kernel->registerExtraTensors(); + return _kernel->registerLayerScopeTensors(); } void PoolLayer::forward(bool training) diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h index 60cea49111b..b1ed9006a42 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.h +++ b/runtime/onert/backend/train/ops/PoolLayer.h @@ -38,7 +38,7 @@ class TrainingKernelRegistry public: virtual void forward(const IPortableTensor *in, IPortableTensor *out) = 0; virtual void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) = 0; - virtual std::optional registerExtraTensors() = 0; + virtual std::optional registerLayerScopeTensors() = 0; TrainingKernelRegistry() = default; virtual ~TrainingKernelRegistry() = default; @@ -64,7 +64,7 @@ class PoolLayer : public 
::onert::exec::train::ITrainableFunction, public cpu::o IPortableTensor *output, IPortableTensor *back_prop_input, const IPortableTensor *back_prop_output); - std::optional registerExtraTensors() override; + std::optional registerLayerScopeTensors() override; void forward(bool training) override; void backward() override; From 401dc31fc503ad2f874e842d04ceef8893845496 Mon Sep 17 00:00:00 2001 From: sseung Date: Thu, 10 Oct 2024 22:19:41 +0900 Subject: [PATCH 4/4] resolve android build err --- runtime/onert/backend/train/ops/PoolLayer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc index 7604f80bffb..8a8c05adeed 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.cc +++ b/runtime/onert/backend/train/ops/PoolLayer.cc @@ -165,7 +165,7 @@ class AveragePool2D final : public TrainingKernelRegistry ~AveragePool2D() {} public: - void forward(const IPortableTensor *in, IPortableTensor *out) + void forward(const IPortableTensor *in, IPortableTensor *out) override { auto out_shape = getShape(out); auto out_data = getBuffer(out); @@ -175,7 +175,7 @@ class AveragePool2D final : public TrainingKernelRegistry out_data); } - void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) + void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) override { // activation backward try