diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc index 3e72b72d2ef..8d0d6f22f00 100644 --- a/runtime/onert/backend/train/BackendContext.cc +++ b/runtime/onert/backend/train/BackendContext.cc @@ -16,11 +16,13 @@ #include "BackendContext.h" +#include "ExtraTensorGenerator.h" #include "TensorBuilder.h" #include "KernelGenerator.h" #include "ops/BackPropInitializer.h" #include +#include #include #include @@ -229,6 +231,29 @@ FunctionMap BackendContext::genKernels() // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); // } + ExtraTensorGenerator extra_tensor_gen(trainable_graph(), _tensor_builder, _tensor_registry); + + const auto &ops = trainable_graph()->operations(); + + for (auto &pair : ret) + { + auto &op_idx = pair.first; + auto &fn_seq = pair.second; + + const ir::IOperation *op = &ops.at(op_idx); + const auto trainable_op = dynamic_cast(op); + assert(trainable_op != nullptr); + + if (not trainable_op->isRequiredForBackward()) + continue; + + fn_seq->iterate([&](exec::train::ITrainableFunction &fn) { + extra_tensor_gen.register_tensors(op_idx, (&fn)->requestExtraTensors()); + }); + } + extra_tensor_gen.plan(); + extra_tensor_gen.allocate(); + return ret; } diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.cc b/runtime/onert/backend/train/ExtraTensorGenerator.cc new file mode 100644 index 00000000000..0cbe8b8b1f9 --- /dev/null +++ b/runtime/onert/backend/train/ExtraTensorGenerator.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "ExtraTensorGenerator.h"
+
+#include "ExtraTensorIndex.h"
+
+#include
+#include
+#include
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+ExtraTensorGenerator::ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                                           std::shared_ptr &tensor_builder,
+                                           std::shared_ptr &tensor_registry)
+  : _tgraph(tgraph), _tensor_builder(tensor_builder)
+{
+  _tensor_reg = std::dynamic_pointer_cast(tensor_registry);
+}
+
+void ExtraTensorGenerator::register_tensors(ir::OperationIndex op_idx, ExtraTensorRequests &&reqs)
+{
+  // Save requests; _idx_to_requests will be used for memory planning
+  if (reqs.size() == 0)
+    return;
+
+  // _idx_to_requests[op_idx] = reqs;
+  _idx_to_requests.insert({op_idx, reqs});
+  auto &operations = _tgraph->operations();
+
+  for (size_t i = 0; i < reqs.size(); i++)
+  {
+    // register tensor
+    ExtraTensorIndex tensor_idx(op_idx, i);
+    _tensor_builder->registerExtraTensorInfo(tensor_idx, reqs[i].info);
+
+    std::stringstream op_info;
+    op_info << op_idx << "_" << operations.at(op_idx).name();
+    VERBOSE(ExtraTensorGenerator) << "register (idx:" << tensor_idx << ") requested from "
+                                  << op_info.str() << std::endl;
+
+    // Return the registered tensor to the requester
+    auto generated_tensor = _tensor_reg->getExtraTensor(tensor_idx);
+    *reqs[i].address = generated_tensor;
+  }
+  return;
+}
+
+void ExtraTensorGenerator::plan()
+{
+  // forwarding order
+  const auto f_order = _tgraph->topolSortOperations();
+  for (const auto &op_index : f_order)
+  {
+    auto &reqs = _idx_to_requests[op_index];
+    for (auto i = 0u; i < reqs.size(); ++i)
+    {
+      auto lt = reqs[i].lifetime;
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+
+  // backwarding order
+  const auto b_order = _tgraph->essentialBackwardOrder();
+  for (const auto &op_index : b_order)
+  {
+    auto &reqs = _idx_to_requests[op_index];
+
+    for (auto i = 0u; i < reqs.size(); ++i)
+    {
+      auto lt = reqs[i].lifetime;
+      if (lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+
+    for (auto i = 0u; i < reqs.size(); ++i)
+    {
+      auto lt = reqs[i].lifetime;
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD || lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyLastUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+}
+
+void ExtraTensorGenerator::allocate() { _tensor_builder->allocateExtra(); }
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.h b/runtime/onert/backend/train/ExtraTensorGenerator.h
new file mode 100644
index 00000000000..c41a43eff4b
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorGenerator.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+#define __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+
+#include
+#include
+#include
+
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class ExtraTensorGenerator
+{
+public:
+  ExtraTensorGenerator() = delete;
+
+  ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                       std::shared_ptr &tensor_builder,
+                       std::shared_ptr &tensor_registry);
+
+public:
+  // Since 'register' is a reserved keyword, use 'register_tensors' instead of 'register'
+  void register_tensors(ir::OperationIndex idx, ExtraTensorRequests &&requests);
+  void plan();
+  void allocate();
+
+private:
+  const ir::train::TrainableGraph *_tgraph;
+  std::shared_ptr _tensor_builder;
+  std::shared_ptr _tensor_reg;
+  std::unordered_map _idx_to_requests;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
diff --git a/runtime/onert/backend/train/ExtraTensorIndex.h b/runtime/onert/backend/train/ExtraTensorIndex.h
new file mode 100644
index 00000000000..80cc904a177
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorIndex.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_TRAIN_EXTRA_TENSOR_INDEX_H__ +#define __ONERT_BACKEND_TRAIN_EXTRA_TENSOR_INDEX_H__ + +#include + +namespace onert +{ +namespace backend +{ +namespace train +{ + +class ExtraTensorIndex +{ +public: + ExtraTensorIndex(ir::OperationIndex op, uint32_t sub) : op_index(op), sub_index(sub) {} + + ir::OperationIndex op_index; + uint32_t sub_index; + + bool operator==(const ExtraTensorIndex &other) const + { + return op_index == other.op_index && sub_index == other.sub_index; + } +}; + +inline std::ostream &operator<<(std::ostream &o, const ExtraTensorIndex &i) +{ + o << i.op_index; + o << "-" << i.sub_index; + return o; +} + +} // namespace train +} // namespace backend +} // namespace onert + +namespace std +{ + +template <> struct hash +{ + size_t operator()(const onert::backend::train::ExtraTensorIndex &index) const noexcept + { + const auto op_index = index.op_index; + const auto sub_index = index.sub_index; + + return (static_cast(op_index.value())) << 16 | static_cast(sub_index); + } +}; + +} // namespace std + +#endif // __ONERT_BACKEND_TRAIN_EXTRA_TENSOR_INDEX_H__ diff --git a/runtime/onert/backend/train/MemoryManager.cc b/runtime/onert/backend/train/MemoryManager.cc index 87cd15d55a8..36ff49b2d85 100644 --- a/runtime/onert/backend/train/MemoryManager.cc +++ b/runtime/onert/backend/train/MemoryManager.cc @@ -17,6 +17,7 @@ #include "MemoryManager.h" #include "MemoryPlannerFactory.h" +#include "ExtraTensorIndex.h" #include @@ -53,52 +54,60 @@ uint8_t *GradientMemoryManager::getOptVarBuffer(const ir::OperandIndex &ind, uin return _var_mem_alloc->base() + var_offset + mem_blk.offset; } -DisposableMemoryManager::DisposableMemoryManager() : _mem_planner{createMemoryPlanner()} +template +TrainMemoryManager::TrainMemoryManager() : _mem_planner{createMemoryPlanner()} { // DO NOTHING } -DisposableMemoryManager::DisposableMemoryManager(const std::string planner_id) +template +TrainMemoryManager::TrainMemoryManager(const std::string planner_id) : _mem_planner{createMemoryPlanner(planner_id)} { // DO NOTHING } -basic::IMemoryPlanner *DisposableMemoryManager::createMemoryPlanner() +template +basic::IMemoryPlanner *TrainMemoryManager::createMemoryPlanner() { auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER); - return MemoryPlannerFactory::get().create(planner_id); + return MemoryPlannerFactory::get().create(planner_id); } -basic::IMemoryPlanner * -DisposableMemoryManager::createMemoryPlanner(const std::string planner_id) +template +basic::IMemoryPlanner * +TrainMemoryManager::createMemoryPlanner(const std::string planner_id) { - return MemoryPlannerFactory::get().create(planner_id); + return MemoryPlannerFactory::get().create(planner_id); } -void DisposableMemoryManager::claimPlan(const DisposableTensorIndex &ind, uint32_t size) +template void TrainMemoryManager::claimPlan(const Index &ind, uint32_t size) { _mem_planner->claim(ind, size); } -void DisposableMemoryManager::releasePlan(const DisposableTensorIndex &ind) +template void TrainMemoryManager::releasePlan(const Index &ind) { _mem_planner->release(ind); } -void DisposableMemoryManager::allocate(void) +template void TrainMemoryManager::allocate(void) { _mem_alloc = std::make_shared(_mem_planner->capacity()); assert(_mem_alloc->base()); } -uint8_t *DisposableMemoryManager::getBuffer(const DisposableTensorIndex &ind) const +template uint8_t *TrainMemoryManager::getBuffer(const Index &ind) const { assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end()); const auto 
&mem_blk = _mem_planner->memory_plans().at(ind); return _mem_alloc->base() + mem_blk.offset; } +// Instatiation +template class TrainMemoryManager; +template class TrainMemoryManager; + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/MemoryManager.h b/runtime/onert/backend/train/MemoryManager.h index 987cf905100..f330d8073c8 100644 --- a/runtime/onert/backend/train/MemoryManager.h +++ b/runtime/onert/backend/train/MemoryManager.h @@ -20,6 +20,7 @@ #include #include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" namespace onert { @@ -44,30 +45,34 @@ class GradientMemoryManager : public MemoryManager uint32_t _optim_vars_count; }; -class DisposableMemoryManager +// TODO: Find a better name +template class TrainMemoryManager { public: - DisposableMemoryManager(); - DisposableMemoryManager(const std::string planner_id); + TrainMemoryManager(); + TrainMemoryManager(const std::string planner_id); void allocate(void); - uint8_t *getBuffer(const DisposableTensorIndex &ind) const; + uint8_t *getBuffer(const Index &ind) const; void deallocate(void) { _mem_alloc->release(); } - void claimPlan(const DisposableTensorIndex &ind, uint32_t size); - void releasePlan(const DisposableTensorIndex &ind); + void claimPlan(const Index &ind, uint32_t size); + void releasePlan(const Index &ind); std::shared_ptr getMemAlloc() { return _mem_alloc; } private: - basic::IMemoryPlanner *createMemoryPlanner(); - basic::IMemoryPlanner *createMemoryPlanner(const std::string planner_id); + basic::IMemoryPlanner *createMemoryPlanner(); + basic::IMemoryPlanner *createMemoryPlanner(const std::string planner_id); private: - std::shared_ptr> _mem_planner; + std::shared_ptr> _mem_planner; std::shared_ptr _mem_alloc; }; +using DisposableMemoryManager = TrainMemoryManager; +using ExtraMemoryManager = TrainMemoryManager; + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/MemoryPlanner.cc b/runtime/onert/backend/train/MemoryPlanner.cc index ea385558e28..e60e9f99948 100644 --- a/runtime/onert/backend/train/MemoryPlanner.cc +++ b/runtime/onert/backend/train/MemoryPlanner.cc @@ -15,6 +15,8 @@ */ #include "MemoryPlanner.h" +#include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" #include @@ -27,7 +29,7 @@ namespace backend namespace train { -void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size) +template void BumpPlanner::claim(const Index &ind, size_t size) { basic::Block blk{_capacity, size}; _mem_plans[ind] = blk; @@ -36,7 +38,7 @@ void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size) VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl; } -void BumpPlanner::release(const DisposableTensorIndex &ind) +template void BumpPlanner::release(const Index &ind) { VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): " << "NOTHING does" << std::endl; @@ -56,7 +58,7 @@ void BumpPlanner::release(const DisposableTensorIndex &ind) // point in time, it means the place at the offset can be claimed. // 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than // the previous claim_base_offset. 
-void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size) +template void FirstFitPlanner::claim(const Index &ind, size_t size) { // Find the right position for claiming uint32_t next_offset = 0; @@ -88,7 +90,7 @@ void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size) } } -void FirstFitPlanner::release(const DisposableTensorIndex &ind) +template void FirstFitPlanner::release(const Index &ind) { for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it) { @@ -107,14 +109,15 @@ void FirstFitPlanner::release(const DisposableTensorIndex &ind) assert(!"Cannot release for given index. It has been not claimed or released already."); } -WICPlanner::WICPlanner() +template +WICPlanner::WICPlanner() : _initialized(false), _capacity(0), _mem_plans(), _live_indices(), _interference_graph(), _indices() { // DO NOTHING } -void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size) +template void WICPlanner::claim(const Index &ind, size_t size) { _indices.emplace(size, ind); _interference_graph[ind].insert(_interference_graph[ind].end(), _live_indices.cbegin(), @@ -128,7 +131,7 @@ void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size) VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl; } -void WICPlanner::release(const DisposableTensorIndex &ind) +template void WICPlanner::release(const Index &ind) { _live_indices.erase(ind); VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl; @@ -143,7 +146,7 @@ void WICPlanner::release(const DisposableTensorIndex &ind) * 3. Allocate memory block for sorted operands * - Find free memory block which does not overlap with interfered operands */ -void WICPlanner::buildMemoryPlans() +template void WICPlanner::buildMemoryPlans() { for (const auto &[size, ind] : _indices) { @@ -194,13 +197,22 @@ void WICPlanner::buildMemoryPlans() _indices.clear(); } -std::unordered_map &WICPlanner::memory_plans() +template typename WICPlanner::MemoryPlans &WICPlanner::memory_plans() { if (!_initialized) buildMemoryPlans(); return _mem_plans; } +template class BumpPlanner; +template class BumpPlanner; + +template class FirstFitPlanner; +template class FirstFitPlanner; + +template class WICPlanner; +template class WICPlanner; + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/MemoryPlanner.h b/runtime/onert/backend/train/MemoryPlanner.h index 181dd5e6979..5e3f48e02f8 100644 --- a/runtime/onert/backend/train/MemoryPlanner.h +++ b/runtime/onert/backend/train/MemoryPlanner.h @@ -24,13 +24,14 @@ #include -#include "DisposableTensorIndex.h" - #include #include #include #include +#include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" + namespace onert { namespace backend @@ -41,20 +42,22 @@ namespace train /** * @brief Class to plan memory by bump way */ -class BumpPlanner : public basic::IMemoryPlanner +template class BumpPlanner : public basic::IMemoryPlanner { + using MemoryPlans = typename basic::IMemoryPlanner::MemoryPlans; + public: /** * @brief Claim memory for tensor by bump way * @param[in] index The tensor index * @param[in] size The size of the memory */ - void claim(const DisposableTensorIndex &, size_t) override; + void claim(const Index &, size_t) override; /** * @brief Release memory for tensor by bump way * @param[in] index The tensor index */ - void release(const DisposableTensorIndex &) override; + void release(const Index &) override; /** * @brief Get capacity for memory planning * @return The value of 
capacity @@ -74,20 +77,22 @@ class BumpPlanner : public basic::IMemoryPlanner /** * @brief Class to plan memory by firstfit way */ -class FirstFitPlanner : public basic::IMemoryPlanner +template class FirstFitPlanner : public basic::IMemoryPlanner { + using MemoryPlans = typename basic::IMemoryPlanner::MemoryPlans; + public: /** * @brief Claim memory for tensor by firstfit way * @param[in] index The tensor index * @param[in] size The size of the memory */ - void claim(const DisposableTensorIndex &, size_t) override; + void claim(const Index &, size_t) override; /** * @brief Release memory for tensor by firstfit way * @param[in] index The tensor index */ - void release(const DisposableTensorIndex &) override; + void release(const Index &) override; /** * @brief Get capacity for memory planning * @return The value of capacity @@ -103,14 +108,17 @@ class FirstFitPlanner : public basic::IMemoryPlanner uint32_t _capacity = 0; MemoryPlans _mem_plans; // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset) - std::map _claim_table; + std::map _claim_table; }; /** * @brief Class to plan memory by Weighted Interval Color algorithm */ -class WICPlanner : public basic::IMemoryPlanner +template class WICPlanner : public basic::IMemoryPlanner { +public: + using MemoryPlans = typename basic::IMemoryPlanner::MemoryPlans; + public: WICPlanner(); @@ -119,12 +127,12 @@ class WICPlanner : public basic::IMemoryPlanner * @param[in] index The tensor index * @param[in] size The size of the memory */ - void claim(const DisposableTensorIndex &, size_t) override; + void claim(const Index &, size_t) override; /** * @brief Release memory for tensor by WIC algorithm * @param[in] index The tensor index */ - void release(const DisposableTensorIndex &) override; + void release(const Index &) override; /** * @brief Get capacity for memory planning * @return The value of capacity @@ -147,10 +155,10 @@ class WICPlanner : public basic::IMemoryPlanner bool _initialized; uint32_t _capacity; MemoryPlans _mem_plans; - std::unordered_set _live_indices; - DisposableTensorIndexMap> _interference_graph; + std::unordered_set _live_indices; + std::unordered_map> _interference_graph; // Sort tensors by descending order of size - std::multimap> _indices; + std::multimap> _indices; }; } // namespace train diff --git a/runtime/onert/backend/train/MemoryPlanner.test.cc b/runtime/onert/backend/train/MemoryPlanner.test.cc index 8978607706f..15b9dc15693 100644 --- a/runtime/onert/backend/train/MemoryPlanner.test.cc +++ b/runtime/onert/backend/train/MemoryPlanner.test.cc @@ -16,6 +16,7 @@ #include +#include "DisposableTensorIndex.h" #include "MemoryPlanner.h" #include "ir/Index.h" @@ -25,7 +26,7 @@ using onert::ir::OperationIndex; TEST(BumpPlanner, claim_test) { - BumpPlanner planner; + BumpPlanner planner; auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size, uint32_t expected_offset) { @@ -55,7 +56,7 @@ TEST(BumpPlanner, claim_test) TEST(FirstFitPlanner, claim_release_test) { - FirstFitPlanner planner; + FirstFitPlanner planner; auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size, uint32_t expected_offset) { @@ -148,7 +149,7 @@ TEST(FirstFitPlanner, claim_release_test) TEST(FirstFitPlanner, neg_release_non_existing_index) { - FirstFitPlanner planner; + FirstFitPlanner planner; auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size, uint32_t expected_offset) { @@ -184,7 +185,7 @@ TEST(FirstFitPlanner, 
neg_release_non_existing_index) TEST(FirstFitPlanner, neg_release_twice) { - FirstFitPlanner planner; + FirstFitPlanner planner; auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size, uint32_t expected_offset) { @@ -223,7 +224,7 @@ TEST(FirstFitPlanner, neg_release_twice) TEST(WICPlanner, claim_release_test) { - WICPlanner planner; + WICPlanner planner; auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size) { DisposableTensorIndex mem_idx{OperationIndex{op_index}, OperandIndex{operand_index}}; diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.cc b/runtime/onert/backend/train/MemoryPlannerFactory.cc index acfa44e3511..e1e80119213 100644 --- a/runtime/onert/backend/train/MemoryPlannerFactory.cc +++ b/runtime/onert/backend/train/MemoryPlannerFactory.cc @@ -16,6 +16,8 @@ #include "MemoryPlannerFactory.h" +#include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" namespace onert { namespace backend @@ -23,29 +25,47 @@ namespace backend namespace train { -MemoryPlannerFactory &MemoryPlannerFactory::get() +template MemoryPlannerFactory &MemoryPlannerFactory::get() { - static MemoryPlannerFactory instance; + static MemoryPlannerFactory instance; return instance; } -basic::IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key) +template +basic::IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key) { if (key == "FirstFit") { - return new FirstFitPlanner; + return new FirstFitPlanner(); } else if (key == "Bump") { - return new BumpPlanner; + return new BumpPlanner(); } else if (key == "WIC") { - return new WICPlanner; + return new WICPlanner(); } - return new FirstFitPlanner; // Default Planner + return new FirstFitPlanner(); // Default Planner } +// is this necessary? 
+/** +/usr/bin/ld: libbackend_train.so: undefined reference to +`onert::backend::train::MemoryPlannerFactory::create(std::__cxx11::basic_string, std::allocator > const&)' /usr/bin/ld: libbackend_train.so: undefined +reference to +`onert::backend::train::MemoryPlannerFactory::create(std::__cxx11::basic_string, std::allocator > const&)' /usr/bin/ld: libbackend_train.so: undefined +reference to +`onert::backend::train::MemoryPlannerFactory::get()' +/usr/bin/ld: libbackend_train.so: undefined reference to +`onert::backend::train::MemoryPlannerFactory::get()' +collect2: error: ld returned 1 exit status + */ +template class MemoryPlannerFactory; +template class MemoryPlannerFactory; + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.h b/runtime/onert/backend/train/MemoryPlannerFactory.h index d1609e17559..7f42be46f90 100644 --- a/runtime/onert/backend/train/MemoryPlannerFactory.h +++ b/runtime/onert/backend/train/MemoryPlannerFactory.h @@ -28,7 +28,7 @@ namespace backend namespace train { -class MemoryPlannerFactory +template class MemoryPlannerFactory { public: static MemoryPlannerFactory &get(); @@ -38,7 +38,7 @@ class MemoryPlannerFactory public: // Currently, only the memory planner for DisposableTensor is supported - basic::IMemoryPlanner *create(const std::string &key); + basic::IMemoryPlanner *create(const std::string &key); }; } // namespace train diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc index 80452858057..115cb1e535d 100644 --- a/runtime/onert/backend/train/TensorBuilder.cc +++ b/runtime/onert/backend/train/TensorBuilder.cc @@ -18,6 +18,8 @@ #include "Tensor.h" +#include + namespace onert { namespace backend @@ -97,6 +99,15 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorI _disposable_backprops.add(index); } +void TensorBuilder::registerExtraTensorInfo(const ExtraTensorIndex &index, + const ir::OperandInfo &info) +{ + assert(!info.isDynamic()); + + auto extra_tensor = std::make_unique(info); + _tensor_reg->setExtraTensor(index, std::move(extra_tensor)); +} + void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index) { // TODO Support momory plan @@ -157,6 +168,16 @@ void TensorBuilder::notifyDisposableBackPropLastUse(const DisposableTensorIndex _tensor_mgr->releaseDisposableBackPropPlan(index); } +void TensorBuilder::notifyFirstUse(const ExtraTensorIndex &index) +{ + _tensor_mgr->claimExtraPlan(index); +} + +void TensorBuilder::notifyLastUse(const ExtraTensorIndex &index) +{ + _tensor_mgr->releaseExtraPlan(index); +} + bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const { return _tensor_info_map.find(index) != _tensor_info_map.end(); @@ -185,6 +206,8 @@ void TensorBuilder::allocateBackward(void) _tensor_mgr->allocateDisposableBackPropTensors(); } +void TensorBuilder::allocateExtra(void) { _tensor_mgr->allocateExtraTensors(); } + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h index f6ffbbb0e20..e23e185a047 100644 --- a/runtime/onert/backend/train/TensorBuilder.h +++ b/runtime/onert/backend/train/TensorBuilder.h @@ -18,6 +18,7 @@ #define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__ #include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" #include "TensorManager.h" #include "TensorRegistry.h" #include "util/Set.h" @@ -55,6 +56,8 @@ class TensorBuilder void 
registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index, const ir::OperandInfo &info); + void registerExtraTensorInfo(const ExtraTensorIndex &index, const ir::OperandInfo &info); + // TODO Support memory plan of all tensors void notifyFirstUse(const ir::OperandIndex &); void notifyLastUse(const ir::OperandIndex &); @@ -62,6 +65,8 @@ class TensorBuilder void notifyBackwardLastUse(const ir::OperandIndex &); void notifyDisposableBackPropFirstUse(const DisposableTensorIndex &); void notifyDisposableBackPropLastUse(const DisposableTensorIndex &); + void notifyFirstUse(const ExtraTensorIndex &); + void notifyLastUse(const ExtraTensorIndex &); bool isRegistered(const ir::OperandIndex &) const; bool isRegisteredBackward(const ir::OperandIndex &) const; @@ -69,6 +74,7 @@ class TensorBuilder void allocate(void); void allocateBackward(void); + void allocateExtra(void); // < this function will be called after genKernels private: const std::shared_ptr _tensor_reg; diff --git a/runtime/onert/backend/train/TensorManager.cc b/runtime/onert/backend/train/TensorManager.cc index cf5373fae74..6ee643bf081 100644 --- a/runtime/onert/backend/train/TensorManager.cc +++ b/runtime/onert/backend/train/TensorManager.cc @@ -59,7 +59,8 @@ TensorManager::TensorManager(const std::shared_ptr ®, _back_prop_mgr{new MemoryManager(planner_id)}, _gradient_mgr{new GradientMemoryManager(planner_id, optim_vars_count)}, // TODO Find a suitable planner of disposable tensors to reduce peak memory usage - _disposable_back_prop_mgr{new DisposableMemoryManager(std::string("WIC"))}, _tensors{reg} + _disposable_back_prop_mgr{new DisposableMemoryManager(std::string("WIC"))}, + _extra_mgr{new ExtraMemoryManager(std::string("WIC"))}, _tensors{reg} { // DO NOTHING } @@ -118,6 +119,11 @@ void TensorManager::claimNonConstPlan(const ir::OperandIndex &index) _nonconst_mgr->claimPlan(index, size); } +void TensorManager::allocateExtraTensors() +{ + allocateMemory(_extra_mgr.get(), _tensors->extra_tensors(), std::string{"EXTRA TENSOR "}); +} + void TensorManager::releaseNonConstPlan(const ir::OperandIndex &index) { assert(_tensors->getNonConstTensor(index) && !_tensors->getNonConstTensor(index)->is_dynamic()); @@ -190,6 +196,19 @@ void TensorManager::releaseDisposableBackPropPlan(const DisposableTensorIndex &i _disposable_back_prop_mgr->releasePlan(index); } +void TensorManager::claimExtraPlan(const ExtraTensorIndex &index) +{ + const auto tensor = _tensors->getExtraTensor(index); + + auto size = alignedSize(tensor->total_size(), _align); + _extra_mgr->claimPlan(index, size); +} + +void TensorManager::releaseExtraPlan(const ExtraTensorIndex &index) +{ + _extra_mgr->releasePlan(index); +} + } // namespace train } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/TensorManager.h b/runtime/onert/backend/train/TensorManager.h index f8d29b16e1d..99bedb0781a 100644 --- a/runtime/onert/backend/train/TensorManager.h +++ b/runtime/onert/backend/train/TensorManager.h @@ -18,6 +18,7 @@ #define __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__ #include "DisposableTensorIndex.h" +#include "ExtraTensorIndex.h" #include "MemoryManager.h" #include "TensorRegistry.h" @@ -50,6 +51,8 @@ class TensorManager void allocateBackPropTensors(); void allocateGradientTensors(); void allocateDisposableBackPropTensors(); + void allocateExtraTensors(); + // TODO Add member functions to deallocate tensors void claimNonConstPlan(const ir::OperandIndex &ind); @@ -62,6 +65,8 @@ class TensorManager void releaseGradientPlan(const 
ir::OperandIndex &ind); void claimDisposableBackPropPlan(const DisposableTensorIndex &ind); void releaseDisposableBackPropPlan(const DisposableTensorIndex &ind); + void claimExtraPlan(const ExtraTensorIndex &ind); + void releaseExtraPlan(const ExtraTensorIndex &ind); private: std::unique_ptr _nonconst_mgr; @@ -69,6 +74,7 @@ class TensorManager std::unique_ptr _back_prop_mgr; std::unique_ptr _gradient_mgr; std::unique_ptr _disposable_back_prop_mgr; + std::unique_ptr _extra_mgr; const std::shared_ptr _tensors; }; diff --git a/runtime/onert/backend/train/TensorRegistry.h b/runtime/onert/backend/train/TensorRegistry.h index 13932199a9d..7dd56d53877 100644 --- a/runtime/onert/backend/train/TensorRegistry.h +++ b/runtime/onert/backend/train/TensorRegistry.h @@ -18,6 +18,7 @@ #define __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__ #include +#include #include "DisposableTensorIndex.h" #include "Tensor.h" @@ -60,9 +61,35 @@ class TensorRegistry return _disposable_back_prop; } + ExtraTensor *getExtraTensor(const ExtraTensorIndex &index) + { + auto itr = _extra.find(index); + if (itr != _extra.end()) + return itr->second.get(); + + return nullptr; + } + + void setExtraTensor(const ExtraTensorIndex &index, std::unique_ptr tensor) + { + assert(tensor != nullptr); + auto itr = _extra.find(index); + if (itr != _extra.end()) + throw std::runtime_error{ + "Tried to set a extra tensor but another extra tensor already exists."}; + + _extra[index] = std::move(tensor); + } + + const std::unordered_map> &extra_tensors() + { + return _extra; + } + private: // Disposable Tensors to be accumulated to BackPropTensor std::unordered_map> _disposable_back_prop; + std::unordered_map> _extra; }; } // namespace train diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc index 3c4ce2f7ce1..f9518dbaca6 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc @@ -55,11 +55,19 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs, if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_output->get_info()); - _act_back_prop_output->setBuffer(std::make_shared(_output->total_size())); } } +ExtraTensorRequests BinaryArithmeticLayer::requestExtraTensors() +{ + ExtraTensorRequests req; + + if (_activation != ir::Activation::NONE) + req.push_back(ExtraTensorRequest::createLike(_back_prop_output, &_act_back_prop_output)); + + return req; +} + void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); } void BinaryArithmeticLayer::backward() @@ -72,7 +80,7 @@ void BinaryArithmeticLayer::backward() try { backprop_act = - backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get()); + backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output); } catch (const std::exception &e) { diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h index 60d6e8be1cc..42e1b4327a5 100644 --- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h +++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h @@ -50,6 +50,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs, const IPortableTensor *back_prop_output, const ir::Activation activation, const ArithmeticType arithmetic_type); + 
ExtraTensorRequests requestExtraTensors() override; void forward(bool training) override; void backward() override; @@ -60,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction, ArithmeticType _arithmetic_type; ir::Activation _activation; - std::unique_ptr _act_back_prop_output; + ExtraTensor *_act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc index 41ff7fd1c43..698164f4b62 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc @@ -30,6 +30,7 @@ namespace using namespace onert; +/* template std::unique_ptr createTransposedWeights(const backend::IPortableTensor *origin_weights) { @@ -44,6 +45,28 @@ std::unique_ptr createTransposedWeights(const backend::IPortableTensor * return std::make_unique(transposed_info); } +*/ + +ir::OperandInfo transposeOperandInfo(const ir::OperandInfo &origin_info) +{ + const auto &origin_shape = origin_info.shape(); + assert(origin_shape.rank() == 4); + + auto transposed_info = ir::OperandInfo(origin_info); + auto transposed_shape = + ir::Shape{origin_shape.dim(1), origin_shape.dim(2), origin_shape.dim(3), origin_shape.dim(0)}; + transposed_info.shape(transposed_shape); + + return transposed_info; +} + +backend::train::ExtraTensorRequest +createTransposeTenosrRequest(const backend::IPortableTensor *origin, + backend::train::ExtraTensor **const addr) +{ + return backend::train::ExtraTensorRequest(transposeOperandInfo(origin->get_info()), + backend::train::ExtraTensorLifeTime::BACKWARD, addr); +} } // namespace @@ -79,27 +102,36 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights, if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1) throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet"); - // TODO Optimize transposed tensors - _transposed_weights = createTransposedWeights(weights); - _transposed_weights->setBuffer( - std::make_shared(_transposed_weights->total_size())); - - _conv_back_prop_output = std::make_unique(back_prop_output->get_info()); - _conv_back_prop_output->setBuffer( - std::make_shared(_conv_back_prop_output->total_size())); - - _transposed_grad_weights = createTransposedWeights(weights); - _transposed_grad_weights->setBuffer( - std::make_shared(_transposed_grad_weights->total_size())); + // TO avoid unused parameter error + if (weights == nullptr) + { + }; if (activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); - _act_back_prop_output->setBuffer( - std::make_shared(_act_back_prop_output->total_size())); } } +ExtraTensorRequests ConvolutionLayer::requestExtraTensors() +{ + ExtraTensorRequests reqs; + + auto tr_weights = createTransposeTenosrRequest(_kernel, &_transposed_weights); + reqs.push_back(tr_weights); + + auto conv_back_prop_output = + ExtraTensorRequest::createLike(_back_prop_output, &_conv_back_prop_output); + reqs.push_back(conv_back_prop_output); + + auto tr_grad_weights = createTransposeTenosrRequest(_grad_weights, &_transposed_grad_weights); + reqs.push_back(tr_grad_weights); + + if (_activation != ir::Activation::NONE) + reqs.push_back(ExtraTensorRequest::createLike(_back_prop_output, &_act_back_prop_output)); + + return reqs; +} + void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); } void ConvolutionLayer::backward() { @@ -125,7 +157,7 @@ void ConvolutionLayer::backwardFloat32() try { 
backprop_act = - backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get()); + backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output); } catch (const std::exception &e) { @@ -144,7 +176,7 @@ void ConvolutionLayer::backwardFloat32() conv_train_params.dilation_height_factor = _dilationHeightFactor; // Transpose weights from OHWI to HWIO - auto transposed_weights = _transposed_weights.get(); + auto transposed_weights = _transposed_weights; assert(transposed_weights->getShape().rank() == 4); nnfw::cker::TransposeParams transpose_param; transpose_param.perm_count = transposed_weights->getShape().rank(); @@ -162,7 +194,7 @@ void ConvolutionLayer::backwardFloat32() _paddingRight, getShape(_back_prop_input), getBuffer(_back_prop_input)); // Calculate gradient for weights - auto transposed_grad_weights = _transposed_grad_weights.get(); + auto transposed_grad_weights = _transposed_grad_weights; assert(_grad_weights->getShape().rank() == 4); assert(transposed_grad_weights->getShape().rank() == 4); nnfw::cker::train::ConvFilterGrad( diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h index ef11f68bf57..231e755dd86 100644 --- a/runtime/onert/backend/train/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h @@ -41,6 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction, void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input, IPortableTensor *grad_weights, IPortableTensor *grad_bias, const IPortableTensor *back_prop_output, const ir::Activation activation); + ExtraTensorRequests requestExtraTensors() override; void forward(bool training) override; void backward() override; @@ -54,10 +55,10 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction, const IPortableTensor *_back_prop_output; // TODO Consider if these tensors should be built in TensorBuilder - std::unique_ptr _transposed_weights; - std::unique_ptr _conv_back_prop_output; - std::unique_ptr _act_back_prop_output; - std::unique_ptr _transposed_grad_weights; + ExtraTensor *_transposed_weights; + ExtraTensor *_conv_back_prop_output; + ExtraTensor *_transposed_grad_weights; + ExtraTensor *_act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc index 9443d0fe0ea..901e9b792f6 100644 --- a/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc @@ -56,9 +56,11 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_inp if (activation != ir::Activation::NONE) { + /* _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); _act_back_prop_output->setBuffer( std::make_shared(_act_back_prop_output->total_size())); + */ } const int64_t k_packet_size = [&]() { @@ -75,20 +77,20 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_inp }(); const auto incoming_shape = getShape(_back_prop_output); - const auto filter_shape = getShape(_kernel); - const int batch = incoming_shape.Dims(0); + // const auto filter_shape = getShape(_kernel); + // const int batch = incoming_shape.Dims(0); const int out_depth = incoming_shape.Dims(3); - const int filter_rows = filter_shape.Dims(1); - const int filter_cols = filter_shape.Dims(2); + // const int filter_rows = filter_shape.Dims(1); + // 
const int filter_cols = filter_shape.Dims(2); - const int filter_spatial_size = filter_rows * filter_cols; - const int padded_filter_inner_dim_size = - ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size; + // const int filter_spatial_size = filter_rows * filter_cols; + // const int padded_filter_inner_dim_size = + // ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size; _use_padded_filter = (out_depth % k_packet_size) == 0 ? false : true; // prepare padded_filter buffer for cker - auto padded_filter_info = ir::OperandInfo(_kernel->get_info()); + /* auto padded_filter_info = ir::OperandInfo(_kernel->get_info()); padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size}); _padded_filter = std::make_unique(padded_filter_info); _padded_filter->setBuffer(std::make_shared(_padded_filter->total_size())); @@ -108,6 +110,55 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_inp _filter_dim_buffers = std::make_unique(filter_dim_buffers_info); _filter_dim_buffers->setBuffer( std::make_shared(_filter_dim_buffers->total_size())); + */ +} + +ExtraTensorRequests DepthwiseConvolutionLayer::requestExtraTensors() +{ + ExtraTensorRequests reqs; + + reqs.push_back(ExtraTensorRequest::createLike(_back_prop_output, &_act_back_prop_output)); + + const auto incoming_shape = getShape(_back_prop_output); + const auto batch = incoming_shape.Dims(0); + const auto depth = incoming_shape.Dims(3); + + const auto filter_shape = getShape(_kernel); + const int filter_rows = filter_shape.Dims(1); + const int filter_cols = filter_shape.Dims(2); + const int filter_spatial_size = filter_rows * filter_cols; + + const auto k_packet_size = _dconv_kernel->kPacketSize(); + const int padded_filter_inner_dim_size = + ((depth + k_packet_size - 1) / k_packet_size) * k_packet_size; + + const int thread_count = _dconv_kernel->getThreadCount(); + + // _padded_filter + { + auto type_info = _kernel->get_info().typeInfo(); + ir::Shape shape({batch, filter_spatial_size, padded_filter_inner_dim_size}); + auto info = ir::OperandInfo::createStaticInfo(shape, type_info); + reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_padded_filter); + } + + // _filter_buffers + { + auto type_info = _kernel->get_info().typeInfo(); + ir::Shape shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size}); + auto info = ir::OperandInfo::createStaticInfo(shape, type_info); + reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_filter_buffers); + } + + // _filter_dim_buffers + { + auto type = _back_prop_input->get_info().typeInfo(); + ir::Shape shape({thread_count, padded_filter_inner_dim_size}); + auto info = ir::OperandInfo::createStaticInfo(shape, type); + reqs.emplace_back(info, ExtraTensorLifeTime::BACKWARD, &_filter_dim_buffers); + } + + return reqs; } void DepthwiseConvolutionLayer::forward(bool) { cpu::ops::DepthwiseConvolutionLayer::run(); } @@ -136,7 +187,7 @@ void DepthwiseConvolutionLayer::backwardFloat32() try { backprop_act = - backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get()); + backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output); } catch (const std::exception &e) { @@ -156,15 +207,15 @@ void DepthwiseConvolutionLayer::backwardFloat32() // Calculate gradient for input nnfw::cker::train::backpropInput( dconv_params, getShape(backprop_act), getBuffer(backprop_act), getShape(_kernel), - getBuffer(_kernel), getBuffer(_padded_filter.get()), getShape(_back_prop_input), - 
getBuffer(_back_prop_input), _use_padded_filter, getBuffer(_filter_buffers.get()), - getBuffer(_filter_dim_buffers.get())); + getBuffer(_kernel), getBuffer(_padded_filter), getShape(_back_prop_input), + getBuffer(_back_prop_input), _use_padded_filter, getBuffer(_filter_buffers), + getBuffer(_filter_dim_buffers)); // Calculate gradient for weights nnfw::cker::train::backpropFilter( dconv_params, getShape(backprop_act), getBuffer(backprop_act), getShape(_input), getBuffer(_input), getShape(_grad_weights), getBuffer(_grad_weights), - getBuffer(_padded_filter.get()), getBuffer(_filter_buffers.get())); + getBuffer(_padded_filter), getBuffer(_filter_buffers)); // Calculate gradient for bias if (_bias) diff --git a/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h index 5cd98e56721..1c98b24e18d 100644 --- a/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h +++ b/runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h @@ -42,6 +42,8 @@ class DepthwiseConvolutionLayer : public ::onert::exec::train::ITrainableFunctio void configureBackward(IPortableTensor *back_prop_input, IPortableTensor *grad_weights, IPortableTensor *grad_bias, const IPortableTensor *back_prop_output, const ir::Activation activation); + + ExtraTensorRequests requestExtraTensors() override; void forward(bool training) override; void backward() override; @@ -54,12 +56,12 @@ class DepthwiseConvolutionLayer : public ::onert::exec::train::ITrainableFunctio IPortableTensor *_back_prop_input; const IPortableTensor *_back_prop_output; - // TODO Consider if these tensors should be built in TensorBuilder - std::unique_ptr _act_back_prop_output; + ExtraTensor *_act_back_prop_output; + bool _use_padded_filter; - std::unique_ptr _padded_filter; - std::unique_ptr _filter_buffers; - std::unique_ptr _filter_dim_buffers; + ExtraTensor *_padded_filter; + ExtraTensor *_filter_buffers; + ExtraTensor *_filter_dim_buffers; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc index 9d35655b26f..f2e189134c7 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc @@ -28,17 +28,24 @@ namespace using namespace onert; -std::unique_ptr -createTransposedTensor(const backend::IPortableTensor *origin_tensor) +ir::OperandInfo transposeOperandInfo(const ir::OperandInfo &origin_info) { - const auto &origin_shape = origin_tensor->getShape(); + const auto &origin_shape = origin_info.shape(); assert(origin_shape.rank() == 2); - auto transposed_info = origin_tensor->get_info(); + auto transposed_info = ir::OperandInfo(origin_info); auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)}; transposed_info.shape(transposed_shape); - return std::make_unique(transposed_info); + return transposed_info; +} + +backend::train::ExtraTensorRequest +createTransposeTenosrRequest(const backend::IPortableTensor *origin, + backend::train::ExtraTensor **const addr) +{ + return backend::train::ExtraTensorRequest(transposeOperandInfo(origin->get_info()), + backend::train::ExtraTensorLifeTime::BACKWARD, addr); } } // namespace @@ -85,22 +92,27 @@ void FullyConnectedLayer::configureBackward( throw std::runtime_error{ "train FullyConnectedLayer: Input other ranks than 2 are not supported."}; - _transposed_weights = createTransposedTensor(weights); - _transposed_weights->setBuffer(std::make_shared(weights->total_size())); 
+ if (activation != ir::Activation::NONE) + { + } +} + +ExtraTensorRequests FullyConnectedLayer::requestExtraTensors() +{ + ExtraTensorRequests reqs; + + reqs.push_back(createTransposeTenosrRequest(_weights, &_transposed_weights)); - _transposed_input = createTransposedTensor(input); - _transposed_input->setBuffer(std::make_shared(input->total_size())); + reqs.push_back(createTransposeTenosrRequest(_input, &_transposed_input)); - _transposed_back_prop_output = createTransposedTensor(back_prop_output); - _transposed_back_prop_output->setBuffer( - std::make_shared(back_prop_output->total_size())); + reqs.push_back(createTransposeTenosrRequest(_back_prop_output, &_transposed_back_prop_output)); - if (activation != ir::Activation::NONE) + if (_activation != ir::Activation::NONE) { - _act_back_prop_output = std::make_unique(_back_prop_output->get_info()); - _act_back_prop_output->setBuffer( - std::make_shared(_back_prop_output->total_size())); + reqs.push_back(ExtraTensorRequest::createLike(_back_prop_output, &_act_back_prop_output)); } + + return reqs; } void FullyConnectedLayer::forward(bool) { cpu::ops::FullyConnectedLayer::run(); } @@ -130,7 +142,7 @@ void FullyConnectedLayer::backwardFloat32() try { backprop_act = - backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output.get()); + backpropActivation(_activation, _output, _back_prop_output, _act_back_prop_output); } catch (const std::exception &e) { @@ -157,7 +169,7 @@ void FullyConnectedLayer::backwardFloat32() // Transpose and compute gradient for input // ∂L/∂X = fc(Incoming gradient, transposed W) - auto transposed_weights = _transposed_weights.get(); + auto transposed_weights = _transposed_weights; assert(transposed_weights->getShape().rank() == 2); nnfw::cker::Transpose(transpose_param, getShape(_weights), getBuffer(_weights), getShape(transposed_weights), getBuffer(transposed_weights)); @@ -169,12 +181,12 @@ void FullyConnectedLayer::backwardFloat32() // Transpose and compute gradient for weights // ∂L/∂W = fc(transposed incomming gradient, transposed X) - auto transposed_input = _transposed_input.get(); + auto transposed_input = _transposed_input; assert(transposed_input->getShape().rank() == 2); nnfw::cker::Transpose(transpose_param, getShape(_input), getBuffer(_input), getShape(transposed_input), getBuffer(transposed_input)); - auto transposed_back_prop_output = _transposed_back_prop_output.get(); + auto transposed_back_prop_output = _transposed_back_prop_output; assert(transposed_back_prop_output->getShape().rank() == 2); nnfw::cker::Transpose(transpose_param, getShape(backprop_act), getBuffer(backprop_act), getShape(transposed_back_prop_output), diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h b/runtime/onert/backend/train/ops/FullyConnectedLayer.h index 190bfbffe42..a19da636d8d 100644 --- a/runtime/onert/backend/train/ops/FullyConnectedLayer.h +++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h @@ -46,6 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, const IPortableTensor *back_prop_output, ir::Activation activation, ir::FullyConnectedWeightsFormat weights_format); + ExtraTensorRequests requestExtraTensors() override; void forward(bool training) override; void backward() override; @@ -58,11 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction, IPortableTensor *_back_prop_input; const IPortableTensor *_back_prop_output; - // TODO Optimize memory - std::unique_ptr _transposed_weights; - std::unique_ptr _transposed_input; - 
std::unique_ptr _transposed_back_prop_output; - std::unique_ptr _act_back_prop_output; + ExtraTensor *_transposed_weights; + ExtraTensor *_transposed_input; + ExtraTensor *_transposed_back_prop_output; + ExtraTensor *_act_back_prop_output; }; } // namespace ops diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc index f77d58e6517..0e0c468de5e 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.cc +++ b/runtime/onert/backend/train/ops/PoolLayer.cc @@ -41,78 +41,90 @@ class MaxPool2D final : public TrainingKernelRegistry const IPortableTensor *_output; nnfw::cker::PoolParams _op_params; - std::unique_ptr _act_back_prop_output; - std::unique_ptr _arg_max_index; + ExtraTensor *_act_back_prop_output; + ExtraTensor *_arg_max_index; public: MaxPool2D(const uint32_t paddingLeft, const uint32_t, const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t kernelWidth, const uint32_t kernelHeight, const ir::Activation activation, const IPortableTensor *output) - : _activation(activation), _output(output) - { - { - _op_params.stride_height = strideHeight; - _op_params.stride_width = strideWidth; - _op_params.filter_height = kernelHeight; - _op_params.filter_width = kernelWidth; - _op_params.padding_values.height = (int8_t)paddingTop; - _op_params.padding_values.width = (int8_t)paddingLeft; - CalculateActivationRange(activation, &_op_params.float_activation_min, - &_op_params.float_activation_max); - } - - _arg_max_index = std::make_unique(_output->get_info()); - _arg_max_index->setBuffer(std::make_shared(_output->total_size())); - - if (activation != ir::Activation::NONE) - { - _act_back_prop_output = std::make_unique(_output->get_info()); - _act_back_prop_output->setBuffer(std::make_shared(_output->total_size())); - } - }; - - ~MaxPool2D() {} + : _activation(activation), _output(output){{_op_params.stride_height = strideHeight; + _op_params.stride_width = strideWidth; + _op_params.filter_height = kernelHeight; + _op_params.filter_width = kernelWidth; + _op_params.padding_values.height = (int8_t)paddingTop; + _op_params.padding_values.width = (int8_t)paddingLeft; + CalculateActivationRange(activation, &_op_params.float_activation_min, + &_op_params.float_activation_max); +} + +/* +_arg_max_index = std::make_unique(_output->get_info()); +_arg_max_index->setBuffer(std::make_shared(_output->total_size())); + +if (activation != ir::Activation::NONE) +{ + _act_back_prop_output = std::make_unique(_output->get_info()); + _act_back_prop_output->setBuffer(std::make_shared(_output->total_size())); +} +*/ +}; // namespace + +~MaxPool2D() {} public: - void forward(const IPortableTensor *in, IPortableTensor *out) - { - assert(in->layout() == ir::Layout::NHWC); +ExtraTensorRequests requestExtraTensors() override +{ + auto r1 = ExtraTensorRequest(_output->get_info(), ExtraTensorLifeTime::FORWARD_TO_BACKWARD, + &_arg_max_index); + auto r2 = ExtraTensorRequest::createLike(_output, &_act_back_prop_output); - auto out_shape = getShape(out); - auto out_data = getBuffer(out); - auto arg_max_index = _arg_max_index.get(); + ExtraTensorRequests reqs; + reqs.push_back(r1); + reqs.push_back(r2); - // maxpool forward - nnfw::cker::train::MaxPool2D(_op_params, getShape(in), getBuffer(in), out_shape, - out_data, getBuffer(arg_max_index)); - } + return reqs; +} + +public: +void forward(const IPortableTensor *in, IPortableTensor *out) override +{ + assert(in->layout() == ir::Layout::NHWC); - void backward(const 
IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) + auto out_shape = getShape(out); + auto out_data = getBuffer(out); + auto arg_max_index = _arg_max_index; + + // maxpool forward + nnfw::cker::train::MaxPool2D(_op_params, getShape(in), getBuffer(in), out_shape, out_data, + getBuffer(arg_max_index)); +} + +void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) override +{ + assert(back_prop_out->layout() == ir::Layout::NHWC); + + // activation backward + try + { + back_prop_out = backpropActivation(_activation, _output, back_prop_out, _act_back_prop_output); + } + catch (const std::exception &e) { - assert(back_prop_out->layout() == ir::Layout::NHWC); - - // activation backward - try - { - back_prop_out = - backpropActivation(_activation, _output, back_prop_out, _act_back_prop_output.get()); - } - catch (const std::exception &e) - { - throw std::runtime_error{"train PoolLayer: " + std::string(e.what())}; - } - assert(back_prop_out != nullptr); - - // maxpool baackward - auto arg_max_index = _arg_max_index.get(); - nnfw::cker::train::MaxPool2DGrad(getShape(back_prop_out), getBuffer(back_prop_out), - getBuffer(arg_max_index), getShape(back_prop_in), - getBuffer(back_prop_in)); + throw std::runtime_error{"train PoolLayer: " + std::string(e.what())}; } -}; + assert(back_prop_out != nullptr); -} // namespace + // maxpool baackward + auto arg_max_index = _arg_max_index; + nnfw::cker::train::MaxPool2DGrad(getShape(back_prop_out), getBuffer(back_prop_out), + getBuffer(arg_max_index), getShape(back_prop_in), + getBuffer(back_prop_in)); +} +}; // namespace ops + +} // namespace train PoolLayer::PoolLayer() : cpu::ops::PoolLayer(), _back_prop_input(nullptr), _back_prop_output(nullptr), _kernel(nullptr) @@ -149,6 +161,8 @@ void PoolLayer::configureBackward(const uint32_t paddingLeft, const uint32_t pad } } +ExtraTensorRequests PoolLayer::requestExtraTensors() { return _kernel->requestExtraTensors(); } + void PoolLayer::forward(bool training) { if (training) @@ -163,7 +177,7 @@ void PoolLayer::forward(bool training) void PoolLayer::backward() { _kernel->backward(_back_prop_output, _back_prop_input); } -} // namespace ops -} // namespace train +} // namespace backend +} // namespace onert } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h index 5ced951ae6a..238e5afe77e 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.h +++ b/runtime/onert/backend/train/ops/PoolLayer.h @@ -38,6 +38,8 @@ class TrainingKernelRegistry public: virtual void forward(const IPortableTensor *in, IPortableTensor *out) = 0; virtual void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) = 0; + virtual ExtraTensorRequests requestExtraTensors() = 0; + TrainingKernelRegistry() = default; virtual ~TrainingKernelRegistry() = default; }; @@ -61,6 +63,7 @@ class PoolLayer : public ::onert::exec::train::ITrainableFunction, public cpu::o IPortableTensor *output, IPortableTensor *back_prop_input, const IPortableTensor *back_prop_output); + ExtraTensorRequests requestExtraTensors() override; void forward(bool training) override; void backward() override; diff --git a/runtime/onert/core/include/backend/train/ExtraTensor.h b/runtime/onert/core/include/backend/train/ExtraTensor.h new file mode 100644 index 00000000000..20e2bb32e24 --- /dev/null +++ b/runtime/onert/core/include/backend/train/ExtraTensor.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_EXTRA_H__ +#define __ONERT_BACKEND_EXTRA_H__ + +#include + +namespace onert +{ +namespace backend +{ +namespace train +{ + +// Q: Is this renaming is necessary? +// ExtraTensor means that a tensor accessed by only one specific operation layer. +class ExtraTensor final : public basic::Tensor +{ +public: + ExtraTensor() = delete; + +public: + ExtraTensor(const ir::OperandInfo &info) : basic::Tensor(info, nullptr) + { + // DO NOTHING + } +}; + +} // namespace train +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_EXTRA_TENSOR_H__ diff --git a/runtime/onert/core/include/backend/train/ExtraTensorRequest.h b/runtime/onert/core/include/backend/train/ExtraTensorRequest.h new file mode 100644 index 00000000000..25d912fd908 --- /dev/null +++ b/runtime/onert/core/include/backend/train/ExtraTensorRequest.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_EXTRA_TENSOR_REQUEST_H__ +#define __ONERT_BACKEND_EXTRA_TENSOR_REQUEST_H__ + +#include "backend/train/ExtraTensor.h" + +namespace onert +{ +namespace backend +{ +namespace train +{ + +enum class ExtraTensorLifeTime +{ + BACKWARD, // alive during backward() + FORWARD_TO_BACKWARD, // alive from forward to backward() +}; + +class ExtraTensorRequest +{ + +public: + ExtraTensorRequest(ir::OperandInfo info, ExtraTensorLifeTime lt, + backend::train::ExtraTensor **addr) + : info(info), lifetime(lt), address(addr) + { + } + + static ExtraTensorRequest createLike(const IPortableTensor *origin, + backend::train::ExtraTensor **const addr) + { + assert(origin != nullptr); + assert(addr != nullptr); + + return ExtraTensorRequest(origin->get_info(), ExtraTensorLifeTime::BACKWARD, addr); + } + +public: + ir::OperandInfo info; + ExtraTensorLifeTime lifetime; + backend::train::ExtraTensor **address; +}; + +using ExtraTensorRequests = std::vector; + +} // namespace train +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_EXTRA_TENSOR_REQUEST_H__ diff --git a/runtime/onert/core/include/exec/train/ITrainableFunction.h b/runtime/onert/core/include/exec/train/ITrainableFunction.h index 45adc258f68..1d1271186f2 100644 --- a/runtime/onert/core/include/exec/train/ITrainableFunction.h +++ b/runtime/onert/core/include/exec/train/ITrainableFunction.h @@ -18,6 +18,7 @@ #define __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__ #include +#include namespace onert { @@ -26,12 +27,21 @@ namespace exec namespace train { +// Q: function 'extraExtraTensors' is not PURE virutal function, If so, Do we need to change this +// class name? ITrainableFunction -> TrainableFunction class ITrainableFunction { public: virtual ~ITrainableFunction() = default; virtual void forward(bool training) = 0; virtual void backward() = 0; + + // Implement this if extra tensor is needed + virtual backend::train::ExtraTensorRequests requestExtraTensors() + { + backend::train::ExtraTensorRequests r; + return r; + } }; } // namespace train
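
Note (reviewer sketch, not part of the patch): the pieces above fit together as follows. A trainable kernel overrides ITrainableFunction::requestExtraTensors(); BackendContext::genKernels() collects the requests per operation; ExtraTensorGenerator registers each request with TensorBuilder; plan() claims and releases every extra tensor according to its ExtraTensorLifeTime along the forward and backward orders; and allocate() materializes the buffers, after which the raw ExtraTensor* written through ExtraTensorRequest::address becomes valid. A minimal, hypothetical layer using this flow might look like the sketch below. ExampleLayer and its members (_scratch, _saved, _output, _back_prop_output) are illustrative names only; the types and calls follow the headers added in this diff.

#include <backend/IPortableTensor.h>
#include <backend/train/ExtraTensorRequest.h>
#include <exec/train/ITrainableFunction.h>

// Hypothetical layer illustrating the ExtraTensor request flow (not part of this patch).
class ExampleLayer : public onert::exec::train::ITrainableFunction
{
public:
  onert::backend::train::ExtraTensorRequests requestExtraTensors() override
  {
    using namespace onert::backend::train;
    ExtraTensorRequests reqs;
    // Scratch tensor shaped like the back-prop output, alive only during backward()
    reqs.push_back(ExtraTensorRequest::createLike(_back_prop_output, &_scratch));
    // Tensor written in forward() and read in backward(), so it must stay alive in between
    reqs.emplace_back(_output->get_info(), ExtraTensorLifeTime::FORWARD_TO_BACKWARD, &_saved);
    return reqs;
  }

  void forward(bool) override { /* write into _saved as needed */ }
  void backward() override { /* read _saved, use _scratch as temporary storage */ }

private:
  const onert::backend::IPortableTensor *_output = nullptr;
  const onert::backend::IPortableTensor *_back_prop_output = nullptr;
  onert::backend::train::ExtraTensor *_scratch = nullptr;
  onert::backend::train::ExtraTensor *_saved = nullptr;
};

Because a request carries only an ir::OperandInfo plus a lifetime, planning can reuse the same FirstFit/Bump/WIC machinery that already serves DisposableTensorIndex, which is what the TrainMemoryManager and MemoryPlanner templating in this diff enables.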