diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc
index 59fee712247..b903fd09faf 100644
--- a/runtime/onert/backend/train/BackendContext.cc
+++ b/runtime/onert/backend/train/BackendContext.cc
@@ -16,12 +16,14 @@
 #include "BackendContext.h"
 
+#include "ExtraTensorGenerator.h"
 #include "TensorBuilder.h"
 #include "TensorPlanner.h"
 #include "KernelGenerator.h"
 #include "ops/BackPropInitializer.h"
 
 #include
+#include
 #include
 #include
 
@@ -179,6 +181,32 @@ FunctionMap BackendContext::gen()
   //   fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
   // }
 
+  ExtraTensorGenerator extra_tensor_gen(trainable_graph(), _tensor_builder, _tensor_registry);
+
+  const auto &ops = trainable_graph()->operations();
+
+  for (auto &pair : fn_map)
+  {
+    auto &op_idx = pair.first;
+    auto &fn_seq = pair.second;
+
+    const ir::IOperation *op = &ops.at(op_idx);
+    const auto trainable_op = dynamic_cast<const ir::train::ITrainableOperation *>(op);
+    assert(trainable_op != nullptr);
+
+    if (not trainable_op->isRequiredForBackward())
+      continue;
+
+    VERBOSE(ExtraTensor) << "register tensor for " << trainable_op->name() << std::endl;
+
+    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
+      extra_tensor_gen.register_tensors(op_idx, fn.registerExtraTensors());
+    });
+  }
+  extra_tensor_gen.plan();
+  extra_tensor_gen.allocate();
+
   return fn_map;
 }
diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.cc b/runtime/onert/backend/train/ExtraTensorGenerator.cc
new file mode 100644
index 00000000000..63dbca4d7a5
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorGenerator.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExtraTensorGenerator.h"
+
+#include "ExtraTensorIndex.h"
+
+#include <memory>
+#include <sstream>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+ExtraTensorGenerator::ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                                           std::shared_ptr<TensorBuilder> &tensor_builder,
+                                           std::shared_ptr<ITensorRegistry> &tensor_registry)
+  : _tgraph(tgraph), _tensor_builder(tensor_builder)
+{
+  _tensor_reg = std::dynamic_pointer_cast<TensorRegistry>(tensor_registry);
+}
+
+// Move to BackendContext.cc
+void ExtraTensorGenerator::register_tensors(ir::OperationIndex op_idx,
+                                            std::optional<ExtraTensors> &&tensors)
+{
+  if (not tensors.has_value())
+    return;
+
+  auto extra_tensors = tensors.value();
+
+  auto &operations = _tgraph->operations();
+
+  for (size_t i = 0; i < extra_tensors.size(); i++)
+  {
+    // register tensor
+    ExtraTensorIndex tensor_idx(op_idx, i);
+    _tensor_builder->registerExtraTensor(tensor_idx, extra_tensors[i]);
+
+    std::stringstream op_info;
+    op_info << op_idx << "_" << operations.at(op_idx).name();
+    VERBOSE(ExtraTensorGenerator) << "register (idx:" << tensor_idx << ") requested from "
+                                  << op_info.str() << std::endl;
+  }
+  return;
+}
+
+ExtraTensors ExtraTensorGenerator::getExtraTensors(const ir::OperationIndex &op_index)
+{
+  ExtraTensors tensors;
+
+  int sub_index = 0;
+  auto tensor = _tensor_reg->getExtraTensor(ExtraTensorIndex(op_index, sub_index));
+  while (tensor != nullptr)
+  {
+    tensors.push_back(tensor);
+
+    VERBOSE(ExtraTensorGenerator) << "found extra tensor " << op_index << "-" << sub_index
+                                  << std::endl;
+
+    sub_index++;
+    tensor = _tensor_reg->getExtraTensor(ExtraTensorIndex(op_index, sub_index));
+  }
+
+  return tensors;
+}
+
+// Move to TensorPlanner
+void ExtraTensorGenerator::plan()
+{
+  // forwarding order
+  const auto f_order = _tgraph->topolSortOperations();
+  for (const auto &op_index : f_order)
+  {
+    auto tensors = getExtraTensors(op_index);
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+
+  // backwarding order
+  const auto b_order = _tgraph->essentialBackwardOrder();
+  for (const auto &op_index : b_order)
+  {
+    auto tensors = getExtraTensors(op_index);
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD || lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyLastUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+}
+
+// Move to allocateBackward()
+void ExtraTensorGenerator::allocate() { _tensor_builder->allocateExtra(); }
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.h b/runtime/onert/backend/train/ExtraTensorGenerator.h
new file mode 100644
index 00000000000..063f52e5d66
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorGenerator.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+#define __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+
+#include <ir/train/TrainableGraph.h>
+#include <memory>
+#include <optional>
+
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class ExtraTensorGenerator
+{
+public:
+  ExtraTensorGenerator() = delete;
+
+  ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                       std::shared_ptr<TensorBuilder> &tensor_builder,
+                       std::shared_ptr<ITensorRegistry> &tensor_registry);
+
+public:
+  // Since 'register' is a reserved keyword, use 'register_tensors' instead of 'register'
+  void register_tensors(ir::OperationIndex idx, std::optional<ExtraTensors> &&tensors);
+  void plan();
+  void allocate();
+
+private:
+  ExtraTensors getExtraTensors(const ir::OperationIndex &op_index);
+
+  const ir::train::TrainableGraph *_tgraph;
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<TensorRegistry> _tensor_reg;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
diff --git a/runtime/onert/backend/train/MemoryManager.cc b/runtime/onert/backend/train/MemoryManager.cc
index 4902e2a7eaa..7948d9af205 100644
--- a/runtime/onert/backend/train/MemoryManager.cc
+++ b/runtime/onert/backend/train/MemoryManager.cc
@@ -17,6 +17,7 @@
 #include "MemoryManager.h"
 
 #include "MemoryPlannerFactory.h"
+#include "ExtraTensorIndex.h"
 
 #include
 
@@ -53,46 +54,53 @@ uint8_t *TrainableMemoryManager::getOptVarBuffer(const ir::OperandIndex &ind,
   return _var_mem_alloc->base() + var_offset + mem_blk.offset;
 }
 
-DisposableMemoryManager::DisposableMemoryManager() : _mem_planner{createMemoryPlanner()}
+template <typename Index>
+TrainMemoryManager<Index>::TrainMemoryManager() : _mem_planner{createMemoryPlanner()}
 {
   // DO NOTHING
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *DisposableMemoryManager::createMemoryPlanner()
+template <typename Index>
+basic::IMemoryPlanner<Index> *TrainMemoryManager<Index>::createMemoryPlanner()
 {
   auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
-  return MemoryPlannerFactory::get().create(planner_id);
+  return MemoryPlannerFactory<Index>::get().create(planner_id);
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *
-DisposableMemoryManager::createMemoryPlanner(const std::string planner_id)
+template <typename Index>
+basic::IMemoryPlanner<Index> *
+TrainMemoryManager<Index>::createMemoryPlanner(const std::string planner_id)
 {
-  return MemoryPlannerFactory::get().create(planner_id);
+  return MemoryPlannerFactory<Index>::get().create(planner_id);
 }
 
-void DisposableMemoryManager::claimPlan(const DisposableTensorIndex &ind, uint32_t size)
+template <typename Index>
+void TrainMemoryManager<Index>::claimPlan(const Index &ind, uint32_t size)
 {
   _mem_planner->claim(ind, size);
 }
 
-void DisposableMemoryManager::releasePlan(const DisposableTensorIndex &ind)
+template <typename Index> void TrainMemoryManager<Index>::releasePlan(const Index &ind)
 {
   _mem_planner->release(ind);
 }
 
-void DisposableMemoryManager::allocate(void)
+template <typename Index> void TrainMemoryManager<Index>::allocate(void)
 {
   _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
   assert(_mem_alloc->base());
 }
 
-uint8_t *DisposableMemoryManager::getBuffer(const DisposableTensorIndex &ind) const
+template <typename Index> uint8_t *TrainMemoryManager<Index>::getBuffer(const Index &ind) const
 {
   assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
   const auto &mem_blk = _mem_planner->memory_plans().at(ind);
   return _mem_alloc->base() + mem_blk.offset;
 }
 
+// Instantiation
+template class TrainMemoryManager<DisposableTensorIndex>;
+template class TrainMemoryManager<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryManager.h b/runtime/onert/backend/train/MemoryManager.h
index 19a60e32deb..b15de09d239 100644
--- a/runtime/onert/backend/train/MemoryManager.h
+++ b/runtime/onert/backend/train/MemoryManager.h
@@ -20,6 +20,7 @@
 #include
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 
 namespace onert
 {
@@ -44,29 +45,33 @@ class TrainableMemoryManager : public MemoryManager
   uint32_t _optim_vars_count;
 };
 
-class DisposableMemoryManager
+// TODO: Find a better name
+template <typename Index> class TrainMemoryManager
 {
 public:
-  DisposableMemoryManager();
+  TrainMemoryManager();
 
   void allocate(void);
-  uint8_t *getBuffer(const DisposableTensorIndex &ind) const;
+  uint8_t *getBuffer(const Index &ind) const;
   void deallocate(void) { _mem_alloc->release(); }
 
-  void claimPlan(const DisposableTensorIndex &ind, uint32_t size);
-  void releasePlan(const DisposableTensorIndex &ind);
+  void claimPlan(const Index &ind, uint32_t size);
+  void releasePlan(const Index &ind);
 
   std::shared_ptr<basic::Allocator> getMemAlloc() { return _mem_alloc; }
 
private:
-  basic::IMemoryPlanner<DisposableTensorIndex> *createMemoryPlanner();
-  basic::IMemoryPlanner<DisposableTensorIndex> *createMemoryPlanner(const std::string planner_id);
+  basic::IMemoryPlanner<Index> *createMemoryPlanner();
+  basic::IMemoryPlanner<Index> *createMemoryPlanner(const std::string planner_id);
 
 private:
-  std::shared_ptr<basic::IMemoryPlanner<DisposableTensorIndex>> _mem_planner;
+  std::shared_ptr<basic::IMemoryPlanner<Index>> _mem_planner;
   std::shared_ptr<basic::Allocator> _mem_alloc;
 };
 
+using DisposableMemoryManager = TrainMemoryManager<DisposableTensorIndex>;
+using ExtraMemoryManager = TrainMemoryManager<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryPlanner.cc b/runtime/onert/backend/train/MemoryPlanner.cc
index ea385558e28..e60e9f99948 100644
--- a/runtime/onert/backend/train/MemoryPlanner.cc
+++ b/runtime/onert/backend/train/MemoryPlanner.cc
@@ -15,6 +15,8 @@
  */
 
 #include "MemoryPlanner.h"
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 
 #include
 
@@ -27,7 +29,7 @@ namespace backend
 namespace train
 {
 
-void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void BumpPlanner<Index>::claim(const Index &ind, size_t size)
 {
   basic::Block blk{_capacity, size};
   _mem_plans[ind] = blk;
@@ -36,7 +38,7 @@ void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
 }
 
-void BumpPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void BumpPlanner<Index>::release(const Index &ind)
 {
   VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
                       << "NOTHING does" << std::endl;
@@ -56,7 +58,7 @@
 // point in time, it means the place at the offset can be claimed.
 // 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
 // the previous claim_base_offset.
-void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void FirstFitPlanner<Index>::claim(const Index &ind, size_t size)
 {
   // Find the right position for claiming
   uint32_t next_offset = 0;
@@ -88,7 +90,7 @@ void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   }
 }
 
-void FirstFitPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void FirstFitPlanner<Index>::release(const Index &ind)
 {
   for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
   {
@@ -107,14 +109,15 @@ void FirstFitPlanner::release(const DisposableTensorIndex &ind)
   assert(!"Cannot release for given index. It has been not claimed or released already.");
 }
 
-WICPlanner::WICPlanner()
+template <typename Index>
+WICPlanner<Index>::WICPlanner()
   : _initialized(false), _capacity(0), _mem_plans(), _live_indices(), _interference_graph(),
     _indices()
 {
   // DO NOTHING
 }
 
-void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void WICPlanner<Index>::claim(const Index &ind, size_t size)
 {
   _indices.emplace(size, ind);
   _interference_graph[ind].insert(_interference_graph[ind].end(), _live_indices.cbegin(),
@@ -128,7 +131,7 @@ void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
 }
 
-void WICPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void WICPlanner<Index>::release(const Index &ind)
 {
   _live_indices.erase(ind);
   VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
@@ -143,7 +146,7 @@
  * 3. Allocate memory block for sorted operands
  *   - Find free memory block which does not overlap with interfered operands
  */
-void WICPlanner::buildMemoryPlans()
+template <typename Index> void WICPlanner<Index>::buildMemoryPlans()
 {
   for (const auto &[size, ind] : _indices)
   {
@@ -194,13 +197,22 @@
   _indices.clear();
 }
 
-std::unordered_map<DisposableTensorIndex, basic::Block> &WICPlanner::memory_plans()
+template <typename Index>
+typename WICPlanner<Index>::MemoryPlans &WICPlanner<Index>::memory_plans()
 {
   if (!_initialized)
     buildMemoryPlans();
   return _mem_plans;
 }
 
+template class BumpPlanner<DisposableTensorIndex>;
+template class BumpPlanner<ExtraTensorIndex>;
+
+template class FirstFitPlanner<DisposableTensorIndex>;
+template class FirstFitPlanner<ExtraTensorIndex>;
+
+template class WICPlanner<DisposableTensorIndex>;
+template class WICPlanner<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryPlanner.h b/runtime/onert/backend/train/MemoryPlanner.h
index 181dd5e6979..5e3f48e02f8 100644
--- a/runtime/onert/backend/train/MemoryPlanner.h
+++ b/runtime/onert/backend/train/MemoryPlanner.h
@@ -24,13 +24,14 @@
 
 #include
 
-#include "DisposableTensorIndex.h"
-
 #include
 #include
 #include
 #include
 
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
+
 namespace onert
 {
 namespace backend
@@ -41,20 +42,22 @@ namespace train
 /**
  * @brief Class to plan memory by bump way
 */
-class BumpPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class BumpPlanner : public basic::IMemoryPlanner<Index>
 {
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   /**
    * @brief Claim memory for tensor by bump way
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by bump way
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
  /**
   * @brief Get capacity for memory planning
   * @return The value of capacity
@@ -74,20 +77,22 @@ class BumpPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
 /**
  * @brief Class to plan memory by firstfit way
 */
-class FirstFitPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class FirstFitPlanner : public basic::IMemoryPlanner<Index>
 {
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   /**
    * @brief Claim memory for tensor by firstfit way
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by firstfit way
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
   /**
    * @brief Get capacity for memory planning
    * @return The value of capacity
@@ -103,14 +108,17 @@
   uint32_t _capacity = 0;
   MemoryPlans _mem_plans;
   // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
-  std::map<uint32_t, DisposableTensorIndex> _claim_table;
+  std::map<uint32_t, Index> _claim_table;
 };
 
 /**
  * @brief Class to plan memory by Weighted Interval Color algorithm
 */
-class WICPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class WICPlanner : public basic::IMemoryPlanner<Index>
 {
+public:
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   WICPlanner();
 
@@ -119,12 +127,12 @@
   /**
    * @brief Claim memory for tensor by WIC algorithm
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by WIC algorithm
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
   /**
    * @brief Get capacity for memory planning
    * @return The value of capacity
@@ -147,10 +155,10 @@
   bool _initialized;
   uint32_t _capacity;
   MemoryPlans _mem_plans;
-  std::unordered_set<DisposableTensorIndex> _live_indices;
-  DisposableTensorIndexMap<std::vector<DisposableTensorIndex>> _interference_graph;
+  std::unordered_set<Index> _live_indices;
+  std::unordered_map<Index, std::vector<Index>> _interference_graph;
   // Sort tensors by descending order of size
-  std::multimap<uint32_t, DisposableTensorIndex, std::greater<uint32_t>> _indices;
+  std::multimap<uint32_t, Index, std::greater<uint32_t>> _indices;
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/MemoryPlanner.test.cc b/runtime/onert/backend/train/MemoryPlanner.test.cc
index 8978607706f..15b9dc15693 100644
--- a/runtime/onert/backend/train/MemoryPlanner.test.cc
+++ b/runtime/onert/backend/train/MemoryPlanner.test.cc
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include "DisposableTensorIndex.h"
 #include "MemoryPlanner.h"
 #include "ir/Index.h"
 
@@ -25,7 +26,7 @@ using onert::ir::OperationIndex;
 
 TEST(BumpPlanner, claim_test)
 {
-  BumpPlanner planner;
+  BumpPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -55,7 +56,7 @@ TEST(BumpPlanner, claim_test)
 
 TEST(FirstFitPlanner, claim_release_test)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -148,7 +149,7 @@ TEST(FirstFitPlanner, claim_release_test)
 
 TEST(FirstFitPlanner, neg_release_non_existing_index)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -184,7 +185,7 @@ TEST(FirstFitPlanner, neg_release_non_existing_index)
 
 TEST(FirstFitPlanner, neg_release_twice)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -223,7 +224,7 @@ TEST(FirstFitPlanner, neg_release_twice)
 
 TEST(WICPlanner, claim_release_test)
 {
-  WICPlanner planner;
+  WICPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size) {
     DisposableTensorIndex mem_idx{OperationIndex{op_index}, OperandIndex{operand_index}};
diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.cc b/runtime/onert/backend/train/MemoryPlannerFactory.cc
index acfa44e3511..e1e80119213 100644
--- a/runtime/onert/backend/train/MemoryPlannerFactory.cc
+++ b/runtime/onert/backend/train/MemoryPlannerFactory.cc
@@ -16,6 +16,8 @@
 
 #include "MemoryPlannerFactory.h"
 
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
+
 namespace onert
 {
 namespace backend
@@ -23,29 +25,47 @@ namespace backend
 namespace train
 {
 
-MemoryPlannerFactory &MemoryPlannerFactory::get()
+template <typename Index> MemoryPlannerFactory<Index> &MemoryPlannerFactory<Index>::get()
 {
-  static MemoryPlannerFactory instance;
+  static MemoryPlannerFactory<Index> instance;
   return instance;
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *MemoryPlannerFactory::create(const std::string &key)
+template <typename Index>
+basic::IMemoryPlanner<Index> *MemoryPlannerFactory<Index>::create(const std::string &key)
 {
   if (key == "FirstFit")
   {
-    return new FirstFitPlanner;
+    return new FirstFitPlanner<Index>();
   }
   else if (key == "Bump")
  {
-    return new BumpPlanner;
+    return new BumpPlanner<Index>();
   }
   else if (key == "WIC")
   {
-    return new WICPlanner;
+    return new WICPlanner<Index>();
   }
-  return new FirstFitPlanner; // Default Planner
+  return new FirstFitPlanner<Index>(); // Default Planner
 }
 
+// Explicit instantiation is necessary: the member definitions live in this
+// translation unit only, so without these lines the linker reports undefined
+// references to MemoryPlannerFactory<DisposableTensorIndex>::get()/create()
+// and MemoryPlannerFactory<ExtraTensorIndex>::get()/create().
+template class MemoryPlannerFactory<DisposableTensorIndex>;
+template class MemoryPlannerFactory<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
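Reviewer note (not part of the patch): the point of templating the planners over the index type is that ExtraTensor buffers now get the same liveness-based packing that DisposableTensor buffers already had. A minimal sketch, assuming the `FirstFitPlanner<ExtraTensorIndex>` introduced above and an `ExtraTensorIndex(OperationIndex, uint32_t)` constructor as used in ExtraTensorGenerator.cc:

```cpp
#include "ExtraTensorIndex.h"
#include "MemoryPlanner.h"
#include "ir/Index.h"

using namespace onert::backend::train;
using onert::ir::OperationIndex;

void overlap_sketch()
{
  FirstFitPlanner<ExtraTensorIndex> planner;

  ExtraTensorIndex a{OperationIndex{0}, 0}; // BACKWARD tensor of op #0
  ExtraTensorIndex b{OperationIndex{1}, 0}; // BACKWARD tensor of op #1

  // In backward order op #1 runs before op #0, so b is claimed and released
  // before a is claimed; first-fit can then hand a the offset b vacated.
  planner.claim(b, 1024);
  planner.release(b);
  planner.claim(a, 2048);
  planner.release(a);

  // planner.capacity() ends up as max(1024, 2048), not the sum.
}
```

Two claims whose lifetimes do not overlap share an offset, which is exactly the saving over the old per-layer eager `setBuffer()` allocations.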
diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.h b/runtime/onert/backend/train/MemoryPlannerFactory.h
index d1609e17559..7f42be46f90 100644
--- a/runtime/onert/backend/train/MemoryPlannerFactory.h
+++ b/runtime/onert/backend/train/MemoryPlannerFactory.h
@@ -28,7 +28,7 @@ namespace backend
 namespace train
 {
 
-class MemoryPlannerFactory
+template <typename Index> class MemoryPlannerFactory
 {
 public:
   static MemoryPlannerFactory &get();
@@ -38,7 +38,7 @@ class MemoryPlannerFactory
 
 public:
   // Currently, only the memory planner for DisposableTensor is supported
-  basic::IMemoryPlanner<DisposableTensorIndex> *create(const std::string &key);
+  basic::IMemoryPlanner<Index> *create(const std::string &key);
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc
index ee737222be2..3b2d6f40b9a 100644
--- a/runtime/onert/backend/train/TensorBuilder.cc
+++ b/runtime/onert/backend/train/TensorBuilder.cc
@@ -17,6 +17,9 @@
 
 #include "TensorBuilder.h"
 
 #include "Tensor.h"
+#include "ExtraTensorIndex.h"
+
+#include
 
 namespace onert
 {
@@ -95,6 +98,12 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index,
   _disposable_backprops.add(index);
 }
 
+void TensorBuilder::registerExtraTensor(const ExtraTensorIndex &index,
+                                        std::shared_ptr<ExtraTensor> &tensor)
+{
+  _tensor_reg->setExtraTensor(index, tensor);
+}
+
 void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index)
 {
   // TODO Support memory plan
@@ -155,6 +164,16 @@ void TensorBuilder::notifyDisposableBackPropLastUse(const DisposableTensorIndex
   _tensor_mgr->releaseDisposableBackPropPlan(index);
 }
 
+void TensorBuilder::notifyFirstUse(const ExtraTensorIndex &index)
+{
+  _tensor_mgr->claimExtraPlan(index);
+}
+
+void TensorBuilder::notifyLastUse(const ExtraTensorIndex &index)
+{
+  _tensor_mgr->releaseExtraPlan(index);
+}
+
 bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const
 {
   return _tensor_info_map.find(index) != _tensor_info_map.end();
@@ -183,6 +202,8 @@ void TensorBuilder::allocateBackward(void)
   _tensor_mgr->allocateDisposableBackPropTensors();
 }
 
+void TensorBuilder::allocateExtra(void) { _tensor_mgr->allocateExtraTensors(); }
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h
index 1fa46855142..c62cf1acf03 100644
--- a/runtime/onert/backend/train/TensorBuilder.h
+++ b/runtime/onert/backend/train/TensorBuilder.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 #include "TensorManager.h"
 #include "TensorRegistry.h"
 #include "util/Set.h"
@@ -55,6 +56,8 @@ class TensorBuilder
   void registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index,
                                             const ir::OperandInfo &info);
 
+  void registerExtraTensor(const ExtraTensorIndex &index, std::shared_ptr<ExtraTensor> &tensor);
+
   // TODO Support memory plan of all tensors
   void notifyFirstUse(const ir::OperandIndex &);
   void notifyLastUse(const ir::OperandIndex &);
@@ -62,6 +65,8 @@ class TensorBuilder
   void notifyBackwardLastUse(const ir::OperandIndex &);
   void notifyDisposableBackPropFirstUse(const DisposableTensorIndex &);
   void notifyDisposableBackPropLastUse(const DisposableTensorIndex &);
+  void notifyFirstUse(const ExtraTensorIndex &);
+  void notifyLastUse(const ExtraTensorIndex &);
 
   bool isRegistered(const ir::OperandIndex &) const;
   bool isRegisteredBackward(const ir::OperandIndex &) const;
@@ -69,6 +74,7 @@ class TensorBuilder
 
   void allocate(void);
   void allocateBackward(void);
+  void allocateExtra(void); // Note: must be called after kernel generation (genKernels)
 
 private:
   const std::shared_ptr<TensorRegistry> _tensor_reg;
diff --git a/runtime/onert/backend/train/TensorManager.cc b/runtime/onert/backend/train/TensorManager.cc
index d8404fcc9ed..675d11759b1 100644
--- a/runtime/onert/backend/train/TensorManager.cc
+++ b/runtime/onert/backend/train/TensorManager.cc
@@ -58,7 +58,8 @@ TensorManager::TensorManager(const std::shared_ptr<TensorRegistry> &reg, uint32_t optim_vars_count)
     _trainable_mgr{new TrainableMemoryManager(optim_vars_count)},
     _back_prop_mgr{new MemoryManager()}, _gradient_mgr{new MemoryManager()},
     // TODO Find a suitable planner of disposable tensors to reduce peak memory usage
-    _disposable_back_prop_mgr{new DisposableMemoryManager()}, _tensors{reg}
+    _disposable_back_prop_mgr{new DisposableMemoryManager()}, _extra_mgr{new ExtraMemoryManager()},
+    _tensors{reg}
 {
   // DO NOTHING
 }
@@ -115,6 +116,11 @@ void TensorManager::claimNonConstPlan(const ir::OperandIndex &index)
   _nonconst_mgr->claimPlan(index, size);
 }
 
+void TensorManager::allocateExtraTensors()
+{
+  allocateMemory(_extra_mgr.get(), _tensors->extra_tensors(), std::string{"EXTRA TENSOR "});
+}
+
 void TensorManager::releaseNonConstPlan(const ir::OperandIndex &index)
 {
   assert(_tensors->getNonConstTensor(index) && !_tensors->getNonConstTensor(index)->is_dynamic());
@@ -187,6 +193,19 @@ void TensorManager::releaseDisposableBackPropPlan(const DisposableTensorIndex &index)
   _disposable_back_prop_mgr->releasePlan(index);
 }
 
+void TensorManager::claimExtraPlan(const ExtraTensorIndex &index)
+{
+  const auto tensor = _tensors->getExtraTensor(index);
+
+  auto size = alignedSize(tensor->total_size(), _align);
+  _extra_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseExtraPlan(const ExtraTensorIndex &index)
+{
+  _extra_mgr->releasePlan(index);
+}
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/TensorManager.h b/runtime/onert/backend/train/TensorManager.h
index 6e0910e182d..2da8420f26b 100644
--- a/runtime/onert/backend/train/TensorManager.h
+++ b/runtime/onert/backend/train/TensorManager.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 #include "MemoryManager.h"
 #include "TensorRegistry.h"
 
@@ -49,6 +50,8 @@ class TensorManager
   void allocateBackPropTensors();
   void allocateGradientTensors();
   void allocateDisposableBackPropTensors();
+  void allocateExtraTensors();
+
   // TODO Add member functions to deallocate tensors
 
   void claimNonConstPlan(const ir::OperandIndex &ind);
@@ -61,6 +64,8 @@ class TensorManager
   void releaseGradientPlan(const ir::OperandIndex &ind);
   void claimDisposableBackPropPlan(const DisposableTensorIndex &ind);
   void releaseDisposableBackPropPlan(const DisposableTensorIndex &ind);
+  void claimExtraPlan(const ExtraTensorIndex &ind);
+  void releaseExtraPlan(const ExtraTensorIndex &ind);
 
 private:
   std::unique_ptr<MemoryManager> _nonconst_mgr;
@@ -68,6 +73,7 @@ class TensorManager
   std::unique_ptr<MemoryManager> _back_prop_mgr;
   std::unique_ptr<MemoryManager> _gradient_mgr;
   std::unique_ptr<DisposableMemoryManager> _disposable_back_prop_mgr;
+  std::unique_ptr<ExtraMemoryManager> _extra_mgr;
   const std::shared_ptr<TensorRegistry> _tensors;
 };
 
diff --git a/runtime/onert/backend/train/TensorRegistry.h b/runtime/onert/backend/train/TensorRegistry.h
index 13932199a9d..643695d61b0 100644
--- a/runtime/onert/backend/train/TensorRegistry.h
+++ b/runtime/onert/backend/train/TensorRegistry.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__
 
 #include
+#include
 
 #include "DisposableTensorIndex.h"
 #include "Tensor.h"
@@ -60,9 +61,35 @@ class TensorRegistry
     return _disposable_back_prop;
   }
 
+  std::shared_ptr<ExtraTensor> getExtraTensor(const ExtraTensorIndex &index)
+  {
+    auto itr = _extra.find(index);
+    if (itr != _extra.end())
+      return itr->second;
+
+    return nullptr;
+  }
+
+  void setExtraTensor(const ExtraTensorIndex &index, std::shared_ptr<ExtraTensor> &tensor)
+  {
+    assert(tensor != nullptr);
+    auto itr = _extra.find(index);
+    if (itr != _extra.end())
+      throw std::runtime_error{
+        "Tried to set an extra tensor but another extra tensor already exists."};
+
+    _extra[index] = tensor;
+  }
+
+  const std::unordered_map<ExtraTensorIndex, std::shared_ptr<ExtraTensor>> &extra_tensors()
+  {
+    return _extra;
+  }
+
 private:
   // Disposable Tensors to be accumulated to BackPropTensor
   std::unordered_map<DisposableTensorIndex, std::shared_ptr<BackPropTensor>> _disposable_back_prop;
+  std::unordered_map<ExtraTensorIndex, std::shared_ptr<ExtraTensor>> _extra;
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
index 3c4ce2f7ce1..a1a19fe56fb 100644
--- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
+++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
@@ -55,11 +55,22 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs,
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_output->get_info());
-    _act_back_prop_output->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> BinaryArithmeticLayer::registerExtraTensors()
+{
+  ExtraTensors tensors;
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return std::optional<ExtraTensors>(tensors);
+}
+
 void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); }
 
 void BinaryArithmeticLayer::backward()
diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
index 60d6e8be1cc..598cd3327ca 100644
--- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
+++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
@@ -50,6 +50,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,
   void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs,
                          const IPortableTensor *back_prop_output, const ir::Activation activation,
                          const ArithmeticType arithmetic_type);
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -60,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,
   ArithmeticType _arithmetic_type;
   ir::Activation _activation;
 
-  std::unique_ptr<Tensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
index 41ff7fd1c43..28c66c13737 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
@@ -31,7 +31,7 @@ namespace
 using namespace onert;
 
 template <typename Tensor>
-std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
+std::shared_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
 {
   const auto &origin_shape = origin_weights->getShape();
   assert(origin_shape.rank() == 4);
@@ -42,7 +42,7 @@ std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *
     ir::Shape{origin_shape.dim(1), origin_shape.dim(2), origin_shape.dim(3), origin_shape.dim(0)};
   transposed_info.shape(transposed_shape);
 
-  return std::make_unique<Tensor>(transposed_info);
+  return std::make_shared<Tensor>(transposed_info);
 }
 
 } // namespace
@@ -79,27 +79,30 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights,
   if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1)
     throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet");
 
-  // TODO Optimize transposed tensors
-  _transposed_weights = createTransposedWeights<Tensor>(weights);
-  _transposed_weights->setBuffer(
-    std::make_shared<basic::Allocator>(_transposed_weights->total_size()));
+  _transposed_weights = createTransposedWeights<ExtraTensor>(weights);
 
-  _conv_back_prop_output = std::make_unique<Tensor>(back_prop_output->get_info());
-  _conv_back_prop_output->setBuffer(
-    std::make_shared<basic::Allocator>(_conv_back_prop_output->total_size()));
+  _conv_back_prop_output = std::make_shared<ExtraTensor>(back_prop_output->get_info());
 
-  _transposed_grad_weights = createTransposedWeights<Tensor>(weights);
-  _transposed_grad_weights->setBuffer(
-    std::make_shared<basic::Allocator>(_transposed_grad_weights->total_size()));
+  _transposed_grad_weights = createTransposedWeights<ExtraTensor>(weights);
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_back_prop_output->get_info());
-    _act_back_prop_output->setBuffer(
-      std::make_shared<basic::Allocator>(_act_back_prop_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> ConvolutionLayer::registerExtraTensors()
+{
+  ExtraTensors tensors = {_transposed_weights, _conv_back_prop_output, _transposed_grad_weights};
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return std::optional<ExtraTensors>(tensors);
+}
+
 void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); }
 
 void ConvolutionLayer::backward()
 {
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h
index ef11f68bf57..6df64a26b41 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h
@@ -41,6 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
   void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input,
                          IPortableTensor *grad_weights, IPortableTensor *grad_bias,
                          const IPortableTensor *back_prop_output, const ir::Activation activation);
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -54,10 +55,10 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
   const IPortableTensor *_back_prop_output;
 
   // TODO Consider if these tensors should be built in TensorBuilder
-  std::unique_ptr<Tensor> _transposed_weights;
-  std::unique_ptr<Tensor> _conv_back_prop_output;
-  std::unique_ptr<Tensor> _act_back_prop_output;
-  std::unique_ptr<Tensor> _transposed_grad_weights;
+  std::shared_ptr<ExtraTensor> _transposed_weights;
+  std::shared_ptr<ExtraTensor> _conv_back_prop_output;
+  std::shared_ptr<ExtraTensor> _transposed_grad_weights;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
index 9d35655b26f..1e57f795228 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -28,7 +28,7 @@ namespace
 
 using namespace onert;
 
-std::unique_ptr<Tensor>
+std::shared_ptr<ExtraTensor>
 createTransposedTensor(const backend::IPortableTensor *origin_tensor)
 {
   const auto &origin_shape = origin_tensor->getShape();
@@ -38,7 +38,7 @@ createTransposedTensor(const backend::IPortableTensor *origin_tensor)
   auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)};
   transposed_info.shape(transposed_shape);
 
-  return std::make_unique<Tensor>(transposed_info);
+  return std::make_shared<ExtraTensor>(transposed_info);
 }
 
 } // namespace
@@ -86,23 +86,28 @@ void FullyConnectedLayer::configureBackward(
       "train FullyConnectedLayer: Input other ranks than 2 are not supported."};
 
   _transposed_weights = createTransposedTensor(weights);
-  _transposed_weights->setBuffer(std::make_shared<basic::Allocator>(weights->total_size()));
 
   _transposed_input = createTransposedTensor(input);
-  _transposed_input->setBuffer(std::make_shared<basic::Allocator>(input->total_size()));
 
   _transposed_back_prop_output = createTransposedTensor(back_prop_output);
-  _transposed_back_prop_output->setBuffer(
-    std::make_shared<basic::Allocator>(back_prop_output->total_size()));
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_back_prop_output->get_info());
-    _act_back_prop_output->setBuffer(
-      std::make_shared<basic::Allocator>(_back_prop_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> FullyConnectedLayer::registerExtraTensors()
+{
+  ExtraTensors tensors = {_transposed_weights, _transposed_input, _transposed_back_prop_output};
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return tensors;
+}
+
 void FullyConnectedLayer::forward(bool) { cpu::ops::FullyConnectedLayer::run(); }
 
 void FullyConnectedLayer::backward()
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
index 190bfbffe42..5a691cb176b 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.h
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
@@ -46,6 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
                          const IPortableTensor *back_prop_output, ir::Activation activation,
                          ir::FullyConnectedWeightsFormat weights_format);
 
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -58,11 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
   IPortableTensor *_back_prop_input;
   const IPortableTensor *_back_prop_output;
 
-  // TODO Optimize memory
-  std::unique_ptr<Tensor> _transposed_weights;
-  std::unique_ptr<Tensor> _transposed_input;
-  std::unique_ptr<Tensor> _transposed_back_prop_output;
-  std::unique_ptr<Tensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _transposed_weights;
+  std::shared_ptr<ExtraTensor> _transposed_input;
+  std::shared_ptr<ExtraTensor> _transposed_back_prop_output;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc
index f77d58e6517..9a9f7b08701 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.cc
+++ b/runtime/onert/backend/train/ops/PoolLayer.cc
@@ -41,8 +41,8 @@ class MaxPool2D final : public TrainingKernelRegistry
   const IPortableTensor *_output;
   nnfw::cker::PoolParams _op_params;
 
-  std::unique_ptr<Tensor> _act_back_prop_output;
-  std::unique_ptr<Tensor> _arg_max_index;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _arg_max_index;
 
 public:
   MaxPool2D(const uint32_t paddingLeft, const uint32_t, const uint32_t paddingTop, const uint32_t,
@@ -62,20 +62,30 @@ class MaxPool2D final : public TrainingKernelRegistry
                               &_op_params.float_activation_max);
     }
 
-    _arg_max_index = std::make_unique<Tensor>(_output->get_info());
-    _arg_max_index->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+    _arg_max_index = std::make_shared<ExtraTensor>(_output->get_info());
 
     if (activation != ir::Activation::NONE)
     {
-      _act_back_prop_output = std::make_unique<Tensor>(_output->get_info());
-      _act_back_prop_output->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+      _act_back_prop_output = std::make_shared<ExtraTensor>(_output->get_info());
     }
   };
 
   ~MaxPool2D() {}
 
 public:
+  std::optional<ExtraTensors> registerExtraTensors() override
+  {
+    ExtraTensors tensors = {_arg_max_index};
+
+    if (_act_back_prop_output != nullptr)
+    {
+      tensors.push_back(_act_back_prop_output);
+    }
+
+    return std::optional<ExtraTensors>(tensors);
+  }
+
+public:
-  void forward(const IPortableTensor *in, IPortableTensor *out)
+  void forward(const IPortableTensor *in, IPortableTensor *out) override
   {
     assert(in->layout() == ir::Layout::NHWC);
 
@@ -88,7 +98,7 @@ class MaxPool2D final : public TrainingKernelRegistry
                           out_data, getBuffer(arg_max_index));
   }
 
-  void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in)
+  void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) override
   {
     assert(back_prop_out->layout() == ir::Layout::NHWC);
 
@@ -110,7 +120,7 @@ class MaxPool2D final : public TrainingKernelRegistry
                             getBuffer(arg_max_index), getShape(back_prop_in),
                             getBuffer(back_prop_in));
   }
 };
 
 } // namespace
 
@@ -149,6 +159,11 @@ void PoolLayer::configureBackward(const uint32_t paddingLeft, const uint32_t pad
   }
 }
 
+std::optional<ExtraTensors> PoolLayer::registerExtraTensors()
+{
+  return _kernel->registerExtraTensors();
+}
+
 void PoolLayer::forward(bool training)
 {
   if (training)
diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h
index 5ced951ae6a..f76fc5b7020 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.h
+++ b/runtime/onert/backend/train/ops/PoolLayer.h
@@ -38,6 +38,8 @@ class TrainingKernelRegistry
 public:
   virtual void forward(const IPortableTensor *in, IPortableTensor *out) = 0;
   virtual void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) = 0;
+  virtual std::optional<ExtraTensors> registerExtraTensors() = 0;
+
   TrainingKernelRegistry() = default;
   virtual ~TrainingKernelRegistry() = default;
 };
@@ -61,6 +63,7 @@ class PoolLayer : public ::onert::exec::train::ITrainableFunction, public cpu::ops::PoolLayer
                          IPortableTensor *output, IPortableTensor *back_prop_input,
                          const IPortableTensor *back_prop_output);
 
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
diff --git a/runtime/onert/core/include/backend/train/ExtraTensor.h b/runtime/onert/core/include/backend/train/ExtraTensor.h
index 15253e6deb4..e0b63d359bc 100644
--- a/runtime/onert/core/include/backend/train/ExtraTensor.h
+++ b/runtime/onert/core/include/backend/train/ExtraTensor.h
@@ -26,6 +26,12 @@ namespace backend
 namespace train
 {
 
+enum class ExtraTensorLifeTime
+{
+  BACKWARD,            // alive only during backward()
+  FORWARD_TO_BACKWARD, // alive from forward() to backward()
+};
+
 // ExtraTensor is a tensor that is accessed within one operation layer.
 // In other words, the scope of the extra tensor is confined to one specific layer.
 class ExtraTensor final : public basic::Tensor
@@ -34,12 +40,26 @@ class ExtraTensor final : public basic::Tensor
   ExtraTensor() = delete;
 
 public:
-  ExtraTensor(const ir::OperandInfo &info) : basic::Tensor(info, nullptr)
+  ExtraTensor(const ir::OperandInfo &info, ExtraTensorLifeTime lt)
+    : basic::Tensor(info, nullptr), _lifetime(lt)
+  {
+    // DO NOTHING
+  }
+
+  ExtraTensor(const ir::OperandInfo &info)
+    : basic::Tensor(info, nullptr), _lifetime(ExtraTensorLifeTime::BACKWARD)
   {
     // DO NOTHING
   }
+
+  ExtraTensorLifeTime lifetime() const { return _lifetime; }
+
+private:
+  ExtraTensorLifeTime _lifetime;
 };
 
+using ExtraTensors = std::vector<std::shared_ptr<ExtraTensor>>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/core/include/exec/train/ITrainableFunction.h b/runtime/onert/core/include/exec/train/ITrainableFunction.h
index 45adc258f68..fd57ae3516b 100644
--- a/runtime/onert/core/include/exec/train/ITrainableFunction.h
+++ b/runtime/onert/core/include/exec/train/ITrainableFunction.h
@@ -18,6 +18,9 @@
 #define __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__
 
 #include
+#include <optional>
+
+#include "backend/train/ExtraTensor.h"
 
 namespace onert
 {
@@ -26,12 +29,21 @@ namespace exec
 namespace train
 {
 
+// Q: registerExtraTensors() below is not a pure virtual function. If so, should this
+// class be renamed from ITrainableFunction to TrainableFunction?
+
 class ITrainableFunction
 {
 public:
   virtual ~ITrainableFunction() = default;
   virtual void forward(bool training) = 0;
   virtual void backward() = 0;
+
+  // Implement this if the layer needs extra tensors
+  virtual std::optional<ExtraTensors> registerExtraTensors()
+  {
+    return std::nullopt;
+  }
 };
 
 } // namespace train
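Reviewer note (not part of the patch): pulling the pieces together, a layer opts in by keeping `std::shared_ptr<ExtraTensor>` members, creating them unallocated at configure time, and handing them out via `registerExtraTensors()`. A minimal sketch assuming only the interfaces introduced above; `ExampleLayer` and `_scratch` are illustrative names that do not exist in this patch:

```cpp
#include <memory>
#include <optional>

#include "backend/train/ExtraTensor.h"
#include "exec/train/ITrainableFunction.h"
#include "ir/OperandInfo.h"

namespace example
{

using namespace onert::backend::train;

class ExampleLayer : public onert::exec::train::ITrainableFunction
{
public:
  void configure(const onert::ir::OperandInfo &info)
  {
    // Only describe the scratch tensor here; no buffer is attached.
    // BACKWARD lifetime: the buffer must be valid only while backward() runs.
    _scratch = std::make_shared<ExtraTensor>(info, ExtraTensorLifeTime::BACKWARD);
  }

  // Collected by ExtraTensorGenerator, which plans claim/release order from
  // each tensor's lifetime and allocates all buffers in one pass.
  std::optional<ExtraTensors> registerExtraTensors() override
  {
    return ExtraTensors{_scratch};
  }

  void forward(bool) override { /* ... */ }
  void backward() override { /* _scratch->buffer() is valid here */ }

private:
  std::shared_ptr<ExtraTensor> _scratch;
};

} // namespace example
```

BackendContext::gen() then forwards the returned tensors to ExtraTensorGenerator (register_tensors → plan → allocate), so a layer never calls setBuffer() itself anymore.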