diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 371462b8a5d..f7085a321ab 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -232,8 +232,8 @@ KernelGenerator::KernelGenerator(
   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
   const std::shared_ptr<ExternalContext> &external_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
-    _current_layout{graph.layout()}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg},
-    _kernel_builder(kernel_builder), _external_context(external_context)
+    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+    _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -325,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
     _return_fn = std::move(fn);
     return;
   }
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -354,8 +354,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

   const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -386,7 +386,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   const auto ofm_index{node.getOutputs().at(0)};

   const auto rank = _ctx.at(ofm_index).shape().rank();
-  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
+  const auto axis = ops::getAxis(rank, node.param().axis);

   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

@@ -572,24 +572,12 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

-  // NOTE The frontend layout and backend layout must be the same for this operation.
-  //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
-  //      is not not efficient even if it works well. If so, it would be better to set the
-  //      layout of these backend tensors to the same layout.
-  //      There is also one thing we have to think about. This operation depends on the layout of
-  //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
-  //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
-  //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
-  const auto &input_shape = _ctx.at(input_index).shape();
-  UNUSED_RELEASE(input_shape);
-  assert(input_shape.rank() < 4 || _current_layout == ir::Layout::NHWC);
-
-  const auto axis_raw = node.param().axis;
-  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
+  const auto rank = _ctx.at(input_index).shape().rank();
+  const auto axis = ops::getAxis(rank, node.param().axis);

   auto fn = std::make_unique<ops::GatherLayer>();

-  fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
+  fn->configure(input_tensor, indices_tensor, output_tensor, axis);

   _return_fn = std::move(fn);
 }
@@ -646,7 +634,6 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
     for (const auto &idx : opSeq)
     {
       const auto &operand = _ctx.at(idx);
-      // TODO make sure using `_current_layout` is correct for custom operations
       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
       auto in_tensor = _tensor_reg->getPortableTensor(idx);
       tensors.emplace_back(in_tensor);
@@ -743,7 +730,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   const auto ofm_index{node.getOutputs().at(0)};

   const auto rank = _ctx.at(ofm_index).shape().rank();
-  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
+  const auto axis = ops::getAxis(rank, node.param().axis);

   assert(-rank <= axis && axis < rank);
@@ -765,7 +752,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   const auto input_index{node.getInputs().at(0)};

   const auto rank = _ctx.at(input_index).shape().rank();
-  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
+  const auto axis = ops::getAxis(rank, node.param().axis);

   assert(rank == 0 || (-rank <= axis && axis < rank));
@@ -1037,8 +1024,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
   const auto kh = node.param().kh;
   const auto kw = node.param().kw;
   const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   const auto padding =
     ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
   const auto activation = node.param().activation;
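Note on the Gather hunk above: the open-coded negative-axis handling (`axis_raw < 0 ? rank + axis_raw : axis_raw`) is replaced by the shared `ops::getAxis` helper, which after this change performs exactly the same normalization (see the OperationUtils.h hunk below). A minimal stand-alone sketch of that equivalence, with hypothetical names rather than runtime code:

```cpp
#include <cassert>
#include <cstdint>

// oldGatherAxis mirrors the expression deleted from the Gather visitor;
// newGetAxis mirrors the simplified ops::getAxis body from this diff.
int32_t oldGatherAxis(uint32_t rank, int32_t axis_raw)
{
  return axis_raw < 0 ? static_cast<int32_t>(rank) + axis_raw : axis_raw;
}

int32_t newGetAxis(uint32_t rank, int32_t axis)
{
  auto ret = axis;
  if (ret < 0)
    ret += rank;
  return ret;
}

int main()
{
  // Both agree over the whole valid range [-rank, rank).
  for (int32_t axis = -4; axis < 4; ++axis)
    assert(oldGatherAxis(4, axis) == newGetAxis(4, axis));
  return 0;
}
```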
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index d7d5fe6fcbd..8d2010151f8 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -100,7 +100,6 @@ class KernelGenerator : public basic::KernelGeneratorBase
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
-  ir::Layout _current_layout;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<basic::TensorRegistry> _tensor_reg;
   std::shared_ptr<custom::IKernelBuilder> _kernel_builder;
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index 29452c4a586..39ef0cf6c54 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -132,7 +132,7 @@ convertActivationType(const ir::Activation activation)
   }
 }

-inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout)
+inline int32_t getAxis(uint32_t rank, int32_t axis)
 {
   auto ret = axis;

@@ -141,13 +141,6 @@ inline int32_t getAxis(uint32_t rank, int32_t axis)
     ret += rank;
   }

-  // NCHW -> NHWC
-  if (frontend_layout == ir::Layout::NCHW)
-  {
-    int32_t permutation[4] = {0, 3, 1, 2};
-    ret = permutation[ret];
-  }
-
   return ret;
 }
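For the record, the branch removed from `getAxis` translated an axis given in NCHW order into its NHWC position; once every tensor these backends see is NHWC, there is no frontend layout left to translate from. A small sketch of what the deleted `permutation` table encoded:

```cpp
#include <cassert>
#include <cstdint>

// Illustration of the branch removed from getAxis: given an axis counted in
// NCHW order, permutation[] returned the position of that same dimension in
// NHWC order.
int32_t nchwAxisToNhwc(int32_t axis)
{
  const int32_t permutation[4] = {0, 3, 1, 2}; // table from the deleted code
  return permutation[axis];
}

int main()
{
  assert(nchwAxisToNhwc(0) == 0); // batch stays first
  assert(nchwAxisToNhwc(1) == 3); // channels move to the last NHWC slot
  assert(nchwAxisToNhwc(2) == 1); // height
  assert(nchwAxisToNhwc(3) == 2); // width
  return 0;
}
```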
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
index 90fbc844791..694a75d98d6 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.cc
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -78,8 +78,8 @@ KernelGenerator::KernelGenerator(
   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
   const std::shared_ptr<ExternalContext> &external_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
-    _current_layout{graph.layout()}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg},
-    _kernel_builder(kernel_builder), _external_context(external_context)
+    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+    _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -114,8 +114,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
     _return_fn = std::move(fn);
     return;
   }
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h
index 31551c46c4d..c6c9438bd30 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.h
+++ b/runtime/onert/backend/ruy/KernelGenerator.h
@@ -51,7 +51,6 @@ class KernelGenerator : public basic::KernelGeneratorBase
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
-  const ir::Layout _current_layout;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<basic::TensorRegistry> _tensor_reg;
   std::shared_ptr<custom::IKernelBuilder> _kernel_builder;
diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc
index 8b45a624f4c..3e72b72d2ef 100644
--- a/runtime/onert/backend/train/BackendContext.cc
+++ b/runtime/onert/backend/train/BackendContext.cc
@@ -121,8 +121,6 @@ backend::train::ITensorRegistry *BackendContext::genTrainingTensors()
       continue;
     if (external_operands().contains(ind))
       continue;
-    // NOTE Assuming there is no layout changes (Always assume NHWC or UNKNOWN)
-    assert(tgraph.layout() != ir::Layout::NCHW);

     const auto &operand = tgraph.operands().at(ind);
     tensor_builder->registerBackwardTensorInfo(ind, createBackwardTensorInfo(operand));
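Across these hunks `asFeature(_current_layout)` becomes `asFeature(ir::Layout::NHWC)`: with layout permutation gone from the runtime, NHWC is the only layout these backends can see. A rough stand-alone sketch of what such a conversion yields for a 4-D NHWC shape (the `FeatureShape` fields here are assumptions for illustration, not onert's exact API):

```cpp
#include <array>
#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the feature-shape extraction: for an NHWC tensor
// the four dimensions map directly to batch/height/width/channels.
struct FeatureShape
{
  int32_t N, H, W, C;
};

FeatureShape asFeatureNHWC(const std::array<int32_t, 4> &dims)
{
  // NHWC: dim 0 = batch, dim 1 = height, dim 2 = width, dim 3 = channels.
  return FeatureShape{dims[0], dims[1], dims[2], dims[3]};
}

int main()
{
  const auto f = asFeatureNHWC({1, 224, 224, 3});
  assert(f.N == 1 && f.H == 224 && f.W == 224 && f.C == 3);
  return 0;
}
```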
diff --git a/runtime/onert/backend/train/KernelGenerator.cc b/runtime/onert/backend/train/KernelGenerator.cc
index f6171c04908..0100f8375b8 100644
--- a/runtime/onert/backend/train/KernelGenerator.cc
+++ b/runtime/onert/backend/train/KernelGenerator.cc
@@ -147,9 +147,8 @@ KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph,
                                  const std::shared_ptr<TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<ExternalContext> &external_context,
                                  const exec::train::optimizer::Optimizer *optimizer)
-  : backend::train::KernelGeneratorBase{tgraph}, _current_layout{tgraph.layout()},
-    _tensor_reg{tensor_reg}, _external_context(external_context), _optimizer{optimizer},
-    _update_funcs{}, _node_to_idx{}
+  : backend::train::KernelGeneratorBase{tgraph}, _tensor_reg{tensor_reg},
+    _external_context(external_context), _optimizer{optimizer}, _update_funcs{}, _node_to_idx{}
 {
   tgraph.operations().iterate(
     [&](const onert::ir::OperationIndex &idx, const onert::ir::IOperation &op) {
@@ -211,8 +210,8 @@ void KernelGenerator::visit(const ir::train::operation::Conv2D &node)
   auto fn = std::make_unique<ops::ConvolutionLayer>();

   auto &operands = _tgraph.operands();
-  const auto ifm_shape = operands.at(in_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = operands.at(out_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = operands.at(in_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = operands.at(out_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = operands.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -266,8 +265,8 @@ void KernelGenerator::visit(const ir::train::operation::DepthwiseConv2D &node)
   const auto stride = node.param().stride;

   const auto &operands = _tgraph.operands();
-  const auto ofm_shape = operands.at(ofm_index).shape().asFeature(_current_layout);
-  const auto ifm_shape = operands.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = operands.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ifm_shape = operands.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = operands.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -481,8 +480,8 @@ void KernelGenerator::visit(const ir::train::operation::Pool2D &node)
   const auto kh = node.param().kh;
   const auto kw = node.param().kw;
   const auto padding =
-    ir::calculatePadding(node.param().padding, ifm_shape.asFeature(_current_layout),
-                         ofm_shape.asFeature(_current_layout), stride, kw, kh);
+    ir::calculatePadding(node.param().padding, ifm_shape.asFeature(ir::Layout::NHWC),
+                         ofm_shape.asFeature(ir::Layout::NHWC), stride, kw, kh);

   auto out_tensor = _tensor_reg->getPortableTensor(output_index);
   auto in_tensor = _tensor_reg->getPortableTensor(input_index);
diff --git a/runtime/onert/backend/train/KernelGenerator.h b/runtime/onert/backend/train/KernelGenerator.h
index 5832bd5348b..329903b0b21 100644
--- a/runtime/onert/backend/train/KernelGenerator.h
+++ b/runtime/onert/backend/train/KernelGenerator.h
@@ -64,7 +64,6 @@ class KernelGenerator : public backend::train::KernelGeneratorBase
   IPortableTensor *getBackPropOut(const ir::OperandIndex &index);

 private:
-  ir::Layout _current_layout;
   std::shared_ptr<TensorRegistry> _tensor_reg;
   const std::shared_ptr<ExternalContext> _external_context;
   const exec::train::optimizer::Optimizer *_optimizer;
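The Pool2D path still derives padding from the NHWC feature shapes via `ir::calculatePadding`. As a reminder of the arithmetic involved, here is a generic SAME-padding sketch for one spatial dimension (the standard formula; onert's helper is assumed to compute something equivalent for the shapes passed above):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Generic SAME-padding arithmetic for one spatial dimension: pad just enough
// that `out` positions are produced, with the odd pixel on the back side.
struct Pad1D
{
  int32_t front, back; // top/bottom or left/right
};

Pad1D samePad(int32_t in, int32_t out, int32_t stride, int32_t kernel)
{
  const int32_t total = std::max((out - 1) * stride + kernel - in, 0);
  return Pad1D{total / 2, total - total / 2};
}

int main()
{
  // 224-wide input, stride 2, kernel 3 -> 112 outputs, pad 0 front / 1 back.
  const auto p = samePad(224, 112, 2, 3);
  assert(p.front == 0 && p.back == 1);
  return 0;
}
```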
diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc
index 74b6b6e56be..49437f93b49 100644
--- a/runtime/onert/backend/trix/KernelGenerator.cc
+++ b/runtime/onert/backend/trix/KernelGenerator.cc
@@ -39,8 +39,7 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph,
                                  const std::shared_ptr<TensorBuilder> &tensor_builder,
                                  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<DevContext> &dev_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
-    _current_layout{graph.layout()}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg},
-    _dev_context{dev_context}
+    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context}
 {
   // DO NOTHING
 }
diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h
index d87dc695250..4f3de01870d 100644
--- a/runtime/onert/backend/trix/KernelGenerator.h
+++ b/runtime/onert/backend/trix/KernelGenerator.h
@@ -48,7 +48,6 @@ class KernelGenerator : public basic::KernelGeneratorBase
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
-  ir::Layout _current_layout;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<basic::TensorRegistry> _tensor_reg;
   const std::shared_ptr<DevContext> _dev_context;
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
index 2a0cc8f5d55..eb85bf4df13 100644
--- a/runtime/onert/backend/xnnpack/KernelGenerator.cc
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -42,8 +42,8 @@ KernelGenerator::KernelGenerator(
   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
   const std::shared_ptr<ExternalContext> &external_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
-    _current_layout{graph.layout()}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg},
-    _kernel_builder(kernel_builder), _external_context(external_context)
+    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+    _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -105,8 +105,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto dilation = node.param().dilation;
   auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);

-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -133,8 +133,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

   const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(ir::Layout::NHWC);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
index 271a60653e5..f4d8dcb84cd 100644
--- a/runtime/onert/backend/xnnpack/KernelGenerator.h
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -52,7 +52,6 @@ class KernelGenerator : public basic::KernelGeneratorBase
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
-  ir::Layout _current_layout;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<basic::TensorRegistry> _tensor_reg;
   std::shared_ptr<custom::IKernelBuilder> _kernel_builder;
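Finally, the kernel-format comments kept throughout the diff explain why the filter reads need no layout argument at all: Conv2D filters are `[depth_out, kernel_height, kernel_width, depth_in]`, so height and width are always `dim(1)` and `dim(2)`. A trivial sketch of that convention with made-up dimensions:

```cpp
#include <array>
#include <cassert>
#include <cstdint>

int main()
{
  // Conv2D filter layout from the diff's comments:
  // [depth_out, kernel_height, kernel_width, depth_in].
  const std::array<int32_t, 4> conv_kernel = {32, 3, 5, 16}; // example values
  const auto ker_height = conv_kernel[1];
  const auto ker_width = conv_kernel[2];
  assert(ker_height == 3 && ker_width == 5);
  return 0;
}
```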