diff --git a/runtime/onert/backend/acl_common/IACLTensor.cc b/runtime/onert/backend/acl_common/IACLTensor.cc
index 9920750fc24..50fef459daf 100644
--- a/runtime/onert/backend/acl_common/IACLTensor.cc
+++ b/runtime/onert/backend/acl_common/IACLTensor.cc
@@ -41,8 +41,6 @@ size_t IACLTensor::calcOffset(const ir::Coordinates &coords) const
   return info()->offset_element_in_bytes(acl_coords);
 }
 
-ir::Layout IACLTensor::layout() const { return acl_common::asRuntimeLayout(info()->data_layout()); }
-
 ir::DataType IACLTensor::data_type() const
 {
   return acl_common::asRuntimeDataType(info()->data_type());
diff --git a/runtime/onert/backend/acl_common/IACLTensor.h b/runtime/onert/backend/acl_common/IACLTensor.h
index 7ea6327a7c6..e43a4cbdac8 100644
--- a/runtime/onert/backend/acl_common/IACLTensor.h
+++ b/runtime/onert/backend/acl_common/IACLTensor.h
@@ -49,7 +49,6 @@ class IACLTensor : public ITensor
   uint8_t *buffer() const final { return handle()->buffer(); }
   size_t total_size() const final { return info()->total_size(); }
   size_t calcOffset(const ir::Coordinates &coords) const final;
-  ir::Layout layout() const final;
   ir::DataType data_type() const final;
   float data_scale() const override;
   int32_t data_zero_point() const override;
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 598ec422688..75c78d886cd 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -260,12 +260,6 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
 
   for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
   {
-    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
-    if (portable_tensor)
-    {
-      assert(portable_tensor->layout() == ir::Layout::NHWC);
-    }
-
     auto tensor = _tensor_reg->getNativeTensor(ind);
     if (tensor)
     {
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.cc b/runtime/onert/backend/cpu/ops/OperationUtils.cc
index 686865af281..5e27f175e5f 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.cc
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.cc
@@ -286,7 +286,6 @@ std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
   std::vector<int32_t> ret;
 
   auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
-  assert(axes->layout() == ir::Layout::NHWC);
   assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
   switch (axes->data_type())
   {
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index 39ef0cf6c54..544d07b8785 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -97,9 +97,6 @@ inline nnfw::cker::Shape getShape(const IPortableTensor *tensor)
     return nnfw::cker::Shape();
 
   const ir::Shape &shape = tensor->get_info().shape();
-
-  assert(tensor->layout() == ir::Layout::NHWC);
-
   auto rank = shape.rank();
   nnfw::cker::Shape ret(rank);
   auto data = ret.DimsData();
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
index 8e2b12d4024..ef80452d5b4 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.cc
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -57,12 +57,6 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
 
   for (const auto &ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
   {
-    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
-    if (portable_tensor)
-    {
-      assert(portable_tensor->layout() == ir::Layout::NHWC);
-    }
-
     auto tensor = _tensor_reg->getNativeTensor(ind);
     if (tensor)
     {
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h
index 716400c1f14..5f00a240871 100644
--- a/runtime/onert/backend/ruy/ops/OperationUtils.h
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.h
@@ -45,9 +45,6 @@ inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor)
     return nnfw::ruy::Shape();
 
   const ir::Shape &shape = tensor->get_info().shape();
-
-  assert(tensor->layout() == ir::Layout::NHWC);
-
   auto rank = shape.rank();
   nnfw::ruy::Shape ret(rank);
   auto data = ret.DimsData();
diff --git a/runtime/onert/backend/train/KernelGenerator.cc b/runtime/onert/backend/train/KernelGenerator.cc
index f3df39735e0..aaaa50f1b72 100644
--- a/runtime/onert/backend/train/KernelGenerator.cc
+++ b/runtime/onert/backend/train/KernelGenerator.cc
@@ -133,11 +133,6 @@ std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::
 
   for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
   {
-    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
-    if (portable_tensor)
-    {
-      assert(portable_tensor->layout() == ir::Layout::NHWC);
-    }
     auto tensor = _tensor_reg->getNonConstTensor(ind);
     if (tensor)
     {
diff --git a/runtime/onert/backend/train/ops/OperationUtils.cc b/runtime/onert/backend/train/ops/OperationUtils.cc
index 9736d5ba5d6..94a2f52491a 100644
--- a/runtime/onert/backend/train/ops/OperationUtils.cc
+++ b/runtime/onert/backend/train/ops/OperationUtils.cc
@@ -37,9 +37,6 @@ nnfw::cker::Shape getShape(const IPortableTensor *tensor)
   assert(!tensor->is_dynamic() && "Dynamic tensor is not supported yet");
 
   const ir::Shape &shape = tensor->get_info().shape();
-
-  assert(tensor->layout() == ir::Layout::NHWC);
-
   auto rank = shape.rank();
   nnfw::cker::Shape ret(rank);
   auto data = ret.DimsData();
diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc
index f77d58e6517..e98a33050ba 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.cc
+++ b/runtime/onert/backend/train/ops/PoolLayer.cc
@@ -77,8 +77,6 @@ class MaxPool2D final : public TrainingKernelRegistry
 public:
   void forward(const IPortableTensor *in, IPortableTensor *out)
   {
-    assert(in->layout() == ir::Layout::NHWC);
-
    auto out_shape = getShape(out);
     auto out_data = getBuffer(out);
     auto arg_max_index = _arg_max_index.get();
@@ -90,8 +88,6 @@ class MaxPool2D final : public TrainingKernelRegistry
 
   void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in)
   {
-    assert(back_prop_out->layout() == ir::Layout::NHWC);
-
     // activation backward
     try
     {
diff --git a/runtime/onert/backend/train/optimizer/Optimizers.test.cc b/runtime/onert/backend/train/optimizer/Optimizers.test.cc
index f24138c0481..2876fe430cc 100644
--- a/runtime/onert/backend/train/optimizer/Optimizers.test.cc
+++ b/runtime/onert/backend/train/optimizer/Optimizers.test.cc
@@ -54,8 +54,6 @@ class MockUpTensor : public IPortableTensor
 
   template <typename T> const std::vector<T> &data() const { return _data; }
 
-  ir::Layout layout() const override { return ir::Layout::NHWC; }
-
 private:
   using ITensor::setShape;
   using ITensor::set_dynamic;
@@ -89,8 +87,6 @@ class MockUpTrainableTensor : public backend::train::ITrainableTensor
     return const_cast<uint8_t *>(_data.data());
   }
 
-  ir::Layout layout() const override { return ir::Layout::NHWC; }
-
 public:
   std::vector<ITensor *> optVars() override
   {
diff --git a/runtime/onert/backend/trix/Convert.cc b/runtime/onert/backend/trix/Convert.cc
index fe003e7ead5..684dc80dd53 100644
--- a/runtime/onert/backend/trix/Convert.cc
+++ b/runtime/onert/backend/trix/Convert.cc
@@ -23,19 +23,6 @@ namespace backend
 namespace trix
 {
 
-data_layout convertDataLayout(const ir::Layout layout)
-{
-  switch (layout)
-  {
-    case ir::Layout::NCHW:
-      return DATA_LAYOUT_NCHW;
-    case ir::Layout::NHWC:
-      return DATA_LAYOUT_NHWC;
-    default:
-      throw std::runtime_error("Unknown Layout");
-  }
-}
-
 data_type convertDataType(const ir::DataType type)
 {
   switch (type)
diff --git a/runtime/onert/backend/trix/Convert.h b/runtime/onert/backend/trix/Convert.h
index 662ed44b6b7..6b1edd5162f 100644
--- a/runtime/onert/backend/trix/Convert.h
+++ b/runtime/onert/backend/trix/Convert.h
@@ -19,7 +19,6 @@
 #include 
 #include 
-#include 
 #include 
 #include 
@@ -31,14 +30,6 @@ namespace backend
 namespace trix
 {
 
-/**
- * @brief Convert type of layout from onert type to npu type
- *
- * @param layout Layout type in onert
- * @return data_layout Layout type in npu
- */
-data_layout convertDataLayout(const ir::Layout layout);
-
 /**
  * @brief Convert type of data from onert type to npu type
  *
@@ -61,7 +52,7 @@ void setDataInfo(const std::vector<const IPortableTensor *> &tensors, tensors_data_info *info)
 
   for (uint32_t idx = 0; idx < info->num_info; ++idx)
   {
-    info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+    info->info[idx].layout = DATA_LAYOUT_NHWC;
     info->info[idx].type = convertDataType(tensors[idx]->data_type());
   }
 }
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 56041626448..81113bf24fe 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -42,7 +42,6 @@ class ITensor
   virtual uint8_t *buffer() const = 0;
   virtual size_t total_size() const = 0;
   virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-  virtual ir::Layout layout() const = 0;
   virtual ir::DataType data_type() const = 0;
   virtual float data_scale() const = 0;
   virtual int32_t data_zero_point() const = 0;
diff --git a/runtime/onert/core/include/backend/basic/Tensor.h b/runtime/onert/core/include/backend/basic/Tensor.h
index 92d98f82e13..5847dfdd82d 100644
--- a/runtime/onert/core/include/backend/basic/Tensor.h
+++ b/runtime/onert/core/include/backend/basic/Tensor.h
@@ -41,8 +41,8 @@ class Tensor : public IPortableTensor
 
 public:
   Tensor(const ir::OperandInfo &info, DynamicMemoryManager *dynamic_mem_mgr)
-    : IPortableTensor(info), _layout(ir::Layout::NHWC), _buffer(nullptr), _size(info.total_size()),
-      _num_references(0), _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
+    : IPortableTensor(info), _buffer(nullptr), _size(info.total_size()), _num_references(0),
+      _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
   {
     // DO NOTHING
   }
@@ -71,7 +71,6 @@ class Tensor : public IPortableTensor
 
 public:
   uint8_t *buffer() const override { return _buffer; }
-  ir::Layout layout() const override { return _layout; }
   void set_dynamic() override { _info.setDynamic(); }
   bool applyShape(const ir::Shape &new_shape) override;
@@ -126,7 +125,6 @@ class Tensor : public IPortableTensor
   void setShape(const ir::Shape &new_shape) override;
 
 protected:
-  const ir::Layout _layout;
   uint8_t *_buffer;
   size_t _size;
   int32_t _num_references;
diff --git a/runtime/onert/core/include/backend/basic/train/TrainableTensor.h b/runtime/onert/core/include/backend/basic/train/TrainableTensor.h
index 263b32962c8..6a5c41782a4 100644
--- a/runtime/onert/core/include/backend/basic/train/TrainableTensor.h
+++ b/runtime/onert/core/include/backend/basic/train/TrainableTensor.h
@@ -51,7 +51,6 @@ class TrainableTensor : public backend::train::ITrainableTensor
 
 public:
   uint8_t *buffer() const override { return _tensor.buffer(); }
-  ir::Layout layout() const override { return _tensor.layout(); }
 
 public:
   std::vector<ITensor *> optVars() override;
diff --git a/runtime/onert/core/include/ir/Coordinates.h b/runtime/onert/core/include/ir/Coordinates.h
index 9963cab4c1b..f41471e5e33 100644
--- a/runtime/onert/core/include/ir/Coordinates.h
+++ b/runtime/onert/core/include/ir/Coordinates.h
@@ -119,8 +119,7 @@ class Coordinates final
   std::vector<int32_t> _coordinates;
 };
 
-Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
-                               Layout to_layout);
+Coordinates convertCoordinates(const Coordinates &from_coordinates, const PermuteType &type);
 
 } // namespace ir
 } // namespace onert
diff --git a/runtime/onert/core/include/ir/Shape.h b/runtime/onert/core/include/ir/Shape.h
index 744a6cb7c0c..5bccb5e15ba 100644
--- a/runtime/onert/core/include/ir/Shape.h
+++ b/runtime/onert/core/include/ir/Shape.h
@@ -137,7 +137,13 @@ struct Shape
 inline bool operator==(const Shape &lhs, const Shape &rhs) { return lhs.dims() == rhs.dims(); }
 inline bool operator!=(const Shape &lhs, const Shape &rhs) { return lhs.dims() != rhs.dims(); }
 
-Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout);
+/**
+ * @brief Converts shape when its rank is 4
+ *
+ * @return Return a shape based on permutation type.
+ *         If rank is not 4, input shape is returned without conversion.
+ */
+ir::Shape convertShape(const Shape &shape, const PermuteType &type);
 
 /**
  * @brief Find out if tha rank in this shape is "maybe" unspecified.
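Note: the following sketch is illustrative only and is not part of the patch. It shows the intended behavior of the ir::convertShape declaration above, matching the ir/Shape.cc implementation at the end of this diff; the dimension values are hypothetical, and the initializer-list construction of ir::Shape is an assumption based on the {0, 0, 0, 0} coordinate literals used elsewhere in this diff.

    // C++ sketch (assumed constructors, hypothetical values)
    onert::ir::Shape nhwc{1, 8, 8, 3}; // rank-4 shape in NHWC order: N, H, W, C
    onert::ir::Shape nchw =
      onert::ir::convertShape(nhwc, onert::ir::PermuteType::NHWC_TO_NCHW);
    // nchw is {1, 3, 8, 8}; with PermuteType::COPY, or any rank other than 4,
    // the input shape is returned unchanged.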
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.cc b/runtime/onert/core/src/backend/builtin/IOTensor.cc
index a467888a832..2097566fa81 100644
--- a/runtime/onert/core/src/backend/builtin/IOTensor.cc
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.cc
@@ -40,7 +40,6 @@ void IOTensor::setTensor(IPortableTensor *tensor)
 {
   assert(tensor);
   assert(tensor != this);
-  assert(tensor->layout() == _orig->layout()); // Changing layout is not considered yet
   _tensor = tensor;
   if (_info.shape() != tensor->getShape())
   {
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.h b/runtime/onert/core/src/backend/builtin/IOTensor.h
index cb8307791dd..1467ca0d8ff 100644
--- a/runtime/onert/core/src/backend/builtin/IOTensor.h
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.h
@@ -57,7 +57,7 @@ class IOTensor : public IPortableTensor
 
 public:
   uint8_t *buffer() const override { return _tensor->buffer(); }
-  ir::Layout layout() const override { return _orig->layout(); }
+  ir::Layout layout() const { return _orig->layout(); }
   void set_dynamic() override
   {
     _info.setDynamic();
diff --git a/runtime/onert/core/src/backend/builtin/UserTensor.h b/runtime/onert/core/src/backend/builtin/UserTensor.h
index b7f6ce091d1..9b6ffbd41a6 100644
--- a/runtime/onert/core/src/backend/builtin/UserTensor.h
+++ b/runtime/onert/core/src/backend/builtin/UserTensor.h
@@ -45,7 +45,7 @@ class UserTensor : public IPortableTensor
 
 public:
   uint8_t *buffer() const override { return _buffer; }
-  ir::Layout layout() const override { return _layout; }
+  ir::Layout layout() const { return _layout; }
   void set_dynamic() override { _info.setDynamic(); }
   void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
   bool applyShape(const ir::Shape &) override;
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index 1dfa20720f4..560aa21470e 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -16,8 +16,6 @@
 
 #include "PermuteLayer.h"
 
-#include "../../../exec/ShapeConverter.h"
-
 #include <ruy/context.h> // from @ruy
 
 namespace onert
@@ -110,7 +108,7 @@ void PermuteLayer::optimize()
         const auto copy_len = loop_shape.dim(copy_axis) * data_size;
         loop_shape.dim(copy_axis) = 1;
 
-        appendPermuteTasks(src, dst, loop_shape, copy_len);
+        appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
       }
     }
     else
@@ -121,7 +119,7 @@ void PermuteLayer::optimize()
       const auto loop_shape = src_tensor.getShape();
       const auto copy_len = data_size;
 
-      appendPermuteTasks(src, dst, loop_shape, copy_len);
+      appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
     }
   });
 };
@@ -136,11 +134,12 @@ void PermuteLayer::optimize()
 }
 
 void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
-                                      const ir::Shape &loop_shape, size_t size)
+                                      const ir::Shape &loop_shape, size_t size,
+                                      const ir::PermuteType &permute_type)
 {
   size_t distributed_dim = 0;
   auto src_shape = src_tensor->getShape();
-  if (src_tensor->layout() == dst_tensor->layout())
+  if (permute_type == ir::PermuteType::COPY)
   {
     for (int i = 1; i < src_shape.rank() - 1; ++i)
     {
@@ -165,7 +164,8 @@ void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_te
     start_coords.set(distributed_dim, start);
     int end = start + (distributed_dim_val - start) / (thread_count - i);
     one_thread_loop_shape.dim(distributed_dim) = end - start;
-    tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+    tasks.emplace_back(*src_tensor, *dst_tensor, permute_type, start_coords, one_thread_loop_shape,
+                       size);
     start = end;
   }
   assert(tasks.size() >= 1);
@@ -201,14 +201,14 @@ void PermuteLayer::run()
   {
     auto dst_tensor = _dst_tensors.at(i);
     auto src_tensor = _src_tensors.at(i);
+    auto permute_type = _permute_types.at(i);
    if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
     {
       // getting output shape
       auto src_shape = src_tensor->getShape();
 
       // set output shape and output buffer
-      ir::Shape new_shape =
-        exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+      ir::Shape new_shape = ir::convertShape(src_shape, permute_type);
 
       try
       {
@@ -225,8 +225,7 @@ void PermuteLayer::run()
         throw;
       }
     }
-    assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
-           dst_tensor->getShape());
+    assert(ir::convertShape(src_tensor->getShape(), permute_type) == dst_tensor->getShape());
   }
   assert(_src_tensors.size() == _dst_tensors.size());
   assert(_src_tensors.size() == _src_tensors_offsets.size());
@@ -266,7 +265,7 @@ void PermuteLayer::run()
       // If dst is subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWirteBuffer
       else if (dst->needMemoryMap() && !dst->is_subtensor())
       {
-        if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+        if (!src->has_padding() && !dst->has_padding() && permute_type == ir::PermuteType::COPY)
         {
           // This is more effective than multi-threading
           src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
@@ -282,7 +281,7 @@ void PermuteLayer::run()
         }
       }
       else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
-               !dst->has_padding() && src->layout() == dst->layout())
+               !dst->has_padding() && permute_type == ir::PermuteType::COPY)
       {
         // This is more effective than multi-threading
         assert(!dst->needMemoryMap());
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
index 6ae6eb9bd62..c20d1956be4 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -47,7 +47,8 @@ class PermuteLayer : public onert::exec::IPermuteFunction
 
 private:
   void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
-                          const ir::Shape &loop_shape, size_t size);
+                          const ir::Shape &loop_shape, size_t size,
+                          const ir::PermuteType &permute_type);
 
   void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
 
@@ -56,25 +57,23 @@ class PermuteLayer : public onert::exec::IPermuteFunction
     using Strides = ir::Coordinates;
 
     PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
-                      const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+                      const ir::PermuteType &permute_type, const ir::Coordinates &start_coords,
+                      const ir::Shape &loop_shape, size_t size)
       : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
         _src_start_offset{src_tensor.calcOffset(start_coords)},
         _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
-        _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
-        _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+        _loop_shape{loop_shape}, _size{size}, _permute_type{permute_type}
     {
       // Set strides
       setStrides(src_tensor, &_src_strides);
       setStrides(dst_tensor, &_dst_strides);
-
-      _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
     }
 
     // Constructor for a copy
     PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
                       uint32_t dst_start_offset, size_t size)
       : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
         _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
-        _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+        _size{size}, _permute_type{ir::PermuteType::COPY}
     {
       // DO NOTHING
     }
@@ -90,9 +89,9 @@ class PermuteLayer : public onert::exec::IPermuteFunction
       size_t dst_offset = _dst_start_offset;
       assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
       ir::Coordinates dst_coords = coords;
-      if (_is_permutation)
+      if (_permute_type != ir::PermuteType::COPY && _loop_shape.rank() == 4)
       {
-        dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+        dst_coords = ir::convertCoordinates(coords, _permute_type);
       }
       for (auto i = 0; i < _loop_shape.rank(); ++i)
       {
@@ -136,9 +135,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
     Strides _dst_strides;
     const ir::Shape _loop_shape;
     const size_t _size;
-    const ir::Layout _src_layout;
-    const ir::Layout _dst_layout;
-    bool _is_permutation;
+    const ir::PermuteType _permute_type;
   };
   std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
 };
diff --git a/runtime/onert/core/src/exec/EdgeTensor.h b/runtime/onert/core/src/exec/EdgeTensor.h
index 8df79c3890d..5ea9c4ac47b 100644
--- a/runtime/onert/core/src/exec/EdgeTensor.h
+++ b/runtime/onert/core/src/exec/EdgeTensor.h
@@ -36,7 +36,7 @@ class EdgeTensor : public backend::IPortableTensor
   ~EdgeTensor() = default;
 
   uint8_t *buffer() const override { return _buffer.get(); }
-  ir::Layout layout() const override { return _layout; }
+  ir::Layout layout() const { return _layout; }
   void set_dynamic() override { _info.setDynamic(); }
   bool applyShape(const ir::Shape &new_shape) override;
   void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index 2526e4e6e4f..14149fd10b5 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -16,8 +16,6 @@
 
 #include "ExecutorBase.h"
 
-#include "ShapeConverter.h"
-
 #include "util/ConfigSource.h"
 
 #include 
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
index 10794dba749..ae2040168e5 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.cc
+++ b/runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -37,8 +37,6 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
 {
   const ir::Shape shape = tensor->getShape();
 
-  assert(tensor->layout() == ir::Layout::NHWC);
-
   auto rank = shape.rank();
   nnfw::cker::Shape ret(rank);
   auto data = ret.DimsData();
@@ -51,7 +49,8 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
 
 // Quantize per element
 template <typename InputT, typename OutputT>
-void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                         const ir::PermuteType &type)
 {
   const auto scale = dst_tensor->data_scale();
   const auto zero_point = dst_tensor->data_zero_point();
@@ -60,17 +59,14 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
   int max_val = std::numeric_limits<OutputT>::max();
 
   auto loop_shape = src_tensor->getShape();
-  const auto src_layout = src_tensor->layout();
-  const auto dst_layout = dst_tensor->layout();
-  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
 
   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
     const InputT *input_data =
       reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
     int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
     int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
 
-    ir::Coordinates dst_coords =
-      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
     OutputT *output_data =
       reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
     *output_data = clamped;
@@ -79,10 +75,11 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
 
 // TODO Optimize the case where tensors has the same layout
 template <typename InputT, typename OutputT>
-void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+              const ir::PermuteType &type)
 {
-  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
-      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
+      !src_tensor->is_dynamic())
   {
     assert(!dst_tensor->is_dynamic());
 
@@ -94,28 +91,26 @@ void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
   }
   else
   {
-    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor, type);
   }
 }
 
 // Dequantize per element
 template <typename InputT, typename OutputT>
-void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                           const ir::PermuteType &type)
 {
   const auto scale = src_tensor->data_scale();
   const auto zero_point = src_tensor->data_zero_point();
 
   auto loop_shape = src_tensor->getShape();
-  const auto src_layout = src_tensor->layout();
-  const auto dst_layout = dst_tensor->layout();
-  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
 
   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
     const InputT *input_data =
       reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
     const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
 
-    ir::Coordinates dst_coords =
-      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
     OutputT *output_data =
       reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
     *output_data = result;
@@ -124,10 +119,11 @@ void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor
 
 // TODO Optimize the case where tensors has the same layout
 template <typename InputT, typename OutputT>
-void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                const ir::PermuteType &type)
 {
-  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
-      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
+      !src_tensor->is_dynamic())
   {
     assert(!dst_tensor->is_dynamic());
 
@@ -139,7 +135,7 @@ void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor
   }
   else
   {
-    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor, type);
  }
 }
 
@@ -147,7 +143,7 @@ template <typename SRC_T, typename DST_T,
           std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
                              std::is_base_of<backend::ITensor, DST_T>::value,
                            bool> = true>
-void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor, const ir::PermuteType &type)
 {
   // TODO Support other types
   if (src_tensor->data_type() == ir::DataType::FLOAT32)
   {
@@ -156,17 +152,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
     {
       case ir::DataType::QUANT_UINT8_ASYMM:
       {
-        quantize<float, uint8_t>(src_tensor, dst_tensor);
+        quantize<float, uint8_t>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT8_SYMM:
      {
-        quantize<float, int8_t>(src_tensor, dst_tensor);
+        quantize<float, int8_t>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT16_SYMM:
       {
-        quantize<float, int16_t>(src_tensor, dst_tensor);
+        quantize<float, int16_t>(src_tensor, dst_tensor, type);
         break;
       }
       default:
@@ -182,17 +178,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
     {
       case ir::DataType::QUANT_UINT8_ASYMM:
      {
-        dequantize<uint8_t, float>(src_tensor, dst_tensor);
+        dequantize<uint8_t, float>(src_tensor, dst_tensor, type);
         break;
       }
      case ir::DataType::QUANT_INT8_SYMM:
       {
-        dequantize<int8_t, float>(src_tensor, dst_tensor);
+        dequantize<int8_t, float>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT16_SYMM:
       {
-        dequantize<int16_t, float>(src_tensor, dst_tensor);
+        dequantize<int16_t, float>(src_tensor, dst_tensor, type);
         break;
       }
       default:
@@ -256,7 +252,7 @@ void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *d
   assert(src_tensor != dst_tensor);
   if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
   {
-    typeAwareQuantize(src_tensor, dst_tensor);
+    typeAwareQuantize(src_tensor, dst_tensor, permute_type);
     return;
   }
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 517d0dc6bee..0ea91c4535c 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -93,7 +93,7 @@ class IPermuteFunction : public IFunction
       // Now there is no case where both src and dst have cl buffer.
      assert(!src->needMemoryMap());
 
-      if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+      if (!src->has_padding() && !dst->has_padding() && (permute_type == ir::PermuteType::COPY))
      {
         src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
       }
@@ -110,7 +110,7 @@ class IPermuteFunction : public IFunction
       }
     }
     else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
-             !dst->has_padding() && src->layout() == dst->layout())
+             !dst->has_padding() && (permute_type == ir::PermuteType::COPY))
     {
       assert(!dst->needMemoryMap());
       dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
index 2a3ca58613a..586e2305708 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.test.cc
+++ b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
@@ -71,7 +71,7 @@ class MockUpTensor : public ITensor
 
   uint8_t *buffer() const override { return _data; }
 
-  ir::Layout layout() const override { return _layout; }
+  ir::Layout layout() const { return _layout; }
   ir::DataType data_type() const override { return _type_info.type(); }
   float data_scale() const override { return _type_info.scale(); }
   int32_t data_zero_point() const override { return _type_info.zero_point(); }
@@ -97,15 +97,19 @@ class MockUpTensor : public ITensor
 class MockUpLayer : public IPermuteFunction
 {
 public:
-  MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+  MockUpLayer(const std::vector<std::unique_ptr<MockUpTensor>> &inputs,
+              const std::vector<std::unique_ptr<MockUpTensor>> &outputs)
   {
-    assert(inputs.size() == outputs.size());
-    _src_tensors = inputs;
-    _dst_tensors = outputs;
-    _permute_types.resize(inputs.size());
+    const uint32_t input_size = inputs.size();
+    assert(outputs.size() == input_size);
+    _src_tensors.resize(input_size);
+    _dst_tensors.resize(input_size);
+    _permute_types.resize(input_size);
 
-    for (uint32_t i = 0; i < inputs.size(); i++)
+    for (uint32_t i = 0; i < input_size; i++)
     {
+      _src_tensors[i] = inputs[i].get();
+      _dst_tensors[i] = outputs[i].get();
       if (inputs[i]->layout() == outputs[i]->layout())
         _permute_types[i] = ir::PermuteType::COPY;
       else if (inputs[i]->layout() == ir::Layout::NHWC)
@@ -143,10 +147,7 @@ TEST(IPermuteFunction, float_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -185,10 +186,7 @@ TEST(IPermuteFunction, float_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -230,10 +228,7 @@ TEST(IPermuteFunction, float_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -278,10 +273,7 @@ TEST(IPermuteFunction, float_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -346,10 +338,7 @@ TEST(IPermuteFunction, float_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -417,9 +406,7 @@ TEST(IPermuteFunction, float_to_qasymm8)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -470,9 +457,7 @@ TEST(IPermuteFunction, float_to_qsymm8)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -523,9 +508,7 @@ TEST(IPermuteFunction, float_to_qsymm16)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -585,9 +568,7 @@ TEST(IPermuteFunction, qasymm8_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -647,9 +628,7 @@ TEST(IPermuteFunction, qsymm8_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -709,9 +688,7 @@ TEST(IPermuteFunction, qsymm16_to_float)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -782,10 +759,7 @@ TEST(IPermuteFunction, float_qasymm8_layout)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
@@ -880,10 +854,7 @@ TEST(IPermuteFunction, float_qasymm8_layout)
     outputs[i]->setBuffer(output_buffers[i].get());
   }
 
-  auto mockup_layer = std::make_unique<MockUpLayer>(
-    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
-    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(),
-                           outputs[3].get()});
+  auto mockup_layer = std::make_unique<MockUpLayer>(inputs, outputs);
   mockup_layer->run();
 
   for (size_t i = 0; i < 4; ++i)
diff --git a/runtime/onert/core/src/exec/ShapeConverter.cc b/runtime/onert/core/src/exec/ShapeConverter.cc
deleted file mode 100644
index 707aef29b30..00000000000
--- a/runtime/onert/core/src/exec/ShapeConverter.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ShapeConverter.h"
-
-namespace onert
-{
-namespace exec
-{
-
-ir::Shape convertShape(const ir::Shape &shape, ir::Layout src_layout, ir::Layout dst_layout)
-{
-  if (shape.rank() != 4)
-    return shape;
-
-  if (src_layout == dst_layout)
-    return shape;
-
-  if (src_layout == ir::Layout::NCHW && dst_layout == ir::Layout::NHWC)
-  {
-    const ir::Shape &src_NCHW = shape;
-    ir::Shape dst_NHWC(4);
-    dst_NHWC.dim(0) = src_NCHW.dim(0); // N
-    dst_NHWC.dim(1) = src_NCHW.dim(2); // H
-    dst_NHWC.dim(2) = src_NCHW.dim(3); // W
-    dst_NHWC.dim(3) = src_NCHW.dim(1); // C
-
-    return dst_NHWC;
-  }
-
-  if (src_layout == ir::Layout::NHWC && dst_layout == ir::Layout::NCHW)
-  {
-    const ir::Shape &src_NHWC = shape;
-    ir::Shape dst_NCHW(4);
-    dst_NCHW.dim(0) = src_NHWC.dim(0); // N
-    dst_NCHW.dim(1) = src_NHWC.dim(3); // C
-    dst_NCHW.dim(2) = src_NHWC.dim(1); // H
-    dst_NCHW.dim(3) = src_NHWC.dim(2); // W
-
-    return dst_NCHW;
-  }
-
-  throw std::runtime_error("Should not reach here");
-}
-
-} // namespace exec
-} // namespace onert
diff --git a/runtime/onert/core/src/exec/ShapeConverter.h b/runtime/onert/core/src/exec/ShapeConverter.h
deleted file mode 100644
index 7dc7e7536f1..00000000000
--- a/runtime/onert/core/src/exec/ShapeConverter.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SHAPE_CONVERTER_H__
-#define __ONERT_EXEC_SHAPE_CONVERTER_H__
-
-#include 
-#include 
-
-namespace onert
-{
-namespace exec
-{
-
-/**
- * @brief Converts shape when its rank is 4
- *
- * @return ir::Shape Return a shape based on dst_layout. If rank is not 4, input shape is
- *         returned without conversion.
- */
-ir::Shape convertShape(const ir::Shape &shape, ir::Layout src_layout, ir::Layout dst_layout);
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SHAPE_CONVERTER_H__
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.test.h b/runtime/onert/core/src/exec/feature/MockTensor.test.h
index 1d2d375e2ae..bdddad99aae 100644
--- a/runtime/onert/core/src/exec/feature/MockTensor.test.h
+++ b/runtime/onert/core/src/exec/feature/MockTensor.test.h
@@ -47,7 +47,7 @@ template <typename T> class MockTensor : public onert::backend::ITensor
 
 public: // DUMMY methods
   size_t total_size() const override { return 0; }
-  onert::ir::Layout layout() const override { return _layout; }
+  onert::ir::Layout layout() const { return _layout; }
   onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
   float data_scale() const override { return 0; }
   int32_t data_zero_point() const override { return 0; }
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h
index e1a963cbdc6..0519b995718 100644
--- a/runtime/onert/core/src/exec/feature/nchw/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h
@@ -52,8 +52,6 @@ template <typename T> class Reader : public feature::Reader<T>
   Reader(backend::ITensor *tensor)
     : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
   {
-    assert(tensor->layout() == ir::Layout::NCHW);
-
     const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
     auto shape = tensor->getShape();
     _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
index 3e3c431bfa5..eec0675553a 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
@@ -53,8 +53,6 @@ template <typename T> class Reader : public feature::Reader<T>
   Reader(const backend::ITensor *tensor)
     : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
   {
-    assert(tensor->layout() == ir::Layout::NHWC);
-
     const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
     auto shape = tensor->getShape();
     _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
diff --git a/runtime/onert/core/src/ir/Coordinates.cc b/runtime/onert/core/src/ir/Coordinates.cc
index a02a56567ef..b8e1d442129 100644
--- a/runtime/onert/core/src/ir/Coordinates.cc
+++ b/runtime/onert/core/src/ir/Coordinates.cc
@@ -23,20 +23,23 @@ namespace onert
 namespace ir
 {
 
-Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
-                               Layout to_layout)
+Coordinates convertCoordinates(const Coordinates &from_coordinates, const PermuteType &type)
 {
   assert(from_coordinates.size() == 4);
   Coordinates to{from_coordinates};
-  if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
+  if (type == PermuteType::COPY)
+    return to;
+
+  if (type == PermuteType::NHWC_TO_NCHW)
   {
     to.set(0, from_coordinates[0]);
     to.set(1, from_coordinates[3]);
     to.set(2, from_coordinates[1]);
     to.set(3, from_coordinates[2]);
   }
-  else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
+  else
   {
+    assert(type == PermuteType::NCHW_TO_NHWC);
     to.set(0, from_coordinates[0]);
     to.set(1, from_coordinates[2]);
     to.set(2, from_coordinates[3]);
diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc
index 1961aea5da1..ad199f2d5dd 100644
--- a/runtime/onert/core/src/ir/Shape.cc
+++ b/runtime/onert/core/src/ir/Shape.cc
@@ -66,29 +66,29 @@ uint64_t Shape::num_elements() const
                          std::multiplies<uint64_t>());
 }
 
-Shape permuteShape(const Shape &shape, Layout from, Layout to)
+Shape convertShape(const Shape &shape, const PermuteType &type)
 {
   assert(shape.rank() <= Shape::kMaxRank);
   Shape ret{shape};
-  if (from == to)
-    return ret;
-  if (shape.rank() < 4)
+
+  if (type == ir::PermuteType::COPY || shape.rank() < 4)
     return ret;
+
   // Permutation changing layout beyond 4-D is not supported yet
   assert(shape.rank() <= 4);
-  if (from == Layout::NHWC && to == Layout::NCHW)
+
+  if (type == ir::PermuteType::NHWC_TO_NCHW)
   {
     ret.dim(1) = shape.dim(3);
     ret.dim(2) = shape.dim(1);
     ret.dim(3) = shape.dim(2);
+    return ret;
   }
-  else if (from == Layout::NCHW && to == Layout::NHWC)
-  {
-    ret.dim(1) = shape.dim(2);
-    ret.dim(2) = shape.dim(3);
-    ret.dim(3) = shape.dim(1);
-  }
-  // Other cases(either `from` or `to` is UNKNOWN), just return the original shape
+
+  assert(type == ir::PermuteType::NCHW_TO_NHWC);
+  ret.dim(1) = shape.dim(2);
+  ret.dim(2) = shape.dim(3);
+  ret.dim(3) = shape.dim(1);
   return ret;
 }
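Note: the following sketch is illustrative only and is not part of the patch. It shows how the new PermuteType-based helpers fit together the way PermuteLayer's inner loop uses them; the coordinate values are hypothetical, and the initializer-list construction of ir::Coordinates is an assumption based on the {0, 0, 0, 0} literals used elsewhere in this diff.

    // C++ sketch: mapping one source element to its destination position.
    const onert::ir::PermuteType type = onert::ir::PermuteType::NHWC_TO_NCHW;
    onert::ir::Coordinates src_coords{0, 2, 3, 1}; // (N, H, W, C) in the source tensor
    onert::ir::Coordinates dst_coords =
      onert::ir::convertCoordinates(src_coords, type);
    // dst_coords is (0, 1, 2, 3): the same element addressed as (N, C, H, W).
    // With ir::PermuteType::COPY the coordinates are returned unchanged
    // (see the Coordinates.cc hunk above).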