diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 467c2e36f9..a952ea8a54 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -162,12 +162,19 @@ const void *FloatTensor::getAddress(unsigned int i) const {
   return &((float *)getData())[i];
 }
 
-const float FloatTensor::getValue(unsigned int i) const {
+const float &FloatTensor::getValue(unsigned int i) const {
   return ((float *)getData())[i];
 }
 
-const float FloatTensor::getValue(unsigned int b, unsigned int c,
-                                  unsigned int h, unsigned int w) const {
+float &FloatTensor::getValue(unsigned int i) { return ((float *)getData())[i]; }
+
+const float &FloatTensor::getValue(unsigned int b, unsigned int c,
+                                   unsigned int h, unsigned int w) const {
+  return getValue(getIndex(b, c, h, w));
+}
+
+float &FloatTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                             unsigned int w) {
   return getValue(getIndex(b, c, h, w));
 }
 
@@ -896,6 +903,191 @@ void FloatTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
   }
 }
 
+std::vector<TensorV2> FloatTensor::split(std::vector<size_t> sizes, int axis) {
+  size_t num_size = sizes.size();
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
+  NNTR_THROW_IF(dim.getTensorDim(axis) != total_size, std::invalid_argument)
+    << "sum of the given sizes does not match the tensor dim, tensor dim: "
+    << dim.getTensorDim(axis) << " total size: " << total_size;
+
+  std::vector<TensorDim> ret_dims;
+  ret_dims.resize(num_size);
+  for (unsigned int i = 0; i < num_size; ++i) {
+    ret_dims[i] = dim;
+    ret_dims[i].setTensorDim(axis, sizes[i]);
+  }
+
+  bool is_format_nchw = (dim.getFormat() == Tformat::NCHW);
+  std::vector<TensorV2> ret;
+
+  auto iter_value = [this, is_format_nchw](
+                      std::array<size_t, 4> &loc,
+                      const std::array<size_t, 4> &end_loc,
+                      const std::array<size_t, 4> &reset_dim_arr) -> float & {
+    auto &value = (is_format_nchw) ? getValue(loc[0], loc[1], loc[2], loc[3])
+                                   : getValue(loc[0], loc[3], loc[1], loc[2]);
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] == end_loc[i]) {
+        loc[i] -= reset_dim_arr[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  ret.reserve(num_size);
+
+  unsigned int accumulated_size = 0;
+  for (unsigned int i = 0; i < num_size; ++i) {
+    std::array<size_t, 4> loc = {0, 0, 0, 0};
+
+    if (is_format_nchw) {
+      loc[axis] += accumulated_size;
+    } else {
+      if (axis == 0) {
+        loc[0] += accumulated_size;
+      } else if (axis == 1) {
+        loc[3] += accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += accumulated_size;
+      }
+    }
+
+    ret.emplace_back(ret_dims[i]);
+    auto &ret_t = ret.back();
+
+    std::array<size_t, 4> end_loc;
+
+    if (is_format_nchw) {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].channel(),
+                 ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].height(),
+                 ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    accumulated_size += sizes[i];
+
+    if (is_format_nchw) {
+      end_loc[axis] = accumulated_size;
+    } else {
+      if (axis == 0) {
+        end_loc[0] = accumulated_size;
+      } else if (axis == 1) {
+        end_loc[3] = accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        end_loc[axis - 1] = accumulated_size;
+      }
+    }
+
+    std::array<size_t, 4> reset_dim_arr;
+    if (is_format_nchw) {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(),
+                       ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(),
+                       ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    ret_t.apply_i<float>(
+      [&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
+        return iter_value(loc, end_loc, reset_dim_arr);
+      });
+  }
+
+  return ret;
+}
+
+TensorV2 FloatTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  TensorV2 ret;
+  auto ref_dim = tensors.front().getDim();
+  bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
+  ref_dim.setTensorDim(axis, 1);
+  NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
+                             [&ref_dim, axis](const TensorV2 &t) {
+                               auto cur_dim = t.getDim();
+                               cur_dim.setTensorDim(axis, 1);
+                               return ref_dim == cur_dim;
+                             }),
+                std::invalid_argument)
+    << "all tensors must have the same dimension except along the given axis, "
+    << "ref_dim: " << ref_dim << " axis: " << axis;
+
+  auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
+                                  [axis](unsigned cur, const TensorV2 &t) {
+                                    return cur + t.getDim().getTensorDim(axis);
+                                  });
+  auto iter_value =
+    [is_format_nchw](std::array<size_t, 4> &loc,
+                     const std::array<size_t, 4> &start_loc, TensorV2 &t,
+                     const std::array<size_t, 4> &ref_dim_arr) -> float & {
+    auto &value = is_format_nchw
+                    ? t.getValue<float>(loc[0], loc[1], loc[2], loc[3])
+                    : t.getValue<float>(loc[0], loc[3], loc[1], loc[2]);
+
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] - start_loc[i] == ref_dim_arr[i]) {
+        loc[i] = start_loc[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  auto ret_dim = ref_dim;
+  ret_dim.setTensorDim(axis, axis_dim);
+
+  ret = TensorV2(ret_dim);
+
+  std::array<size_t, 4> loc = {0, 0, 0, 0};
+  for (auto &t : tensors) {
+    std::array<size_t, 4> start_loc = loc;
+    std::array<size_t, 4> tensor_dim_arr;
+    if (is_format_nchw) {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(1);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(3);
+    } else {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(3);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(1);
+    }
+
+    for (size_t i = 0u, sz = t.size(); i < sz; ++i) {
+      iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<float>(i);
+    }
+
+    if (is_format_nchw) {
+      loc[axis] += t.getDim().getTensorDim(axis);
+    } else {
+      if (axis == 0) {
+        loc[0] += t.getDim().getTensorDim(axis);
+      } else if (axis == 1) {
+        loc[3] += t.getDim().getTensorDim(axis);
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += t.getDim().getTensorDim(axis);
+      }
+    }
+  }
+
+  return ret;
+}
+
 void FloatTensor::print(std::ostream &out) const {
   printInstance(out, this);
   const float *data = (float *)getData();
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index c8f239ef26..7f27788e07 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -119,7 +119,13 @@ class FloatTensor : public TensorBase {
    * @brief return value at specific location
    * @param[in] i index
    */
-  const float getValue(unsigned int i) const;
+  const float &getValue(unsigned int i) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] i index
+   */
+  float &getValue(unsigned int i);
 
   /**
    * @brief return value at specific location
@@ -128,8 +134,18 @@ class FloatTensor : public TensorBase {
    * @param[in] h height location
    * @param[in] w width location
    */
-  const float getValue(unsigned int b, unsigned int c, unsigned int h,
-                       unsigned int w) const;
+  const float &getValue(unsigned int b, unsigned int c, unsigned int h,
+                        unsigned int w) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   */
+  float &getValue(unsigned int b, unsigned int c, unsigned int h,
+                  unsigned int w);
 
   /**
    * @copydoc TensorV2::setValue(float value)
@@ -302,6 +318,16 @@ class FloatTensor : public TensorBase {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout) override;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) override;
+
+  /**
+   * @copydoc TensorV2::cat(const std::vector<TensorV2> &tensors, int axis)
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis);
+
   /**
    * @copydoc TensorV2::copy(const TensorV2 &from)
    */
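Note: the iter_value lambdas above visit a 4-D region in row-major order with an
odometer-style carry: the innermost axis advances first, and when an axis hits
its exclusive end bound it wraps back by that axis's extent and the next slower
axis advances. A minimal, self-contained sketch of that pattern (illustrative
only, not nntrainer code; the bounds below are made up):

#include <array>
#include <cstdio>

int main() {
  std::array<size_t, 4> loc = {0, 0, 0, 0};            // current position
  const std::array<size_t, 4> end_loc = {1, 2, 2, 3};  // exclusive end bounds
  const std::array<size_t, 4> reset = {1, 2, 2, 3};    // extent of each axis

  for (size_t step = 0; step < 1 * 2 * 2 * 3; ++step) {
    std::printf("(%zu, %zu, %zu, %zu)\n", loc[0], loc[1], loc[2], loc[3]);
    // Odometer increment: bump the last axis; on reaching the end bound,
    // wrap it back by its extent and carry into the next slower axis.
    for (int i = 3; i >= 0; --i) {
      loc[i]++;
      if (loc[i] == end_loc[i]) {
        loc[i] -= reset[i];
        continue;
      }
      break;
    }
  }
  return 0;
}

In split() the region's start is offset by accumulated_size along the split
axis, so end_loc and the reset extents differ per piece; the sketch uses a
region starting at the origin for simplicity.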
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 5902c8b6ac..14b752b7f1 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -162,12 +162,19 @@ const void *HalfTensor::getAddress(unsigned int i) const {
   return &((_FP16 *)getData())[i];
 }
 
-const _FP16 HalfTensor::getValue(unsigned int i) const {
+const _FP16 &HalfTensor::getValue(unsigned int i) const {
   return ((_FP16 *)getData())[i];
 }
 
-const _FP16 HalfTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
-                                 unsigned int w) const {
+_FP16 &HalfTensor::getValue(unsigned int i) { return ((_FP16 *)getData())[i]; }
+
+const _FP16 &HalfTensor::getValue(unsigned int b, unsigned int c,
+                                  unsigned int h, unsigned int w) const {
+  return getValue(getIndex(b, c, h, w));
+}
+
+_FP16 &HalfTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                            unsigned int w) {
   return getValue(getIndex(b, c, h, w));
 }
 
@@ -755,6 +762,189 @@ void HalfTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
   }
 }
 
+std::vector<TensorV2> HalfTensor::split(std::vector<size_t> sizes, int axis) {
+  size_t num_size = sizes.size();
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
+  NNTR_THROW_IF(dim.getTensorDim(axis) != total_size, std::invalid_argument)
+    << "sum of the given sizes does not match the tensor dim, tensor dim: "
+    << dim.getTensorDim(axis) << " total size: " << total_size;
+
+  std::vector<TensorDim> ret_dims;
+  ret_dims.resize(num_size);
+  for (unsigned int i = 0; i < num_size; ++i) {
+    ret_dims[i] = dim;
+    ret_dims[i].setTensorDim(axis, sizes[i]);
+  }
+
+  bool is_format_nchw = (dim.getFormat() == Tformat::NCHW);
+  std::vector<TensorV2> ret;
+
+  auto iter_value = [this, is_format_nchw](
+                      std::array<size_t, 4> &loc,
+                      const std::array<size_t, 4> &end_loc,
+                      const std::array<size_t, 4> &reset_dim_arr) -> _FP16 & {
+    auto &value = (is_format_nchw) ? getValue(loc[0], loc[1], loc[2], loc[3])
+                                   : getValue(loc[0], loc[3], loc[1], loc[2]);
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] == end_loc[i]) {
+        loc[i] -= reset_dim_arr[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  ret.reserve(num_size);
+
+  unsigned int accumulated_size = 0;
+  for (unsigned int i = 0; i < num_size; ++i) {
+    std::array<size_t, 4> loc = {0, 0, 0, 0};
+
+    if (is_format_nchw) {
+      loc[axis] += accumulated_size;
+    } else {
+      if (axis == 0) {
+        loc[0] += accumulated_size;
+      } else if (axis == 1) {
+        loc[3] += accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += accumulated_size;
+      }
+    }
+
+    ret.emplace_back(ret_dims[i]);
+    auto &ret_t = ret.back();
+
+    std::array<size_t, 4> end_loc;
+
+    if (is_format_nchw) {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].channel(),
+                 ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].height(),
+                 ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    accumulated_size += sizes[i];
+
+    if (is_format_nchw) {
+      end_loc[axis] = accumulated_size;
+    } else {
+      if (axis == 0) {
+        end_loc[0] = accumulated_size;
+      } else if (axis == 1) {
+        end_loc[3] = accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        end_loc[axis - 1] = accumulated_size;
+      }
+    }
+
+    std::array<size_t, 4> reset_dim_arr;
+    if (is_format_nchw) {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(),
+                       ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(),
+                       ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    ret_t.apply_i<_FP16>(
+      [&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) {
+        return iter_value(loc, end_loc, reset_dim_arr);
+      });
+  }
+
+  return ret;
+}
+
+TensorV2 HalfTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
+  if (axis == -1) {
+    axis = 3;
+  }
+  TensorV2 ret;
+  auto ref_dim = tensors.front().getDim();
+  bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
+  ref_dim.setTensorDim(axis, 1);
+  NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
+                             [&ref_dim, axis](const TensorV2 &t) {
+                               auto cur_dim = t.getDim();
+                               cur_dim.setTensorDim(axis, 1);
+                               return ref_dim == cur_dim;
+                             }),
+                std::invalid_argument)
+    << "all tensors must have the same dimension except along the given axis, "
+    << "ref_dim: " << ref_dim << " axis: " << axis;
+
+  auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
+                                  [axis](unsigned cur, const TensorV2 &t) {
+                                    return cur + t.getDim().getTensorDim(axis);
+                                  });
+  auto iter_value =
+    [is_format_nchw](std::array<size_t, 4> &loc,
+                     const std::array<size_t, 4> &start_loc, TensorV2 &t,
+                     const std::array<size_t, 4> &ref_dim_arr) -> _FP16 & {
+    auto &value = is_format_nchw
+                    ? t.getValue<_FP16>(loc[0], loc[1], loc[2], loc[3])
+                    : t.getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]);
+
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] - start_loc[i] == ref_dim_arr[i]) {
+        loc[i] = start_loc[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  auto ret_dim = ref_dim;
+  ret_dim.setTensorDim(axis, axis_dim);
+
+  ret = TensorV2(ret_dim);
+
+  std::array<size_t, 4> loc = {0, 0, 0, 0};
+  for (auto &t : tensors) {
+    std::array<size_t, 4> start_loc = loc;
+    std::array<size_t, 4> tensor_dim_arr;
+    if (is_format_nchw) {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(1);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(3);
+    } else {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(3);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(1);
+    }
+
+    for (size_t i = 0u, sz = t.size(); i < sz; ++i) {
+      iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<_FP16>(i);
+    }
+
+    if (is_format_nchw) {
+      loc[axis] += t.getDim().getTensorDim(axis);
+    } else {
+      if (axis == 0) {
+        loc[0] += t.getDim().getTensorDim(axis);
+      } else if (axis == 1) {
+        loc[3] += t.getDim().getTensorDim(axis);
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += t.getDim().getTensorDim(axis);
+      }
+    }
+  }
+  return ret;
+}
+
 void HalfTensor::print(std::ostream &out) const {
   printInstance(out, this);
   const _FP16 *data = (_FP16 *)getData();
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index a905ba4bb8..3dc3a081b9 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -118,7 +118,13 @@ class HalfTensor : public TensorBase {
    * @brief return value at specific location
    * @param[in] idx location
    */
-  const _FP16 getValue(unsigned int i) const;
+  const _FP16 &getValue(unsigned int i) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] i index
+   */
+  _FP16 &getValue(unsigned int i);
 
   /**
    * @brief return value at specific location
@@ -127,8 +133,18 @@ class HalfTensor : public TensorBase {
    * @param[in] h height location
    * @param[in] w width location
    */
-  const _FP16 getValue(unsigned int b, unsigned int c, unsigned int h,
-                       unsigned int w) const;
+  const _FP16 &getValue(unsigned int b, unsigned int c, unsigned int h,
+                        unsigned int w) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   */
+  _FP16 &getValue(unsigned int b, unsigned int c, unsigned int h,
+                  unsigned int w);
 
   /**
    * @copydoc TensorV2::setValue(float value)
@@ -301,6 +317,16 @@ class HalfTensor : public TensorBase {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout) override;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) override;
+
+  /**
+   * @copydoc TensorV2::cat(const std::vector<TensorV2> &tensors, int axis)
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis);
+
   /**
    * @copydoc TensorV2::copy(const TensorV2 &from)
    */
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 28484ed9e9..a8a3fff017 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -327,6 +327,11 @@ class TensorBase {
    */
   virtual void zoneout_mask(TensorV2 &opposite, float zoneout) = 0;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  virtual std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) = 0;
+
   /**
    * @copydoc TensorV2::print(std::ostream &out)
    */
diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp
index d37c6b2371..98fd095e6b 100644
--- a/nntrainer/tensor/tensor_v2.cpp
+++ b/nntrainer/tensor/tensor_v2.cpp
@@ -669,6 +669,69 @@ void TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout) {
   itensor->zoneout_mask(opposite, zoneout);
 }
 
+std::vector<TensorV2> TensorV2::split(unsigned num_size, int axis) {
+  NNTR_THROW_IF(num_size == 0, std::invalid_argument)
+    << "num_size cannot be zero";
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument)
+    << "cannot split along axis: " << axis;
+
+  NNTR_THROW_IF(getDim().getTensorDim(axis) % num_size != 0,
+                std::invalid_argument)
+    << "dimension along the axis is not divisible by num_size, axis: " << axis
+    << " num size: " << num_size;
+
+  std::vector<size_t> sizes;
+  sizes.resize(num_size);
+
+  unsigned int sz = getDim().getTensorDim(axis) / num_size;
+  std::fill(sizes.begin(), sizes.end(), sz);
+
+  return split(sizes, axis);
+}
+
+std::vector<TensorV2> TensorV2::split(std::vector<size_t> sizes, int axis) {
+  NNTR_THROW_IF(sizes.size() == 0, std::invalid_argument)
+    << "the given sizes vector cannot be empty";
+
+  NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
+    << "cannot split along axis: " << axis;
+
+  NNTR_THROW_IF(
+    std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }),
+    std::invalid_argument)
+    << "among the given sizes at least one size is 0";
+
+  return itensor->split(sizes, axis);
+}
+
+TensorV2 TensorV2::cat(const std::vector<TensorV2> &tensors, int axis) {
+  NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
+    << "cannot concatenate along axis: " << axis;
+
+  NNTR_THROW_IF(tensors.empty(), std::invalid_argument)
+    << "given tensor vector is empty";
+
+  TensorV2 output;
+  Tdatatype dtype = tensors.front().getDim().getDataType();
+
+  if (dtype == Tdatatype::FP32) {
+    output = FloatTensor::cat(tensors, axis);
+  } else if (dtype == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    output = HalfTensor::cat(tensors, axis);
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+
+  return output;
+}
+
 void TensorV2::print(std::ostream &out) const { itensor->print(out); }
 
 void TensorV2::putData() const { itensor->putData(); }
diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h
index ba0bf36a4d..9c1171adb9 100644
--- a/nntrainer/tensor/tensor_v2.h
+++ b/nntrainer/tensor/tensor_v2.h
@@ -1010,6 +1010,35 @@ class TensorV2 {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout);
 
+  /**
+   * @brief split tensor along axis.
+   *
+   * @param num_size number of evenly sized pieces to split into
+   * @param axis axis to split along
+   * @return std::vector<TensorV2> the split tensors
+   */
+  std::vector<TensorV2> split(unsigned num_size, int axis = 0);
+
+  /**
+   * @brief split tensor along axis.
+   *
+   * @param sizes sizes of the pieces along the given axis
+   * @param axis axis to split along
+   * @return std::vector<TensorV2> the split tensors
+   * @note the sum of the given sizes must equal the tensor's dimension
+   * along the given axis
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis = 0);
+
+  /**
+   * @brief concatenate tensors along axis
+   *
+   * @param tensors tensors to be concatenated to the first tensor
+   * @param axis axis to concatenate along
+   * @return TensorV2 concatenated tensor
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis = 0);
+
   /**
    * @brief Print element
    * @param[in] out out stream
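Note: a usage sketch of the API added in this patch. The TensorDim(b, c, h, w)
constructor and the include path are assumptions for illustration, and the
shapes are made up; this is not taken from the test suite.

#include <tensor_v2.h>

#include <vector>

void split_cat_roundtrip() {
  using nntrainer::TensorV2;

  // A 1x1x2x6 FP32 tensor; split the width axis (3) into widths 2 and 4.
  TensorV2 t(nntrainer::TensorDim(1, 1, 2, 6));
  std::vector<TensorV2> parts = t.split({2, 4}, 3);
  // parts[0] is 1x1x2x2 and parts[1] is 1x1x2x4; the sizes must sum to the
  // dimension along the axis, or split() throws std::invalid_argument.

  // cat() is the inverse along the same axis: joined has t's original
  // dimensions, with parts[0]'s columns first, then parts[1]'s.
  TensorV2 joined = TensorV2::cat(parts, 3);
}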