diff --git a/compute/cker/include/cker/train/operation/ReLU6.h b/compute/cker/include/cker/train/operation/ReLU6.h
new file mode 100644
index 00000000000..1061a8d1a12
--- /dev/null
+++ b/compute/cker/include/cker/train/operation/ReLU6.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
+#define __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace train
+{
+
+inline void ReLU6Grad(const Shape &output_shape, const float *output_data,
+                      const Shape &incoming_shape, const float *incoming_data,
+                      const Shape &grad_shape, float *grad_data)
+{
+  const auto output_map = MapAsVector(output_data, output_shape);
+  const auto incoming_map = MapAsVector(incoming_data, incoming_shape);
+  auto grad_map = MapAsVector(grad_data, grad_shape);
+
+  if (output_shape == incoming_shape && output_shape == grad_shape)
+    grad_map.array() =
+      incoming_map.array() *
+      (0.0f < output_map.array() && output_map.array() < 6.0f).template cast<float>();
+  else
+    throw std::runtime_error("cker::ReLU6Grad: Unsupported shape");
+}
+
+} // namespace train
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
diff --git a/compute/cker/src/train/Relu6.test.cc b/compute/cker/src/train/Relu6.test.cc
new file mode 100644
index 00000000000..2ab56b19cc7
--- /dev/null
+++ b/compute/cker/src/train/Relu6.test.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/ReLU6.h>
+#include <cker/train/operation/ReLU6.h>
+
+#include <cassert>
+#include <gtest/gtest.h>
+#include <vector>
+
+namespace
+{
+
+using namespace nnfw::cker;
+
+template <typename T> class Relu6OpVerifier
+{
+public:
+  void verifyForward(const std::vector<T> &input, const std::vector<T> &expected_output)
+  {
+    assert(input.size() == expected_output.size());
+
+    std::vector<T> calc_output(input.size()); // calculated output
+    ReLU6(Shape{static_cast<int>(input.size())}, input.data(), calc_output.data());
+
+    for (size_t i = 0; i < calc_output.size(); ++i)
+      ASSERT_EQ(expected_output[i], calc_output[i]);
+  }
+
+  void verifyBackward(const std::vector<T> &output, const std::vector<T> &input_bwd,
+                      const std::vector<T> &expected_output_bwd, bool expect_eq = true)
+  {
+    std::vector<T> calc_output_bwd(input_bwd.size()); // calculated output backward
+    train::ReLU6Grad(Shape{static_cast<int>(output.size())}, output.data(),
+                     Shape{static_cast<int>(input_bwd.size())}, input_bwd.data(),
+                     Shape{static_cast<int>(calc_output_bwd.size())}, calc_output_bwd.data());
+
+    if (expect_eq)
+      EXPECT_EQ(expected_output_bwd, calc_output_bwd);
+    else
+      EXPECT_NE(expected_output_bwd, calc_output_bwd);
+  }
+};
+
+} // namespace
+
+TEST(CKer_Operation, ReLU6)
+{
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd         = {-2.0, -1.0,  2.0,  3.0,  6.0,  7.0};
+    std::vector<float> output_fwd           = { 0.0,  0.0,  2.0,  3.0,  6.0,  7.0};
+    std::vector<float> input_bwd            = {-0.1, -0.2,  0.3,  0.4, -0.1,  0.5};
+    std::vector<float> expected_output_bwd  = { 0.0,  0.0,  0.3,  0.4,  0.0,  0.0};
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd);
+  }
+
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd         = { 7.0,  8.0,  4.0, -4.0, -5.0, 10.0};
+    std::vector<float> output_fwd           = { 6.0,  6.0,  4.0,  0.0,  0.0,  6.0};
+    std::vector<float> input_bwd            = {-6.1, -3.3,  7.0,  8.4, -9.2,  0.0};
+    std::vector<float> expected_output_bwd  = { 0.0,  0.0,  7.0,  0.0,  0.0,  0.0};
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd);
+  }
+}
+
+TEST(CKer_Operation, neg_ReLU6)
+{
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd         = { 0.0,  2.0,  4.0,  6.0,  8.0, 10.0};
+    std::vector<float> output_fwd           = { 0.0,  2.0,  4.0,  6.0,  6.0,  6.0};
+    std::vector<float> input_bwd            = { 0.1,  0.2,  0.3,  0.4,  0.5,  0.6};
+    std::vector<float> expected_output_bwd  = { 0.1,  0.2,  0.3,  0.4,  0.5,  0.6}; // wrong value
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd, false);
+  }
+
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd         = { 0.0,  2.0,  4.0,  6.0,  8.0, 10.0};
+    std::vector<float> output_fwd           = { 0.0,  2.0,  4.0,  6.0,  6.0,  6.0};
+    std::vector<float> input_bwd            = { 0.1,  0.2,  0.3,  0.4}; // size mismatch
+    std::vector<float> expected_output_bwd  = { 0.0,  0.2,  0.3,  0.4};
+    // clang-format on
+
+    EXPECT_ANY_THROW(verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd, false));
+  }
+}
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
index e34cc7ceafc..a9091fb0033 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
@@ -133,6 +133,10 @@ void ConvolutionLayer::backward()
 void ConvolutionLayer::backwardFloat32()
 {
   // Calculate gradient for activation
+  const IPortableTensor *backprop_act = getFusedActivationBackprop(
+    _activation, _output, _back_prop_output, _act_back_prop_output.get());
+
+  /*
   const IPortableTensor *backprop_act;
   switch (_activation)
   {
@@ -148,7 +152,7 @@ void ConvolutionLayer::backwardFloat32()
       break;
     default:
       throw std::runtime_error("train ConvolutionLayer: Unsupported activation type yet");
-  }
+  }*/
 
   // Initialize conv params for training kernels
   nnfw::cker::ConvParams conv_train_params;
diff --git a/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc
index a7a4d412576..b8e896da5ca 100644
--- a/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc
+++ b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc
@@ -19,6 +19,7 @@
 #include "OperationUtils.h"
 
 #include <cker/train/operation/ReLU.h>
+#include <cker/train/operation/ReLU6.h>
 
 namespace onert
 {
@@ -54,22 +55,27 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
     case ElementwiseActivationType::kReLU:
       if (input->data_type() == OperandType::FLOAT32)
       {
-        if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+        // Forward kernel is shared between ReLU(0-inf) and ReLU6(0-6)
+        if ((alpha == std::numeric_limits<float>::infinity() || alpha == 6.0f) && beta == 0.f)
         {
           cpu::ops::ElementwiseActivationLayer::configure(
             input, output, alpha, beta, cpu::ops::ElementwiseActivationType::kReLU);
 
-          _backward_kernel = [](const IPortableTensor *output, const IPortableTensor *incoming,
-                                IPortableTensor *outgoing) {
-            nnfw::cker::train::ReLUGrad(getShape(output), getBuffer(output),
-                                        getShape(incoming), getBuffer(incoming),
-                                        getShape(outgoing), getBuffer(outgoing));
+          auto relu_cker = nnfw::cker::train::ReLUGrad;
+          if (alpha == 6.0f)
+            relu_cker = nnfw::cker::train::ReLU6Grad;
+
+          _backward_kernel = [relu_cker](const IPortableTensor *output,
+                                         const IPortableTensor *incoming,
+                                         IPortableTensor *outgoing) {
+            relu_cker(getShape(output), getBuffer(output), getShape(incoming),
+                      getBuffer(incoming), getShape(outgoing), getBuffer(outgoing));
           };
         }
         else
         {
           throw std::runtime_error("train ElementwiseActivationLayer : This layer does not "
-                                   "suppport other ReLU except for ReLU(0-inf)");
+                                   "support any ReLU other than ReLU(0-inf) and ReLU6(0-6)");
         }
       }
       else
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
index 539a759307e..df23efd8ad2 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -133,6 +133,11 @@ void FullyConnectedLayer::backward()
 void FullyConnectedLayer::backwardFloat32()
 {
   // Calculate gradient for activation
+
+  const IPortableTensor *backprop_act = getFusedActivationBackprop(
+    _activation, _output, _back_prop_output, _act_back_prop_output.get());
+
+  /*
   const IPortableTensor *backprop_act;
   switch (_activation)
   {
@@ -149,6 +154,7 @@ void FullyConnectedLayer::backwardFloat32()
     default:
       throw std::runtime_error("train FullyConnectedLayer: Unsupported activation type yet");
   }
+  */
 
   // Initialize TransposeParams
   nnfw::cker::TransposeParams transpose_param;
diff --git a/runtime/onert/backend/train/ops/OperationUtils.cc b/runtime/onert/backend/train/ops/OperationUtils.cc
new file mode 100644
index 00000000000..270b0a839f1
--- /dev/null
+++ b/runtime/onert/backend/train/ops/OperationUtils.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+#include <cker/train/operation/ReLU.h>
+#include <cker/train/operation/ReLU6.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+const IPortableTensor *getFusedActivationBackprop(const ir::Activation &activation,
+                                                  const IPortableTensor *output,
+                                                  const IPortableTensor *input_backprop,
+                                                  IPortableTensor *output_backprop)
+{
+  const IPortableTensor *res;
+
+  switch (activation)
+  {
+    case ir::Activation::NONE:
+      res = input_backprop;
+      break;
+    case ir::Activation::RELU:
+      nnfw::cker::train::ReLUGrad(getShape(output), getBuffer(output),
+                                  getShape(input_backprop), getBuffer(input_backprop),
+                                  getShape(output_backprop), getBuffer(output_backprop));
+      res = output_backprop;
+      break;
+    case ir::Activation::RELU6:
+      nnfw::cker::train::ReLU6Grad(getShape(output), getBuffer(output),
+                                   getShape(input_backprop), getBuffer(input_backprop),
+                                   getShape(output_backprop), getBuffer(output_backprop));
+      res = output_backprop;
+      break;
+    default:
+      throw std::runtime_error("Unsupported activation type yet");
+  }
+
+  return res;
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/OperationUtils.h b/runtime/onert/backend/train/ops/OperationUtils.h
index 470b0fd91c9..da07c4cc9db 100644
--- a/runtime/onert/backend/train/ops/OperationUtils.h
+++ b/runtime/onert/backend/train/ops/OperationUtils.h
@@ -36,6 +36,27 @@ using cpu::ops::getNumberOfDimensions;
 using cpu::ops::getNumberOfElements;
 using cpu::ops::getSizeOfDimension;
 
+/**
+ * @brief Calculate the back propagation of a fused activation
+ *
+ *   -- forward direction -->
+ *
+ *   [ current layer ]  ----  [ next layer ]
+ *   [   op | act    ]
+ *
+ *   <-- backward direction --
+ *
+ * @param activation      activation of the current layer
+ * @param output          forward direction's output of the current layer
+ * @param input_backprop  backward direction's input, i.e. the incoming gradient from the next layer
+ * @param output_backprop backward direction's output of the activation, i.e. the outgoing
+ *                        gradient of the activation
+ */
+const IPortableTensor *getFusedActivationBackprop(const ir::Activation &activation,
+                                                  const IPortableTensor *output,
+                                                  const IPortableTensor *input_backprop,
+                                                  IPortableTensor *output_backprop);
+
 } // namespace ops
 } // namespace train
 } // namespace backend
diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc
index 96b14fc81e6..b1ddf0322ae 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.cc
+++ b/runtime/onert/backend/train/ops/PoolLayer.cc
@@ -104,6 +104,9 @@ class MaxPool2D final : public TrainingKernelRegistry
     assert(back_prop_out->layout() == ir::Layout::NHWC);
 
     // activation bacward
+    back_prop_out =
+      getFusedActivationBackprop(_activation, _output, back_prop_out, _act_back_prop_output.get());
+    /*
     switch (_activation)
     {
       case ir::Activation::NONE:
@@ -118,6 +121,7 @@
       default:
         throw std::runtime_error("PoolLayer: Unsupported activation type yet");
     }
+    */
 
     // maxpool baackward
     auto arg_max_index = _arg_max_index.get();
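
For reference, the backward kernel added above applies the ReLU6 derivative: the incoming gradient is passed through only where the forward output lies strictly between 0 and 6, and is zeroed at the saturated ends. Below is a minimal standalone sketch of that rule on plain arrays (illustration only, not part of the patch; function and variable names are made up here). It mirrors what the Eigen expression in cker's ReLU6Grad computes.

#include <cstddef>
#include <iostream>

// Same element-wise rule as cker::train::ReLU6Grad, written with a plain loop.
void relu6_grad_ref(const float *output, const float *incoming, float *grad, std::size_t size)
{
  for (std::size_t i = 0; i < size; ++i)
  {
    // Gradient passes through only where 0 < output < 6; it is 0 where the
    // activation saturated (output <= 0 or output >= 6).
    grad[i] = (0.0f < output[i] && output[i] < 6.0f) ? incoming[i] : 0.0f;
  }
}

int main()
{
  const float output[] = {0.0f, 2.0f, 4.0f, 6.0f};   // forward ReLU6 outputs
  const float incoming[] = {0.1f, 0.2f, 0.3f, 0.4f}; // gradient from the next layer
  float grad[4] = {};

  relu6_grad_ref(output, incoming, grad, 4);

  for (float g : grad)
    std::cout << g << ' '; // prints: 0 0.2 0.3 0
  std::cout << '\n';
  return 0;
}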