From fcf7762b7e471faa67ffee916271c048f01901e1 Mon Sep 17 00:00:00 2001
From: sseung
Date: Tue, 2 Jan 2024 17:09:53 +0900
Subject: [PATCH] Draft: [onert] support ReLU6 training

This draft adds support for the ReLU6 training feature.

- Add ReLU6Grad cker kernel
- Extract the fused-activation gradient calculation into a shared helper for reuse

ONE-DCO-1.0-Signed-off-by: SeungHui Youn
---
 .../cker/include/cker/train/operation/ReLU6.h |  51 ++++++++
 compute/cker/src/train/Relu6.test.cc          | 116 ++++++++++++++++++
 .../backend/train/ops/ConvolutionLayer.cc     |   6 +-
 .../backend/train/ops/FullyConnectedLayer.cc  |   6 +
 .../onert/backend/train/ops/OperationUtils.cc |  68 ++++++++++
 .../onert/backend/train/ops/OperationUtils.h  |  21 ++++
 runtime/onert/backend/train/ops/PoolLayer.cc  |   4 +
 7 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 compute/cker/include/cker/train/operation/ReLU6.h
 create mode 100644 compute/cker/src/train/Relu6.test.cc
 create mode 100644 runtime/onert/backend/train/ops/OperationUtils.cc

diff --git a/compute/cker/include/cker/train/operation/ReLU6.h b/compute/cker/include/cker/train/operation/ReLU6.h
new file mode 100644
index 00000000000..1061a8d1a12
--- /dev/null
+++ b/compute/cker/include/cker/train/operation/ReLU6.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
+#define __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+#include <stdexcept>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace train
+{
+
+inline void ReLU6Grad(const Shape &output_shape, const float *output_data,
+                      const Shape &incoming_shape, const float *incoming_data,
+                      const Shape &grad_shape, float *grad_data)
+{
+  const auto output_map = MapAsVector(output_data, output_shape);
+  const auto incoming_map = MapAsVector(incoming_data, incoming_shape);
+  auto grad_map = MapAsVector(grad_data, grad_shape);
+
+  if (output_shape == incoming_shape && output_shape == grad_shape)
+    grad_map.array() =
+      incoming_map.array() *
+      (0.0f < output_map.array() && output_map.array() < 6.0f).template cast<float>();
+  else
+    throw std::runtime_error("cker::ReLU6Grad: Unsupported shape");
+}
+
+} // namespace train
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRAIN_OPERATION_RELU6_H__
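
For context, a minimal standalone sketch of driving the new kernel; the main() harness and
the tensor values below are illustrative only and are not part of the patch. The gradient
passes through only where the forward output lies strictly inside (0, 6):

    #include <cker/train/operation/ReLU6.h>

    #include <iostream>
    #include <vector>

    int main()
    {
      using nnfw::cker::Shape;

      // Synthetic forward outputs of ReLU6 and the gradient arriving from the next layer.
      std::vector<float> output{0.0f, 2.0f, 6.0f};
      std::vector<float> incoming{0.5f, 0.5f, 0.5f};
      std::vector<float> grad(output.size());

      // Mask is strict on both ends: outputs at exactly 0 or 6 propagate no gradient.
      nnfw::cker::train::ReLU6Grad(Shape{3}, output.data(), Shape{3}, incoming.data(),
                                   Shape{3}, grad.data());

      for (float g : grad)
        std::cout << g << ' '; // prints "0 0.5 0"
    }
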
diff --git a/compute/cker/src/train/Relu6.test.cc b/compute/cker/src/train/Relu6.test.cc
new file mode 100644
index 00000000000..f3d8dd78c2a
--- /dev/null
+++ b/compute/cker/src/train/Relu6.test.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/ReLU6.h>
+#include <cker/train/operation/ReLU6.h>
+
+#include <gtest/gtest.h>
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+using namespace nnfw::cker;
+
+template <typename T> class Relu6OpVerifier
+{
+public:
+  void verifyForward(const std::vector<T> &input, const std::vector<T> &expected_output)
+  {
+    assert(input.size() == expected_output.size());
+
+    std::vector<T> calc_output(input.size()); // calculated output
+    ReLU6(Shape{static_cast<int>(input.size())}, input.data(), calc_output.data());
+
+    for (size_t i = 0; i < calc_output.size(); ++i)
+      ASSERT_EQ(expected_output[i], calc_output[i]);
+  }
+
+  void verifyBackward(const std::vector<T> &output, const std::vector<T> &input_bwd,
+                      const std::vector<T> &expected_output_bwd, bool expect_eq = true)
+  {
+    std::vector<T> calc_output_bwd(input_bwd.size()); // calculated output backward
+    train::ReLU6Grad(Shape{static_cast<int>(output.size())}, output.data(),
+                     Shape{static_cast<int>(input_bwd.size())}, input_bwd.data(),
+                     Shape{static_cast<int>(calc_output_bwd.size())}, calc_output_bwd.data());
+
+    if (expect_eq)
+      EXPECT_EQ(expected_output_bwd, calc_output_bwd);
+    else
+      EXPECT_NE(expected_output_bwd, calc_output_bwd);
+  }
+};
+
+} // namespace
+
+TEST(CKer_Operation, ReLU6)
+{
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd        = {-2.0, -1.0,  2.0,  3.0,  6.0,  7.0};
+    std::vector<float> output_fwd          = { 0.0,  0.0,  2.0,  3.0,  6.0,  7.0};
+    std::vector<float> input_bwd           = {-0.1, -0.2,  0.3,  0.4, -0.1,  0.5};
+    std::vector<float> expected_output_bwd = { 0.0,  0.0,  0.3,  0.4,  0.0,  0.0};
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd);
+  }
+
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd        = { 7.0,  8.0,  4.0, -4.0, -5.0, 10.0};
+    std::vector<float> output_fwd          = { 6.0,  6.0,  4.0,  0.0,  0.0,  6.0};
+    std::vector<float> input_bwd           = {-6.1, -3.3,  7.0,  8.4, -9.2,  0.0};
+    std::vector<float> expected_output_bwd = { 0.0,  0.0,  7.0,  0.0,  0.0,  0.0};
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd);
+  }
+}
+
+TEST(CKer_Operation, neg_ReLU6)
+{
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd        = { 0.0,  2.0,  4.0,  6.0,  8.0, 10.0};
+    std::vector<float> output_fwd          = { 0.0,  2.0,  4.0,  6.0,  6.0,  6.0};
+    std::vector<float> input_bwd           = { 0.1,  0.2,  0.3,  0.4,  0.5,  0.6};
+    std::vector<float> expected_output_bwd = { 0.1,  0.2,  0.3,  0.4,  0.5,  0.6}; // wrong value
+    // clang-format on
+
+    verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd, false);
+  }
+
+  {
+    Relu6OpVerifier<float> verifier;
+
+    // clang-format off
+    // std::vector<float> input_fwd        = { 0.0,  2.0,  4.0,  6.0,  8.0, 10.0};
+    std::vector<float> output_fwd          = { 0.0,  2.0,  4.0,  6.0,  6.0,  6.0};
+    std::vector<float> input_bwd           = { 0.1,  0.2,  0.3,  0.4}; // size mismatch
+    std::vector<float> expected_output_bwd = { 0.0,  0.2,  0.3,  0.4};
+    // clang-format on
+
+    EXPECT_ANY_THROW(verifier.verifyBackward(output_fwd, input_bwd, expected_output_bwd, false));
+  }
+}
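
Reading the first positive case above: the verifier feeds the gradient kernel directly, so
output_fwd need not be a genuine ReLU6 output (the 7.0 entry exists to exercise the upper
bound of the mask). Elementwise, grad[i] = input_bwd[i] when 0 < output_fwd[i] < 6 and 0
otherwise, which gives {0.0, 0.0, 0.3, 0.4, 0.0, 0.0} as expected.
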
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
index e34cc7ceafc..a9091fb0033 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
@@ -133,6 +133,10 @@ void ConvolutionLayer::backward()
 void ConvolutionLayer::backwardFloat32()
 {
   // Calculate gradient for activation
+  const IPortableTensor *backprop_act = getFusedActivationBackprop(
+    _activation, _output, _back_prop_output, _act_back_prop_output.get());
+
+  /*
   const IPortableTensor *backprop_act;
   switch (_activation)
   {
@@ -148,7 +152,7 @@ void ConvolutionLayer::backwardFloat32()
       break;
     default:
       throw std::runtime_error("train ConvolutionLayer: Unsupported activation type yet");
-  }
+  }*/
 
   // Initialize conv params for training kernels
   nnfw::cker::ConvParams conv_train_params;
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
index 539a759307e..df23efd8ad2 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -133,6 +133,11 @@ void FullyConnectedLayer::backward()
 void FullyConnectedLayer::backwardFloat32()
 {
   // Calculate gradient for activation
+
+  const IPortableTensor *backprop_act = getFusedActivationBackprop(
+    _activation, _output, _back_prop_output, _act_back_prop_output.get());
+
+  /*
   const IPortableTensor *backprop_act;
   switch (_activation)
   {
@@ -149,6 +154,7 @@ void FullyConnectedLayer::backwardFloat32()
     default:
       throw std::runtime_error("train FullyConnectedLayer: Unsupported activation type yet");
   }
+  */
 
   // Initialize TransposeParams
   nnfw::cker::TransposeParams transpose_param;
diff --git a/runtime/onert/backend/train/ops/OperationUtils.cc b/runtime/onert/backend/train/ops/OperationUtils.cc
new file mode 100644
index 00000000000..acfc7ee7c01
--- /dev/null
+++ b/runtime/onert/backend/train/ops/OperationUtils.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+#include <cker/train/operation/ReLU.h>
+#include <cker/train/operation/ReLU6.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+const IPortableTensor *getFusedActivationBackprop(const ir::Activation &activation,
+                                                  const IPortableTensor *output,
+                                                  const IPortableTensor *input_backprop,
+                                                  IPortableTensor *output_backprop)
+{
+  const IPortableTensor *res;
+
+  switch (activation)
+  {
+    case ir::Activation::NONE:
+      res = input_backprop;
+      break;
+    case ir::Activation::RELU:
+      nnfw::cker::train::ReLUGrad(getShape(output), getBuffer<float>(output),
+                                  getShape(input_backprop), getBuffer<float>(input_backprop),
+                                  getShape(output_backprop), getBuffer<float>(output_backprop));
+      res = output_backprop;
+      break;
+    case ir::Activation::RELU6:
+      nnfw::cker::train::ReLU6Grad(getShape(output), getBuffer<float>(output),
+                                   getShape(input_backprop), getBuffer<float>(input_backprop),
+                                   getShape(output_backprop), getBuffer<float>(output_backprop));
+      res = output_backprop;
+      break;
+    default:
+      throw std::runtime_error("Unsupported activation type yet");
+  }
+
+  return res;
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/OperationUtils.h b/runtime/onert/backend/train/ops/OperationUtils.h
index 470b0fd91c9..da07c4cc9db 100644
--- a/runtime/onert/backend/train/ops/OperationUtils.h
+++ b/runtime/onert/backend/train/ops/OperationUtils.h
@@ -36,6 +36,27 @@ using cpu::ops::getNumberOfDimensions;
 using cpu::ops::getNumberOfElements;
 using cpu::ops::getSizeOfDimension;
 
+/**
+ * @brief Calculate fused activation back propagation
+ *
+ *  -- forward direction -->
+ *
+ *  [ current layer ] ---- [ next layer ]
+ *  [   op  |  act  ]
+ *
+ *  <-- backward direction --
+ *
+ * @param activation      activation of the current layer
+ * @param output          forward direction's output of the current layer
+ * @param input_backprop  incoming gradient from the next layer
+ * @param output_backprop backward direction's output of the activation, i.e. the outgoing
+ *                        gradient of the activation
+ */
+const IPortableTensor *getFusedActivationBackprop(const ir::Activation &activation,
+                                                  const IPortableTensor *output,
+                                                  const IPortableTensor *input_backprop,
+                                                  IPortableTensor *output_backprop);
+
 } // namespace ops
 } // namespace train
 } // namespace backend
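
The helper gives the three layers a common backward prologue. A sketch of the shared call
pattern (illustrative; the member names are the ones each layer already owns):

    // NONE simply forwards the incoming gradient (no copy); RELU/RELU6 write the
    // masked gradient into _act_back_prop_output and return it, so downstream
    // kernels must consume the returned pointer rather than _back_prop_output.
    const IPortableTensor *backprop_act = getFusedActivationBackprop(
      _activation, _output, _back_prop_output, _act_back_prop_output.get());
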
+ */ + +#include "OperationUtils.h" + +#include +#include + +namespace onert +{ +namespace backend +{ +namespace train +{ +namespace ops +{ + +const IPortableTensor *getFusedActivationBackprop(const ir::Activation& activation, + const IPortableTensor *output, + const IPortableTensor *input_backprop, + IPortableTensor *output_backprop) +{ + const IPortableTensor* res; + + switch (activation) + { + case ir::Activation::NONE: + res = input_backprop; + break; + case ir::Activation::RELU: + nnfw::cker::train::ReLUGrad(getShape(output), getBuffer(output), + getShape(input_backprop), getBuffer(input_backprop), + getShape(output_backprop), getBuffer(output_backprop)); + res = output_backprop; + break; + case ir::Activation::RELU6: + nnfw::cker::train::ReLU6Grad(getShape(output), getBuffer(output), + getShape(input_backprop), getBuffer(input_backprop), + getShape(output_backprop), getBuffer(output_backprop)); + res = output_backprop; + break; + default: + throw std::runtime_error("Unsupported activation type yet"); + } + + return res; +} + + +} // namespace ops +} // namespace train +} // namesapce backend +} // namespace onert + + diff --git a/runtime/onert/backend/train/ops/OperationUtils.h b/runtime/onert/backend/train/ops/OperationUtils.h index 470b0fd91c9..da07c4cc9db 100644 --- a/runtime/onert/backend/train/ops/OperationUtils.h +++ b/runtime/onert/backend/train/ops/OperationUtils.h @@ -36,6 +36,27 @@ using cpu::ops::getNumberOfDimensions; using cpu::ops::getNumberOfElements; using cpu::ops::getSizeOfDimension; +/** + * @brief calcuate fused acitvation back propagation + * + * -- forward direction --> + * + * [ current layer ] ---- [next layer ] + * [ op | act ] + * + * <-- backward direction -- + * + * @param activation activation of current layer + * @param output forward direction's output of current layer + * @param input_backprop backward direction's from next layer, incoming gradient from next layer + * @param output_backprop backward direction's output of activation, outcoming gradient of + * acitvation + */ +const IPortableTensor *getFusedActivationBackprop(const ir::Activation &activation, + const IPortableTensor *output, + const IPortableTensor *input_backprop, + IPortableTensor *output_backprop); + } // namespace ops } // namespace train } // namespace backend diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc index 96b14fc81e6..b1ddf0322ae 100644 --- a/runtime/onert/backend/train/ops/PoolLayer.cc +++ b/runtime/onert/backend/train/ops/PoolLayer.cc @@ -104,6 +104,9 @@ class MaxPool2D final : public TrainingKernelRegistry assert(back_prop_out->layout() == ir::Layout::NHWC); // activation bacward + back_prop_out = + getFusedActivationBackprop(_activation, _output, back_prop_out, _act_back_prop_output.get()); + /* switch (_activation) { case ir::Activation::NONE: @@ -118,6 +121,7 @@ class MaxPool2D final : public TrainingKernelRegistry default: throw std::runtime_error("PoolLayer: Unsupported activation type yet"); } + */ // maxpool baackward auto arg_max_index = _arg_max_index.get();