diff --git a/onert-micro/onert-micro/include/pal/common/PALSubCommon.h b/onert-micro/onert-micro/include/pal/common/PALSubCommon.h index 1df3370c020..62964a51f56 100644 --- a/onert-micro/onert-micro/include/pal/common/PALSubCommon.h +++ b/onert-micro/onert-micro/include/pal/common/PALSubCommon.h @@ -25,6 +25,26 @@ namespace execute { namespace pal { + +int8_t SubFunc(int8_t x, int8_t y, const core::ArithmeticQuantParams ¶ms) +{ + const int32_t input1_val = params.input1_offset + x; + const int32_t input2_val = params.input2_offset + y; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sum = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); +} + template OMStatus Sub(const core::BinaryArithmeticBroadcastParams ¶ms, const int flat_size, const T *input1_data, const T *input2_data, T *output_data) @@ -44,6 +64,16 @@ OMStatus BroadcastSub4DSlow(const core::BinaryArithmeticBroadcastParams ¶ms, return Ok; } +OMStatus BroadcastSub4DSlow(const core::ArithmeticQuantParams ¶ms, + const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, + const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, + const core::OMRuntimeShape &output_shape, int8_t *output_data) +{ + BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, SubFunc); + return Ok; +} + } // namespace pal } // namespace execute } // namespace onert_micro diff --git a/onert-micro/onert-micro/include/pal/mcu/PALSub.h b/onert-micro/onert-micro/include/pal/mcu/PALSub.h index a3bd4b7368a..60c5fb6eac8 100644 --- a/onert-micro/onert-micro/include/pal/mcu/PALSub.h +++ b/onert-micro/onert-micro/include/pal/mcu/PALSub.h @@ -19,5 +19,24 @@ #define ONERT_MICRO_EXECUTE_PAL_SUB_H #include "PALSubCommon.h" +#include "PALUtils.h" + +namespace onert_micro +{ +namespace execute +{ +namespace pal +{ + +OMStatus Sub(const core::ArithmeticQuantParams ¶ms, const uint32_t flat_size, + const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data) +{ + ElementWise(flat_size, params, input1_data, input2_data, output_data, SubFunc); + return Ok; +} + +} // namespace pal +} // namespace execute +} // namespace onert_micro #endif // ONERT_MICRO_EXECUTE_PAL_MUL_H diff --git a/onert-micro/onert-micro/include/test_models/sub/S8SubKernel.h b/onert-micro/onert-micro/include/test_models/sub/S8SubKernel.h new file mode 100644 index 00000000000..276ba9eb1b9 --- /dev/null +++ b/onert-micro/onert-micro/include/test_models/sub/S8SubKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_TEST_MODELS_SUB_KERNEL_S8_H +#define ONERT_MICRO_TEST_MODELS_SUB_KERNEL_S8_H + +#include "TestDataSubBase.h" + +namespace onert_micro +{ +namespace test_model +{ +namespace sub_s8_no_broadcasting +{ + +/* + * Sub Kernel: + * + * Input_1(2, 2) Input_2(2, 2) + * \ / + * Sub(w/o broadcast) + * | + * Output(2, 2) + */ + +unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x44, 0x02, 0x00, 0x00, 0x60, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, + 0x0c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x03, 0x00, 0x00, 0x00, + 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00, + 0x7c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x42, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; +std::vector input1_data = { + 5, + 7, + 13, + -3, +}; +std::vector input2_data = {-5, -11, 5, 5}; +std::vector reference_output_data = {10, 18, 8, -8}; +} // namespace sub_s8_no_broadcasting + +class TestDataS8Sub : public TestDataSubBase +{ +public: + explicit TestDataS8Sub(bool is_with_broadcast) : TestDataSubBase(is_with_broadcast) + { + if (is_with_broadcast) + { + std::cerr << ("Sub S8 with broadcasting not supported yet!\n"); + } + else + { + _input1_data = sub_s8_no_broadcasting::input1_data; + _input2_data = sub_s8_no_broadcasting::input2_data; + _reference_output_data = sub_s8_no_broadcasting::reference_output_data; + _test_kernel_model_circle = sub_s8_no_broadcasting::test_kernel_model_circle; + } + } + + ~TestDataS8Sub() override = default; +}; + +} // namespace test_model +} // namespace onert_micro + +#endif // ONERT_MICRO_TEST_MODELS_SUB_KERNEL_S8_H diff --git a/onert-micro/onert-micro/src/execute/kernels/Sub.cpp b/onert-micro/onert-micro/src/execute/kernels/Sub.cpp index 6e32e88f3b2..788eb6813e0 100644 --- a/onert-micro/onert-micro/src/execute/kernels/Sub.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/Sub.cpp @@ -38,6 +38,50 @@ constexpr uint32_t input1TensorIdx = 0; constexpr uint32_t input2TensorIdx = 1; constexpr uint32_t outputTensorIdx = 0; +void calculateQuantParams(core::ArithmeticQuantParams ¶ms, const circle::Tensor *input1, + const circle::Tensor *input2, const circle::Tensor *output, + circle::ActivationFunctionType act) +{ + long input1_zp; + long input2_zp; + long output_zp; + + float input1_scale; + float input2_scale; + float output_scale; + + // Read input1 quant params + readQuantParams(input1, input1_zp, input1_scale); + // Read input2 quant params + readQuantParams(input2, input2_zp, input2_scale); + // Read output quant params + readQuantParams(output, output_zp, output_scale); + + params.input1_offset = -static_cast(input1_zp); + params.input2_offset = -static_cast(input2_zp); + params.output_offset = static_cast(output_zp); + params.left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20; + const double twice_max_input_scale = + 2 * static_cast(std::max(input1_scale, input2_scale)); + const double real_input1_multiplier = static_cast(input1_scale) / twice_max_input_scale; + const double real_input2_multiplier = static_cast(input2_scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << params.left_shift) * static_cast(output_scale)); + + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, ¶ms.input1_multiplier, + ¶ms.input1_shift); + + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, ¶ms.input2_multiplier, + ¶ms.input2_shift); + + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, ¶ms.output_multiplier, + ¶ms.output_shift); + + calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(), + ¶ms.quantized_activation_min, + ¶ms.quantized_activation_max); +} + } // namespace // NOTE: doesnt currently support dynamic shapes @@ -160,6 +204,31 @@ OMStatus onert_micro::execute::execute_kernel_CircleSub(const OMExecuteArgs &exe } break; #endif // DIS_FLOAT +#ifndef DIS_QUANT + case circle::TensorType_INT8: + { + core::ArithmeticQuantParams sub_params{}; + + calculateQuantParams(sub_params, input1, input2, output, + options->fused_activation_function()); + + if (need_broadcast) + { + status = pal::BroadcastSub4DSlow( + sub_params, input1_shape, core::utils::castInputData(input1_data), input2_shape, + core::utils::castInputData(input2_data), output_shape, + core::utils::castOutputData(output_data)); + } + else + { + status = pal::Sub(sub_params, input1_shape.flatSize(), + core::utils::castInputData(input1_data), + core::utils::castInputData(input2_data), + core::utils::castOutputData(output_data)); + } + } + break; +#endif // DIF_QUANT default: { status = UnsupportedType; diff --git a/onert-micro/onert-micro/src/execute/kernels/tests/Sub.test.cpp b/onert-micro/onert-micro/src/execute/kernels/tests/Sub.test.cpp index 91d9da48a75..56b361d7f30 100644 --- a/onert-micro/onert-micro/src/execute/kernels/tests/Sub.test.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/tests/Sub.test.cpp @@ -18,6 +18,7 @@ #include "test_models/sub/FloatSubKernel.h" #include "test_models/sub/NegSubKernel.h" #include "test_models/sub/IntSubKernel.h" +#include "test_models/sub/S8SubKernel.h" namespace onert_micro { @@ -53,6 +54,18 @@ TEST_F(SubTest, INT_P) } } +TEST_F(SubTest, S8_P) +{ + // No broadcast + { + const bool is_with_broadcast = false; + test_model::TestDataS8Sub test_data_add_no_broadcasting(is_with_broadcast); + std::vector output_data_vector = + onert_micro::execute::testing::checkKernel(2, &test_data_add_no_broadcasting); + EXPECT_THAT(output_data_vector, test_data_add_no_broadcasting.get_output_data_by_index(0)); + } +} + TEST_F(SubTest, Float_P) { // No broadcast