diff --git a/onert-micro/onert-micro/include/core/OMKernelData.h b/onert-micro/onert-micro/include/core/OMKernelData.h index da08bfbec47..b0580ee28f9 100644 --- a/onert-micro/onert-micro/include/core/OMKernelData.h +++ b/onert-micro/onert-micro/include/core/OMKernelData.h @@ -103,6 +103,22 @@ struct TransposeParams int32_t perm[5]; }; +struct ArithmeticQuantParams +{ + int32_t input1_offset; + int32_t input2_offset; + int left_shift; + int32_t input1_multiplier; + int32_t input2_multiplier; + int input1_shift; + int input2_shift; + int32_t output_multiplier; + int output_shift; + int32_t output_offset; + int32_t quantized_activation_max; + int32_t quantized_activation_min; +}; + struct BinaryArithmeticBroadcastParams { // float activation params. diff --git a/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst b/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst index 049a77a140e..1e4fd4ec832 100644 --- a/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst +++ b/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst @@ -1,5 +1,5 @@ #/*REGISTER_KERNEL(ABS, Abs)*/ -#/*REGISTER_KERNEL(ADD, Add)*/ +REGISTER_KERNEL(ADD, Add) #/*REGISTER_KERNEL(ADD_N, AddN)*/ #/*REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)*/ #/*REGISTER_KERNEL(ARG_MAX, ArgMax)*/ diff --git a/onert-micro/onert-micro/include/pal/cmsisnn/PALAdd.h b/onert-micro/onert-micro/include/pal/cmsisnn/PALAdd.h new file mode 100644 index 00000000000..3a3fdf574bb --- /dev/null +++ b/onert-micro/onert-micro/include/pal/cmsisnn/PALAdd.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_EXECUTE_PAL_ADD_H +#define ONERT_MICRO_EXECUTE_PAL_ADD_H + +#include "PALAddCommon.h" +#include "PALUtils.h" + +#include "arm_nnfunctions.h" + +namespace onert_micro +{ +namespace execute +{ +namespace pal +{ + +OMStatus Add(const core::ArithmeticQuantParams ¶ms, const uint32_t flat_size, + const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data) +{ + auto status = arm_elementwise_add_s8( + input1_data, input2_data, params.input1_offset, params.input1_multiplier, params.input1_shift, + params.input2_offset, params.input2_multiplier, params.input2_shift, params.left_shift, + output_data, params.output_offset, params.output_multiplier, params.output_shift, + params.quantized_activation_min, params.quantized_activation_max, flat_size); + + assert(status == ARM_CMSIS_NN_SUCCESS); + if (status != ARM_CMSIS_NN_SUCCESS) + return UnknownError; + + return Ok; +} + +} // namespace pal +} // namespace execute +} // namespace onert_micro + +#endif // ONERT_MICRO_EXECUTE_PAL_ADD_H diff --git a/onert-micro/onert-micro/include/pal/common/PALAddCommon.h b/onert-micro/onert-micro/include/pal/common/PALAddCommon.h index 6be8699f28f..fb65c0e461d 100644 --- a/onert-micro/onert-micro/include/pal/common/PALAddCommon.h +++ b/onert-micro/onert-micro/include/pal/common/PALAddCommon.h @@ -27,6 +27,25 @@ namespace execute namespace pal { +int8_t AddFunc(int8_t x, int8_t y, const core::ArithmeticQuantParams ¶ms) +{ + const int32_t input1_val = params.input1_offset + x; + const int32_t input2_val = params.input2_offset + y; + const 
int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); +} + template OMStatus Add(const core::BinaryArithmeticBroadcastParams ¶ms, const int flat_size, const T *input1_data, const T *input2_data, T *output_data) @@ -46,6 +65,16 @@ OMStatus BroadcastAdd4DSlow(const core::BinaryArithmeticBroadcastParams ¶ms, return Ok; } +OMStatus BroadcastAdd4DSlow(const core::ArithmeticQuantParams ¶ms, + const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, + const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, + const core::OMRuntimeShape &output_shape, int8_t *output_data) +{ + BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, AddFunc); + return Ok; +} + } // namespace pal } // namespace execute } // namespace onert_micro diff --git a/onert-micro/onert-micro/include/pal/common/PALArithmeticOpCommon.h b/onert-micro/onert-micro/include/pal/common/PALArithmeticOpCommon.h index 1ee592d5e44..608cbd7d980 100644 --- a/onert-micro/onert-micro/include/pal/common/PALArithmeticOpCommon.h +++ b/onert-micro/onert-micro/include/pal/common/PALArithmeticOpCommon.h @@ -65,6 +65,17 @@ OMStatus ArithmeticOp(const 
core::BinaryArithmeticBroadcastParams ¶ms, const return Ok; } +template +void ElementWise(const uint32_t size, const core::ArithmeticQuantParams ¶ms, + const T *input1_data, const T *input2_data, T *output_data, + T (*binary_func)(T, T, const core::ArithmeticQuantParams &)) +{ + for (int i = 0; i < size; ++i) + { + output_data[i] = binary_func(input1_data[i], input2_data[i], params); + } +} + template inline void ArithmeticOpScalar(const core::BinaryArithmeticBroadcastParams ¶ms, const int flat_size, const T *input_data, const T scalar_value, @@ -130,6 +141,126 @@ OMStatus BroadcastArithmeticOp4DSlow(const core::BinaryArithmeticBroadcastParams return Ok; } +template +void BroadcastInput1(int size, const core::ArithmeticQuantParams ¶ms, const T *input1_data, + const T *input2_data, T *output_data, + T (*binary_func)(T, T, const core::ArithmeticQuantParams &)) +{ + for (int i = 0; i < size; ++i) + { + output_data[i] = binary_func(input1_data[0], input2_data[i], params); + } +} + +template +void BroadcastInput2(int size, const core::ArithmeticQuantParams ¶ms, const T *input1_data, + const T *input2_data, T *output_data, + T (*binary_func)(T, T, const core::ArithmeticQuantParams &)) +{ + for (int i = 0; i < size; ++i) + { + output_data[i] = binary_func(input1_data[i], input2_data[0], params); + } +} + +template +void BroadcastRecursiveDimensions(const core::ArithmeticQuantParams ¶ms, int dimension, + size_t *input1_offset_p, size_t *input2_offset_p, + size_t *output_offset, size_t *compressed_input1_stride, + size_t *compressed_input2_stride, size_t *compressed_output_shape, + const T *input1_data, const T *input2_data, T *output_data, + T (*binary_func)(T, T, const core::ArithmeticQuantParams &)) +{ + if (dimension > 0) + { + for (size_t c = 0; c < compressed_output_shape[dimension]; ++c) + { + size_t input1_offset_c = *input1_offset_p; + size_t input2_offset_c = *input2_offset_p; + BroadcastRecursiveDimensions(params, dimension - 1, &input1_offset_c, 
&input2_offset_c, + output_offset, compressed_input1_stride, + compressed_input2_stride, compressed_output_shape, input1_data, + input2_data, output_data, binary_func); + *input1_offset_p += compressed_input1_stride[dimension]; + *input2_offset_p += compressed_input2_stride[dimension]; + } + } + else + { + assert(dimension == 0); + bool input1_is_broadcast = compressed_input1_stride[dimension] == 0; + bool input2_is_broadcast = compressed_input2_stride[dimension] == 0; + assert(!(input1_is_broadcast && input2_is_broadcast)); + const T *input1_data_ptr = input1_data + *input1_offset_p; + const T *input2_data_ptr = input2_data + *input2_offset_p; + T *output_data_ptr = output_data + *output_offset; + if (input1_is_broadcast) + { + // input1 is broadcast. + BroadcastInput1(compressed_output_shape[dimension], params, input1_data_ptr, + input2_data_ptr, output_data_ptr, binary_func); + *input2_offset_p += compressed_output_shape[dimension]; + } + else if (input2_is_broadcast) + { + // input2 is broadcast. + BroadcastInput2(compressed_output_shape[dimension], params, input1_data_ptr, + input2_data_ptr, output_data_ptr, binary_func); + *input1_offset_p += compressed_output_shape[dimension]; + } + else + { + // Add element-wise. 
+ ElementWise(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr, + output_data_ptr, binary_func); + *input1_offset_p += compressed_output_shape[dimension]; + *input2_offset_p += compressed_output_shape[dimension]; + } + *output_offset += compressed_output_shape[dimension]; + } +} + +template +void BroadcastBinaryFunction6DSlow(const core::ArithmeticQuantParams ¶ms, + const core::OMRuntimeShape &input1_shape, const T *input1_data, + const core::OMRuntimeShape &input2_shape, const T *input2_data, + const core::OMRuntimeShape &output_shape, T *output_data, + T (*binary_func)(T, T, const core::ArithmeticQuantParams &)) +{ + constexpr int kMaxBroadcastDim = 6; + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + size_t compressed_input1_stride[kMaxBroadcastDim]; + size_t compressed_input2_stride[kMaxBroadcastDim]; + size_t compressed_output_shape[kMaxBroadcastDim]; + bool broadcastable_shape = ReduceDimensionsForBroadcast( + input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride, + compressed_output_shape); + // Skip broadcasting for degenerate shapes. 
+ if (!broadcastable_shape) + { + return; + } + + size_t input1_offset = 0; + size_t input2_offset = 0; + size_t output_offset = 0; + BroadcastRecursiveDimensions(params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset, + &output_offset, compressed_input1_stride, compressed_input2_stride, + compressed_output_shape, input1_data, input2_data, output_data, + binary_func); +} + } // namespace pal } // namespace execute } // namespace onert_micro diff --git a/onert-micro/onert-micro/include/pal/common/PALUtils.h b/onert-micro/onert-micro/include/pal/common/PALUtils.h index d48e3cd54c7..50866949cb7 100644 --- a/onert-micro/onert-micro/include/pal/common/PALUtils.h +++ b/onert-micro/onert-micro/include/pal/common/PALUtils.h @@ -212,6 +212,143 @@ inline T activationFunctionWithMinMax(T x, T output_activation_min, T output_act return min(max(x, output_activation_min), output_activation_max); } +// Reduces and compresses dimensions so that broadcast handling becomes more +// efficient. Returns true if the output shape is broadcastable; it doesn't +// contain any degenerate dimension, i.e. shape dimension = 0. False otherwise. 
+template +bool ReduceDimensionsForBroadcast(const core::OMRuntimeShape &input1_shape, + const core::OMRuntimeShape &input2_shape, + size_t *compressed_input1_stride, + size_t *compressed_input2_stride, size_t *compressed_output_shape) +{ + size_t num_compressed_dims = 0; + size_t compressed_input1_shape[MAX_DIM]; + size_t compressed_input2_shape[MAX_DIM]; + std::fill(compressed_input1_shape, compressed_input1_shape + MAX_DIM, 1); + std::fill(compressed_input2_shape, compressed_input2_shape + MAX_DIM, 1); + std::fill(compressed_output_shape, compressed_output_shape + MAX_DIM, 1); + bool broadcast_input1 = false; + bool broadcast_input2 = false; + bool first_nonunit = true; + const size_t num_input1_dims = input1_shape.dimensionsCount(); + const size_t num_input2_dims = input2_shape.dimensionsCount(); + const int32_t *input1_dims = input1_shape.dimsData(); + const int32_t *input2_dims = input2_shape.dimsData(); + const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims); + for (size_t i = 1; i <= num_common_dims; i++) + { + const size_t input1_dim = input1_dims[num_input1_dims - i]; + const size_t input2_dim = input2_dims[num_input2_dims - i]; + if (input1_dim == 0 || input2_dim == 0) + { + return false; + } + if (input1_dim == 1 && input2_dim == 1) + { + continue; + } + assert(!broadcast_input1 || !broadcast_input2); + + if (input1_dim == 1) + { + if (!broadcast_input1) + { + broadcast_input1 = true; + broadcast_input2 = false; + num_compressed_dims++; + } + compressed_input2_shape[num_compressed_dims - 1] *= input2_dim; + compressed_output_shape[num_compressed_dims - 1] *= input2_dim; + } + else if (input2_dim == 1) + { + if (!broadcast_input2) + { + broadcast_input1 = false; + broadcast_input2 = true; + num_compressed_dims++; + } + compressed_input1_shape[num_compressed_dims - 1] *= input1_dim; + compressed_output_shape[num_compressed_dims - 1] *= input1_dim; + } + else + { + assert(input1_dim == input2_dim); + if (broadcast_input1 || 
broadcast_input2 || first_nonunit) + { + broadcast_input1 = false; + broadcast_input2 = false; + num_compressed_dims++; + } + compressed_input1_shape[num_compressed_dims - 1] *= input1_dim; + compressed_input2_shape[num_compressed_dims - 1] *= input1_dim; + compressed_output_shape[num_compressed_dims - 1] *= input1_dim; + } + first_nonunit = false; + } + if (num_input1_dims > num_input2_dims) + { + if (!broadcast_input2) + { + num_compressed_dims++; + } + for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++) + { + const size_t input1_dim = input1_dims[i]; + if (input1_dim == 0) + { + return false; + } + compressed_input1_shape[num_compressed_dims - 1] *= input1_dim; + compressed_output_shape[num_compressed_dims - 1] *= input1_dim; + } + } + else if (num_input2_dims > num_input1_dims) + { + if (!broadcast_input1) + { + num_compressed_dims++; + } + for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++) + { + const size_t input2_dim = input2_dims[i]; + if (input2_dim == 0) + { + return false; + } + compressed_input2_shape[num_compressed_dims - 1] *= input2_dim; + compressed_output_shape[num_compressed_dims - 1] *= input2_dim; + } + } + num_compressed_dims = (num_compressed_dims > 1) ? 
num_compressed_dims : 1; + + int input1_stride = 1; + int input2_stride = 1; + for (int i = 0; i < MAX_DIM; ++i) + { + compressed_input1_stride[i] = input1_stride; + input1_stride *= compressed_input1_shape[i]; + compressed_input2_stride[i] = input2_stride; + input2_stride *= compressed_input2_shape[i]; + } + for (int i = 0; i < MAX_DIM; ++i) + { + if (compressed_input1_shape[i] != compressed_input2_shape[i]) + { + if (compressed_input1_shape[i] == 1) + { + compressed_input1_stride[i] = 0; + } + else + { + assert(compressed_input2_shape[i] == 1); + compressed_input2_stride[i] = 0; + } + } + } + return true; +} + } // namespace pal } // namespace execute } // namespace onert_micro diff --git a/onert-micro/onert-micro/include/pal/mcu/PALAdd.h b/onert-micro/onert-micro/include/pal/mcu/PALAdd.h index 449daa30278..3f13e784ac5 100644 --- a/onert-micro/onert-micro/include/pal/mcu/PALAdd.h +++ b/onert-micro/onert-micro/include/pal/mcu/PALAdd.h @@ -19,6 +19,7 @@ #define ONERT_MICRO_EXECUTE_PAL_ADD_H #include "PALAddCommon.h" +#include "PALUtils.h" namespace onert_micro { @@ -26,18 +27,12 @@ namespace execute { namespace pal { -template <> -OMStatus Add(const core::BinaryArithmeticBroadcastParams ¶ms, const int flat_size, - const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data) -{ - assert(false && "Not IMPL yet"); -} -template <> -OMStatus Add(const core::BinaryArithmeticBroadcastParams ¶ms, const int flat_size, - const int16_t *input1_data, const int16_t *input2_data, int16_t *output_data) +OMStatus Add(const core::ArithmeticQuantParams ¶ms, const uint32_t flat_size, + const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data) { - assert(false && "Not IMPL yet"); + ElementWise(flat_size, params, input1_data, input2_data, output_data, AddFunc); + return Ok; } } // namespace pal diff --git a/onert-micro/onert-micro/include/test_models/add/NegAddKernel.h b/onert-micro/onert-micro/include/test_models/add/NegAddKernel.h index 
10aa81374fc..3d8476b1d59 100644 --- a/onert-micro/onert-micro/include/test_models/add/NegAddKernel.h +++ b/onert-micro/onert-micro/include/test_models/add/NegAddKernel.h @@ -67,6 +67,61 @@ const unsigned char test_kernel_model_circle[] = { } // namespace neg_add_input_type_mismatch +namespace neg_add_no_scale_output +{ +/* + * Quantize Add Kernel with output without scale: + * + * Input_1(1, 4, 4, 3) - Int8 Input_2(1, 4, 4, 1) - Int8 + * \ / + * Add(with broadcast) + * | + * Output(1, 4, 4, 3) - no scale and zero_point + */ +const unsigned char test_kernel_model_circle[] = { + 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x30, 0x02, 0x00, 0x00, 0x3c, 0x02, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xe4, 0xfd, 0xff, 0xff, 0xe8, 0xfd, 0xff, 0xff, 0xec, 0xfd, 0xff, 0xff, + 0xf0, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x5c, 0xfe, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00, 0x03, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x2a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x3c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00, + 0x1c, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x03, 0x00, 0x00, 0x00, + 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00, + 0x7c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x39, 0xb4, 0x3c, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf4, 0xfd, 0x54, 0x3c, 0x01, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +} // namespace neg_add_no_scale_output + namespace neg_add_no_quant_params { /* @@ -126,6 +181,22 @@ class NegTestDataInputMismatchAddKernel : public NegTestDataBase const unsigned char *_test_kernel_model_circle; }; +class NegTestQuantAddNoScaleKernel : public NegTestDataBase +{ +public: + NegTestQuantAddNoScaleKernel() + { + _test_kernel_model_circle = neg_add_no_scale_output::test_kernel_model_circle; + } + + ~NegTestQuantAddNoScaleKernel() override = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + +protected: + const unsigned char *_test_kernel_model_circle; +}; + class NegTestDataNoQuantParamsS16AddKernel : public NegTestDataBase { public: diff --git a/onert-micro/onert-micro/include/test_models/add/QuantAddKernel.h b/onert-micro/onert-micro/include/test_models/add/QuantAddKernel.h new file mode 100644 index 00000000000..d93886ddb19 --- /dev/null +++ b/onert-micro/onert-micro/include/test_models/add/QuantAddKernel.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_TEST_MODELS_ADD_KERNEL_QUANT_H +#define ONERT_MICRO_TEST_MODELS_ADD_KERNEL_QUANT_H + +#include "TestDataAddBase.h" + +namespace onert_micro +{ +namespace test_model +{ +namespace add_int8_no_broadcasting +{ + +/* + * Add Kernel: + * + * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3) + * \ / + * Add(no broadcast) + * | + * Output(1, 4, 4, 3) + */ +const unsigned char test_kernel_model_circle[] = { + 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x40, 0x02, 0x00, 0x00, 0x4c, 0x02, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xd4, 0xfd, 0xff, 0xff, 0xd8, 0xfd, 0xff, 0xff, 0xdc, 0xfd, 0xff, 0xff, + 0xe0, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 
0x4c, 0xfe, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, + 0x0c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x6f, 0x12, 0x83, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x03, 0x00, 0x00, 0x00, + 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00, + 0x7c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x39, 0xb4, 0x3c, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x54, 0x00, 
0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf4, 0xfd, 0x54, 0x3c, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +const std::vector input1_data = { + 0, 7, -3, 7, -9, 2, -5, 0, 9, -8, 1, 4, -2, -6, -2, -7, -10, -10, -8, 1, -8, 0, 2, -5, + 7, -10, -7, 9, 4, 7, -1, -1, -9, -9, 2, -9, 8, 5, 1, -3, 4, 7, 0, -7, 1, 4, 7, 0}; +const std::vector input2_data = { + 3, -6, 5, -6, 3, -7, 3, -7, 5, 5, -9, 8, -7, -9, -1, -5, -4, 7, 2, -3, 2, 7, -3, 0, + 2, -9, 4, 7, 2, 1, 7, 3, -5, -3, 7, 9, 7, 1, 8, 3, -6, -6, -3, 8, -7, -7, 4, 1}; +const std::vector reference_output_data = { + 66, -41, 71, -41, -51, -128, 1, -128, 127, 6, -128, 127, -128, -128, -48, -128, + -128, 24, -60, -53, -60, 127, -40, -65, 127, -128, -3, 127, 96, 113, 127, 53, + -128, -128, 127, 81, 127, 87, 127, 27, -80, -41, -66, 85, -128, -102, 127, 22}; + +} // namespace add_int8_no_broadcasting + +namespace add_int8_with_broadcasting +{ +/* + * Add Kernel: + * + * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3) + * \ / + * Add(with broadcast) + * | + * Output(1, 4, 4, 3) + */ +const unsigned char test_kernel_model_circle[] = { + 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 
0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x40, 0x02, 0x00, 0x00, 0x4c, 0x02, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xd4, 0xfd, 0xff, 0xff, 0xd8, 0xfd, 0xff, 0xff, 0xdc, 0xfd, 0xff, 0xff, + 0xe0, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x4c, 0xfe, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, + 0x0c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x6f, 0x12, 0x83, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x03, 0x00, 0x00, 0x00, + 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 
0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00, + 0x7c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x39, 0xb4, 0x3c, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf4, 0xfd, 0x54, 0x3c, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +std::vector input1_data = {5, 0, 2, -9, -3, 2, 1, -1, 8, 5, -5, 2, -4, 9, 7, 9, + 4, -8, -3, 4, -5, 0, 4, -4, -2, -9, -8, 4, 
-5, -9, -10, 4, + -8, -1, 1, 1, 4, -3, -1, -7, -10, 5, 5, 3, -9, 9, 0, 8}; +std::vector<int8_t> input2_data = {-8, 0, 0, -8, -4, 8, 1, -2, -5, -1, 6, -9, -6, -1, 6, -10}; +std::vector<int8_t> reference_output_data = { + -111, -128, -128, -117, -39, 26, 13, -13, 104, -111, -128, -128, -128, 29, 3, 127, + 127, 72, -17, 74, -43, -44, 8, -96, -128, -128, -128, 30, -87, -128, 2, 127, + 28, -128, -128, -128, -80, -128, -128, -113, -128, 43, 127, 127, 15, -103, -128, -116}; + +} // namespace add_int8_with_broadcasting + +class TestDataInt8Add : public TestDataAddBase<int8_t> +{ +public: + explicit TestDataInt8Add(bool is_with_broadcast) : TestDataAddBase<int8_t>(is_with_broadcast) + { + if (is_with_broadcast) + { + _input1_data = add_int8_with_broadcasting::input1_data; + _input2_data = add_int8_with_broadcasting::input2_data; + _reference_output_data = add_int8_with_broadcasting::reference_output_data; + _test_add_kernel_model_circle = add_int8_with_broadcasting::test_kernel_model_circle; + } + else + { + _input1_data = add_int8_no_broadcasting::input1_data; + _input2_data = add_int8_no_broadcasting::input2_data; + _reference_output_data = add_int8_no_broadcasting::reference_output_data; + _test_add_kernel_model_circle = add_int8_no_broadcasting::test_kernel_model_circle; + } + } + + ~TestDataInt8Add() override = default; +}; + +} // namespace test_model +} // namespace onert_micro + +#endif // ONERT_MICRO_TEST_MODELS_ADD_KERNEL_QUANT_H diff --git a/onert-micro/onert-micro/src/execute/kernels/Add.cpp b/onert-micro/onert-micro/src/execute/kernels/Add.cpp index 44b4c67daea..cebe900f876 100644 --- a/onert-micro/onert-micro/src/execute/kernels/Add.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/Add.cpp @@ -33,6 +33,62 @@ constexpr uint32_t input1TensorIdx = 0; constexpr uint32_t input2TensorIdx = 1; constexpr uint32_t outputTensorIdx = 0; +void calculateQuantParams(core::ArithmeticQuantParams &params, const circle::Tensor *input1, + const circle::Tensor *input2, const circle::Tensor *output, + 
circle::ActivationFunctionType act) +{ + assert(input1->quantization() != nullptr); // Fix caller + assert(input2->quantization() != nullptr); // Fix caller + assert(output->quantization() != nullptr); // Fix caller + + assert(input1->quantization()->scale() != nullptr and + input1->quantization()->scale()->size() == 1); // Fix caller + assert(input2->quantization()->scale() != nullptr and + input2->quantization()->scale()->size() == 1); // Fix caller + assert(output->quantization()->scale() != nullptr and + output->quantization()->scale()->size() == 1); // Fix caller + + assert(input1->quantization()->zero_point() != nullptr and + input1->quantization()->zero_point()->size() == 1); // Fix caller + assert(input2->quantization()->zero_point() != nullptr and + input2->quantization()->zero_point()->size() == 1); // Fix caller + assert(output->quantization()->zero_point() != nullptr and + output->quantization()->zero_point()->size() == 1); // Fix caller + + // 8bit -> 8bit general quantized path, with general rescalings + const auto input1_zp = input1->quantization()->zero_point()->operator[](0); + const auto input2_zp = input2->quantization()->zero_point()->operator[](0); + const auto output_zp = output->quantization()->zero_point()->operator[](0); + + const auto input1_scale = input1->quantization()->scale()->operator[](0); + const auto input2_scale = input2->quantization()->scale()->operator[](0); + const auto output_scale = output->quantization()->scale()->operator[](0); + + params.input1_offset = -static_cast<int32_t>(input1_zp); + params.input2_offset = -static_cast<int32_t>(input2_zp); + params.output_offset = static_cast<int32_t>(output_zp); + params.left_shift = (output->type() == circle::TensorType_INT16) ? 
15 : 20; + const double twice_max_input_scale = + 2 * static_cast<double>(std::max(input1_scale, input2_scale)); + const double real_input1_multiplier = static_cast<double>(input1_scale) / twice_max_input_scale; + const double real_input2_multiplier = static_cast<double>(input2_scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << params.left_shift) * static_cast<double>(output_scale)); + + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &params.input1_multiplier, + &params.input1_shift); + + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &params.input2_multiplier, + &params.input2_shift); + + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &params.output_multiplier, + &params.output_shift); + + calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(), + &params.quantized_activation_min, + &params.quantized_activation_max); +} + } // namespace // NOTE: doesnt currently support dynamic shapes @@ -100,7 +156,6 @@ OMStatus onert_micro::execute::execute_kernel_CircleAdd(const OMExecuteArgs &exe // Check broadcast property core::BinaryArithmeticBroadcastParams params{}; const bool need_broadcast = pal::processBroadcastShapes(input1_shape, input2_shape, &params); - switch (input1->type()) { #ifndef DIS_FLOAT @@ -124,6 +179,7 @@ } } break; +#endif // DIS_FLOAT case circle::TensorType_INT64: { execute::calculateActivationRange(options->fused_activation_function(), @@ -166,7 +222,31 @@ } } break; -#endif // DIS_FLOAT +#ifndef DIS_QUANT + case circle::TensorType_INT8: + { + core::ArithmeticQuantParams add_params{}; + + calculateQuantParams(add_params, input1, input2, output, + options->fused_activation_function()); + + if (need_broadcast) + { + status = pal::BroadcastAdd4DSlow( + add_params, input1_shape, core::utils::castInputData<int8_t>(input1_data), input2_shape, + 
core::utils::castInputData<int8_t>(input2_data), output_shape, + core::utils::castOutputData<int8_t>(output_data)); + } + else + { + status = pal::Add(add_params, input1_shape.flatSize(), + core::utils::castInputData<int8_t>(input1_data), + core::utils::castInputData<int8_t>(input2_data), + core::utils::castOutputData<int8_t>(output_data)); + } + } + break; +#endif // DIS_QUANT default: { status = UnsupportedType; diff --git a/onert-micro/onert-micro/src/execute/kernels/tests/Add.test.cpp b/onert-micro/onert-micro/src/execute/kernels/tests/Add.test.cpp index 7927548ebd7..c152c9e2507 100644 --- a/onert-micro/onert-micro/src/execute/kernels/tests/Add.test.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/tests/Add.test.cpp @@ -18,6 +18,7 @@ #include "test_models/add/FloatAddKernel.h" #include "test_models/add/NegAddKernel.h" #include "test_models/add/IntAddKernel.h" +#include "test_models/add/QuantAddKernel.h" namespace onert_micro { @@ -97,6 +98,26 @@ TEST_F(AddTest, Float_P) } } +TEST_F(AddTest, INT8_P) +{ + // No broadcast + { + const bool is_with_broadcast = false; + test_model::TestDataInt8Add test_data_add_no_broadcasting(is_with_broadcast); + std::vector<int8_t> output_data_vector = + onert_micro::execute::testing::checkKernel<int8_t>(2, &test_data_add_no_broadcasting); + EXPECT_THAT(output_data_vector, test_data_add_no_broadcasting.get_output_data_by_index(0)); + } + // With broadcast + { + const bool is_with_broadcast = true; + test_model::TestDataInt8Add test_data_add_with_broadcasting(is_with_broadcast); + std::vector<int8_t> output_data_vector = + onert_micro::execute::testing::checkKernel<int8_t>(2, &test_data_add_with_broadcasting); + EXPECT_THAT(output_data_vector, test_data_add_with_broadcasting.get_output_data_by_index(0)); + } +} + TEST_F(AddTest, Input_output_type_mismatch_NEG) { onert_micro::test_model::NegTestDataInputMismatchAddKernel test_data_kernel; @@ -111,6 +132,13 @@ TEST_F(AddTest, No_quant_params_NEG) EXPECT_DEATH(checkNEGSISOKernel(&test_data_kernel), ""); } +TEST_F(AddTest, No_output_scale_param_NEG) 
+{ + onert_micro::test_model::NegTestQuantAddNoScaleKernel test_data_kernel; + + EXPECT_DEATH(checkNEGSISOKernel(&test_data_kernel), ""); +} + } // namespace testing } // namespace execute } // namespace onert_micro diff --git a/onert-micro/onert-micro/src/import/kernels/Add.cpp b/onert-micro/onert-micro/src/import/kernels/Add.cpp index d10d91b6ce7..52ee337a788 100644 --- a/onert-micro/onert-micro/src/import/kernels/Add.cpp +++ b/onert-micro/onert-micro/src/import/kernels/Add.cpp @@ -69,10 +69,28 @@ OMStatus onert_micro::import::configure_kernel_CircleAdd(const OMConfigureArgs & return NoQuantization; } - if (input1->quantization()->scale()->size() != 1 or - input2->quantization()->scale()->size() != 1 or output->quantization()->scale()->size() != 1) + if (input1->quantization()->scale() == nullptr or + input1->quantization()->zero_point() == nullptr or + input1->quantization()->scale()->size() != 1 or + input1->quantization()->zero_point()->size() != 1) { - return UnsupportedType; + return NoQuantization; + } + + if (input2->quantization()->scale() == nullptr or + input2->quantization()->zero_point() == nullptr or + input2->quantization()->scale()->size() != 1 or + input2->quantization()->zero_point()->size() != 1) + { + return NoQuantization; + } + + if (output->quantization()->scale() == nullptr or + output->quantization()->zero_point() == nullptr or + output->quantization()->scale()->size() != 1 or + output->quantization()->zero_point()->size() != 1) + { + return NoQuantization; } return status;