[onert-micro] Support S8 Conv2D (#13185)
This PR adds S8 Conv2D support using CMSIS-NN.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
1 parent be96c99 · commit ff12cae
Showing 15 changed files with 721 additions and 20 deletions.
115 changes: 115 additions & 0 deletions
onert-micro/onert-micro/include/pal/cmsisnn/PALConv2D.h
@@ -0,0 +1,115 @@
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ONERT_MICRO_EXECUTE_PAL_CONV_2D_H
#define ONERT_MICRO_EXECUTE_PAL_CONV_2D_H

#include "PALConv2DCommon.h"
#include "core/OMKernelData.h"
#include "core/OMRuntimeShape.h"
#include "PALUtils.h"

#include <arm_nnfunctions.h>
#include <memory> // std::make_unique

namespace onert_micro
{
namespace execute
{
namespace pal
{

// Fixed-point per-channel-quantization convolution kernel, delegated to CMSIS-NN.
OMStatus ConvPerChannel(const core::ConvQuant &params, const core::OMRuntimeShape &input_shape,
                        const int8_t *input_data, const core::OMRuntimeShape &filter_shape,
                        const int8_t *filter_data, const int32_t *bias_data,
                        const core::OMRuntimeShape &output_shape, int8_t *output_data)
{
  cmsis_nn_conv_params conv_params;
  conv_params.dilation.h = params.dilation_height_factor;
  conv_params.dilation.w = params.dilation_width_factor;

  assert(conv_params.dilation.h == 1);
  assert(conv_params.dilation.w == 1);

  conv_params.input_offset = params.input_offset;
  conv_params.output_offset = params.output_offset;
  conv_params.stride.h = params.stride_height;
  conv_params.stride.w = params.stride_width;
  conv_params.padding.h = params.pad_h;
  conv_params.padding.w = params.pad_w;
  conv_params.activation.min = params.quantized_activation_min;
  conv_params.activation.max = params.quantized_activation_max;

  cmsis_nn_per_channel_quant_params quant_params;
  quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
  quant_params.shift = const_cast<int32_t *>(
    reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));

  assert(conv_params.activation.min <= conv_params.activation.max);
  const int batch_size = input_shape.dims(0);
  const int input_depth = input_shape.dims(3);
  const int output_depth = filter_shape.dims(0);

  cmsis_nn_dims input_dims;
  input_dims.n = batch_size;
  input_dims.h = input_shape.dims(1);
  input_dims.w = input_shape.dims(2);
  input_dims.c = input_depth;

  cmsis_nn_dims filter_dims;
  filter_dims.n = output_depth;
  filter_dims.h = filter_shape.dims(1);
  filter_dims.w = filter_shape.dims(2);
  filter_dims.c = input_depth;

  cmsis_nn_dims bias_dims;
  bias_dims.n = 1;
  bias_dims.h = 1;
  bias_dims.w = 1;
  bias_dims.c = output_depth;

  cmsis_nn_dims output_dims;
  output_dims.n = batch_size;
  output_dims.h = output_shape.dims(1);
  output_dims.w = output_shape.dims(2);
  output_dims.c = output_depth;

  auto buf_size =
    arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);

  auto buffer = std::make_unique<int8_t[]>(buf_size);
  assert(buffer != nullptr);

  cmsis_nn_context ctx;
  ctx.buf = buffer.get();
  ctx.size = buf_size;

  auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
                                     &filter_dims, filter_data, &bias_dims, bias_data, &output_dims,
                                     output_data);

  assert(res == ARM_CMSIS_NN_SUCCESS);
  if (res != ARM_CMSIS_NN_SUCCESS)
    return CmsisNNError;
  return Ok;
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_CONV_2D_H
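
For orientation, a minimal call-site sketch follows, showing how an executor might guard and invoke this CMSIS-NN entry point. It is illustrative only: the wrapper name runS8ConvPerChannel and the UnknownError status it returns are assumptions, not code from this commit.

// Hypothetical dispatch helper (illustrative only; not part of this commit).
OMStatus runS8ConvPerChannel(const core::ConvQuant &params,
                             const core::OMRuntimeShape &input_shape, const int8_t *input_data,
                             const core::OMRuntimeShape &filter_shape, const int8_t *filter_data,
                             const int32_t *bias_data, const core::OMRuntimeShape &output_shape,
                             int8_t *output_data)
{
  // The CMSIS-NN kernel above only asserts dilation == 1, so reject dilated
  // convolutions explicitly before calling it in a release build.
  if (params.dilation_height_factor != 1 || params.dilation_width_factor != 1)
    return UnknownError; // assumed OMStatus value, for illustration only

  // The CMSIS-NN path consumes one (multiplier, shift) pair per output channel.
  assert(params.per_channel_output_multiplier.size() ==
         static_cast<size_t>(filter_shape.dims(0)));

  return ConvPerChannel(params, input_shape, input_data, filter_shape, filter_data, bias_data,
                        output_shape, output_data);
}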
@@ -0,0 +1,155 @@
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ONERT_MICRO_EXECUTE_PAL_CONV_2D_H
#define ONERT_MICRO_EXECUTE_PAL_CONV_2D_H

#include "PALConv2DCommon.h"
#include "core/OMKernelData.h"
#include "core/OMRuntimeShape.h"
#include "PALUtils.h"

namespace onert_micro
{
namespace execute
{
namespace pal
{

// Fixed-point per-channel-quantization convolution reference kernel.
OMStatus ConvPerChannel(const core::ConvQuant &params, const core::OMRuntimeShape &input_shape,
                        const int8_t *input_data, const core::OMRuntimeShape &filter_shape,
                        const int8_t *filter_data, const int32_t *bias_data,
                        const core::OMRuntimeShape &output_shape, int8_t *output_data)
{
  // Get parameters.
  const int32_t input_offset = params.input_offset; // r = s(q - Z)
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.pad_w;
  const int pad_height = params.pad_h;
  const int32_t output_offset = params.output_offset;

  const std::vector<int32_t> &output_multiplier = params.per_channel_output_multiplier;
  const std::vector<int32_t> &output_shift = params.per_channel_output_shift;

  // Set min and max value of the output.
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;

  // Consistency check.
  assert(output_activation_max >= output_activation_min);
  assert(input_shape.dimensionsCount() == 4);
  assert(filter_shape.dimensionsCount() == 4);
  assert(output_shape.dimensionsCount() == 4);

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);

  // Check dimensions of the tensors.
  const int input_height = input_shape.dims(1);
  const int input_width = input_shape.dims(2);
  const int filter_height = filter_shape.dims(1);
  const int filter_width = filter_shape.dims(2);
  const int filter_input_depth = filter_shape.dims(3);
  const int groups = input_depth / filter_input_depth;
  assert(groups != 0);
  assert(input_depth % filter_input_depth == 0);
  const int filters_per_group = output_depth / groups;
  assert(filters_per_group != 0);
  const int output_height = output_shape.dims(1);
  const int output_width = output_shape.dims(2);
  for (int batch = 0; batch < batches; ++batch)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel)
        {
          auto group = out_channel / filters_per_group;
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;

              // Zero padding by omitting the areas outside the image.
              const bool is_point_inside_image =
                (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);

              if (!is_point_inside_image)
              {
                continue;
              }

              for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel)
              {
                int32_t input_val = input_data[offset(input_shape.dimsData(), batch, in_y, in_x,
                                                      in_channel + group * filter_input_depth)];
                int32_t filter_val = filter_data[offset(filter_shape.dimsData(), out_channel,
                                                        filter_y, filter_x, in_channel)];
                // Accumulate with a 32-bit accumulator.
                // In the nudging process during model quantization, we force
                // the real value 0.0 to be represented by a quantized value.
                // This guarantees that input_offset fits in an int8_t, even
                // though it is stored as an int32_t. Since
                // int32_t += int8_t * (int8_t - int8_t), the largest value
                // each step can contribute is
                // [-127, 127] * ([-128, 127] - [-128, 127]) = [-32512, 32512],
                // and log2(32512) = 14.98, so we can accumulate at least 2^16
                // multiplications without overflow. The accumulator is applied
                // per filter, so this holds as long as the filter size
                // (filter_y * filter_x * in_channel) does not exceed 2^16,
                // which is the case in all the models we have seen so far.
                acc += filter_val * (input_val + input_offset);
              }
            }
          }

          if (bias_data)
          {
            acc += bias_data[out_channel];
          }
          acc = multiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
                                              output_shift[out_channel]);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] =
            static_cast<int8_t>(acc);
        }
      }
    }
  }
  return Ok;
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_CONV_2D_H
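
The requantization call multiplyByQuantizedMultiplier comes from PALUtils.h and is not shown in this diff. Since this kernel is derived from TensorFlow code, it presumably follows the standard TFLite/gemmlowp scheme: the real rescale factor is encoded as an int32 fixed-point multiplier in [2^30, 2^31) plus a power-of-two shift, applied with round-to-nearest at each step. Below is a self-contained sketch of those semantics, under that assumption and with hypothetical names.

#include <cstdint>
#include <limits>

// High 32 bits of 2 * a * b with round-to-nearest; saturates the single
// overflow case (a == b == INT32_MIN). Mirrors gemmlowp's
// SaturatingRoundingDoublingHighMul.
int32_t saturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
  if (a == std::numeric_limits<int32_t>::min() && b == std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::max();
  const int64_t ab = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int32_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
  return static_cast<int32_t>((ab + nudge) / (int64_t{1} << 31));
}

// Arithmetic right shift by `exponent` with round-to-nearest, matching
// gemmlowp's RoundingDivideByPOT.
int32_t roundingDivideByPOT(int32_t x, int exponent)
{
  const int32_t mask = static_cast<int32_t>((int64_t{1} << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Requantize an int32 accumulator: acc * multiplier * 2^shift / 2^31, rounded
// to nearest; shift > 0 scales up, shift < 0 scales down.
int32_t multiplyByQuantizedMultiplierSketch(int32_t acc, int32_t multiplier, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return roundingDivideByPOT(
    saturatingRoundingDoublingHighMul(acc * (1 << left_shift), multiplier), right_shift);
}

For example, a per-channel rescale factor of 0.0008 would be stored as multiplier = round(0.8192 * 2^31) = 1759218604 with shift = -10, since 0.8192 * 2^-10 = 0.0008.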