Skip to content

Commit

Permalink
[onert-micro] Support S8 and S16 FullyConnected (#13163)
Browse files Browse the repository at this point in the history
This PR adds support for S8 and S16 FullyConnected kernels, including a CMSIS-NN backend.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
  • Loading branch information
BalyshevArtem authored Jun 13, 2024
1 parent 1f33982 commit b734be3
Show file tree
Hide file tree
Showing 8 changed files with 578 additions and 11 deletions.
1 change: 1 addition & 0 deletions onert-micro/onert-micro/include/OMStatus.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ enum OMStatus
UnsupportedDynamicShapeCase,
FailReadWOFFile,
FailReadCheckpointFile,
CmsisNNError,
};

} // namespace onert_micro
Expand Down
87 changes: 87 additions & 0 deletions onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#/*REGISTER_KERNEL(ABS, Abs)*/
#/*REGISTER_KERNEL(ADD, Add)*/
#/*REGISTER_KERNEL(ADD_N, AddN)*/
#/*REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)*/
#/*REGISTER_KERNEL(ARG_MAX, ArgMax)*/
#/*REGISTER_KERNEL(ARG_MIN, ArgMin)*/
#/*REGISTER_KERNEL(CONCATENATION, Concatenation)*/
#/*REGISTER_KERNEL(CUSTOM, BroadcastTo)*/
#/*REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)*/
#/*REGISTER_KERNEL(CEIL, Ceil)*/
#/*REGISTER_KERNEL(COS, Cos)*/
#/*REGISTER_KERNEL(CAST, Cast)*/
#/*REGISTER_KERNEL(DIV, Div)*/
#/*REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)*/
#/*REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)*/
#/*REGISTER_KERNEL(DEQUANTIZE, Dequantize)*/
REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
#/*REGISTER_KERNEL(CONV_2D, Conv2D)*/
#/*REGISTER_KERNEL(LOGISTIC, Logistic)*/
#/*REGISTER_KERNEL(LOG, Log)*/
#/*REGISTER_KERNEL(GATHER, Gather)*/
#/*REGISTER_KERNEL(GATHER_ND, GatherND)*/
#/*REGISTER_KERNEL(EXP, Exp)*/
#/*REGISTER_KERNEL(GREATER, Greater)*/
#/*REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)*/
#/*REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)*/
#/*REGISTER_KERNEL(ELU, Elu)*/
#/*REGISTER_KERNEL(EQUAL, Equal)*/
#/*REGISTER_KERNEL(FILL, Fill)*/
#/*REGISTER_KERNEL(FLOOR, Floor)*/
#/*REGISTER_KERNEL(FLOOR_DIV, FloorDiv)*/
#/*REGISTER_KERNEL(FLOOR_MOD, FloorMod)*/
#/*REGISTER_KERNEL(PACK, Pack)*/
#/*REGISTER_KERNEL(PAD, Pad)*/
#/*REGISTER_KERNEL(PADV2, PadV2)*/
#/*REGISTER_KERNEL(PRELU, PRelu)*/
#/*REGISTER_KERNEL(RESHAPE, Reshape)*/
#/*REGISTER_KERNEL(RELU, Relu)*/
#/*REGISTER_KERNEL(RELU6, Relu6)*/
#/*REGISTER_KERNEL(REDUCE_PROD, ReduceCommon)*/
#/*REGISTER_KERNEL(REDUCE_MAX, ReduceMax)*/
#/*REGISTER_KERNEL(ROUND, Round)*/
#/*REGISTER_KERNEL(LESS, Less)*/
#/*REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)*/
#/*REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)*/
#/*REGISTER_KERNEL(LESS_EQUAL, LessEqual)*/
#/*REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)*/
#/*REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)*/
#/*REGISTER_KERNEL(LOGICAL_OR, LogicalOr)*/
#/*REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)*/
#/*REGISTER_KERNEL(LOG_SOFTMAX, LogSoftmax)*/
#/*REGISTER_KERNEL(MUL, Mul)*/
#/*REGISTER_KERNEL(MIRROR_PAD, MirrorPad)*/
#/*REGISTER_KERNEL(MAXIMUM, Maximum)*/
#/*REGISTER_KERNEL(MEAN, Mean)*/
#/*REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)*/
#/*REGISTER_KERNEL(MINIMUM, Minimum)*/
#/*REGISTER_KERNEL(SHAPE, Shape)*/
#/*REGISTER_KERNEL(NOT_EQUAL, NotEqual)*/
#/*REGISTER_KERNEL(SIN, Sin)*/
#/*REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)*/
#/*REGISTER_KERNEL(SLICE, Slice)*/
#/*REGISTER_KERNEL(SUB, Sub)*/
#/*REGISTER_KERNEL(SPLIT, Split)*/
#/*REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)*/
#/*REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)*/
#/*REGISTER_KERNEL(SPLIT_V, SplitV)*/
#/*REGISTER_KERNEL(SQUARE, Square)*/
#/*REGISTER_KERNEL(SQRT, Sqrt)*/
#/*REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)*/
#/*REGISTER_KERNEL(QUANTIZE, Quantize)*/
#/*REGISTER_KERNEL(TANH, Tanh)*/
#/*REGISTER_KERNEL(TRANSPOSE, Transpose)*/
#/*REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)*/
#/*REGISTER_KERNEL(SOFTMAX, Softmax)*/
#/*REGISTER_KERNEL(SUM, Sum)*/
#/*REGISTER_KERNEL(SELECT_V2, SelectV2)*/
#/*REGISTER_KERNEL(SVDF, SVDF)*/
#/*REGISTER_KERNEL(WHILE, While)*/
#/*REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)*/
#/*REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)*/
#/*REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)*/
#/*REGISTER_KERNEL(RSQRT, Rsqrt)*/
#/*REGISTER_KERNEL(NEG, Neg)*/
#/*REGISTER_KERNEL(ZEROS_LIKE, ZerosLike)*/
#/*REGISTER_KERNEL(SQUEEZE, Squeeze)*/
#/*REGISTER_KERNEL(UNPACK, Unpack)*/
168 changes: 168 additions & 0 deletions onert-micro/onert-micro/include/pal/cmsisnn/PALFullyConnected.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H
#define ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H

#include "PALFullyConnectedCommon.h"

#include <arm_nnfunctions.h>

#include <cassert>
#include <cstdint>
#include <memory>

namespace onert_micro
{
namespace execute
{
namespace pal
{
template <>
OMStatus FullyConnected<int8_t>(const core::FullyConnectedParams &params, const int8_t *input_data,
const core::OMRuntimeShape &filter_shape, const int8_t *filter_data,
const int32_t *bias_data, const core::OMRuntimeShape &output_shape,
int8_t *output_data)
{
const int filter_dim_count = filter_shape.dimensionsCount();
const int output_dim_count = output_shape.dimensionsCount();
const int batches =
flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
const int output_depth = output_shape.dims(output_dim_count - 1);
const int accum_depth = filter_shape.dims(filter_dim_count - 1);

cmsis_nn_fc_params fc_params;
fc_params.input_offset = params.input_offset;
fc_params.output_offset = params.output_offset;
fc_params.filter_offset = params.weights_offset;
fc_params.activation.min = params.quantized_activation_min;
fc_params.activation.max = params.quantized_activation_max;

cmsis_nn_per_tensor_quant_params quant_params;
quant_params.multiplier = params.output_multiplier;
quant_params.shift = params.output_shift;

cmsis_nn_dims input_dims;
input_dims.n = batches;
input_dims.h = 1;
input_dims.w = 1;
input_dims.c = accum_depth;

cmsis_nn_dims filter_dims;
filter_dims.n = accum_depth;
filter_dims.h = 1;
filter_dims.w = 1;
filter_dims.c = output_depth;

cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;

cmsis_nn_dims output_dims;
output_dims.n = batches;
output_dims.h = 1;
output_dims.w = 1;
output_dims.c = output_depth;

int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
auto buffer = std::make_unique<int8_t[]>(buf_size);
assert(buffer != nullptr);

cmsis_nn_context ctx;
ctx.buf = buffer.get();
ctx.size = buf_size;

auto res =
arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
filter_data, &bias_dims, bias_data, &output_dims, output_data);
assert(res == ARM_CMSIS_NN_SUCCESS);
if (res != ARM_CMSIS_NN_SUCCESS)
return CmsisNNError;

return Ok;
}

template <>
OMStatus FullyConnected(const core::FullyConnectedParams &params, const int16_t *input_data,
const core::OMRuntimeShape &filter_shape, const int8_t *filter_data,
const int64_t *bias_data, const core::OMRuntimeShape &output_shape,
int16_t *output_data)
{
const int filter_dim_count = filter_shape.dimensionsCount();
const int output_dim_count = output_shape.dimensionsCount();
const int batches =
flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
const int output_depth = output_shape.dims(output_dim_count - 1);
const int accum_depth = filter_shape.dims(filter_dim_count - 1);

cmsis_nn_fc_params fc_params;
fc_params.input_offset = params.input_offset;
fc_params.output_offset = params.output_offset;
fc_params.filter_offset = params.weights_offset;
fc_params.activation.min = params.quantized_activation_min;
fc_params.activation.max = params.quantized_activation_max;

cmsis_nn_per_tensor_quant_params quant_params;
quant_params.multiplier = params.output_multiplier;
quant_params.shift = params.output_shift;

cmsis_nn_dims input_dims;
input_dims.n = batches;
input_dims.h = 1;
input_dims.w = 1;
input_dims.c = accum_depth;

cmsis_nn_dims filter_dims;
filter_dims.n = accum_depth;
filter_dims.h = 1;
filter_dims.w = 1;
filter_dims.c = output_depth;

cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;

cmsis_nn_dims output_dims;
output_dims.n = batches;
output_dims.h = 1;
output_dims.w = 1;
output_dims.c = output_depth;

int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
auto buffer = std::make_unique<int8_t[]>(buf_size);
assert(buffer != nullptr);

cmsis_nn_context ctx;
ctx.buf = buffer.get();
ctx.size = buf_size;

auto res =
arm_fully_connected_s16(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
filter_data, &bias_dims, bias_data, &output_dims, output_data);
assert(res == ARM_CMSIS_NN_SUCCESS);

if (res != ARM_CMSIS_NN_SUCCESS)
return CmsisNNError;

return Ok;
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H
Loading

0 comments on commit b734be3

Please sign in to comment.