Skip to content

Commit

Permalink
[onert-micro] Add FullyConnected kernel (#12686)
Browse files Browse the repository at this point in the history
This PR adds the FullyConnected kernel to onert-micro.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
  • Loading branch information
BalyshevArtem authored Mar 12, 2024
1 parent 2b0bc6b commit c783355
Show file tree
Hide file tree
Showing 11 changed files with 995 additions and 0 deletions.
1 change: 1 addition & 0 deletions onert-micro/onert-micro/include/OMStatus.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum OMStatus
{
Ok,
UnsupportedType,
UnsupportedQuantizationType,
UnsupportedActivation,
UnsupportedOp,
UnknownError,
Expand Down
119 changes: 119 additions & 0 deletions onert-micro/onert-micro/include/pal/common/PALFullyConnectedCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H
#define ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H

#include "OMStatus.h"
#include "PALUtils.h"

#include <cmath>

namespace onert_micro
{
namespace execute
{
namespace pal
{

/**
 * Reference (non-optimized) quantized FullyConnected kernel.
 *
 * For every batch row and every output channel this accumulates
 *   sum_d (filter[out_c][d] + weights_offset) * (input[b][d] + input_offset)
 * in a BiasType accumulator, adds the optional bias, rescales the accumulator with
 * multiplyByQuantizedMultiplier(), adds the output offset and clamps the result to the
 * quantized activation range taken from params.
 *
 * @param params        quantization offsets, multiplier/shift and activation limits
 * @param input_data    input values, read as input_data[b * accum_depth + d]
 * @param filter_shape  weight shape; its last dimension is used as the accumulation depth
 * @param filter_data   weights, read as filter_data[out_c * accum_depth + d]
 * @param bias_data     optional per-output-channel bias; skipped when null
 * @param output_shape  output shape; its last dimension is the number of output channels
 * @param output_data   destination, written as output_data[out_c + output_depth * b]
 * @return always Ok
 */
template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
OMStatus FullyConnected(const core::FullyConnectedParams &params, const InputType *input_data,
                        const core::OMRuntimeShape &filter_shape, const WeightType *filter_data,
                        const BiasType *bias_data, const core::OMRuntimeShape &output_shape,
                        OutputType *output_data)
{
  // Snapshot the quantization parameters into fixed-width locals.
  const int32_t in_zero_point = params.input_offset;
  const int32_t weight_zero_point = params.weights_offset;
  const int32_t out_zero_point = params.output_offset;
  const int32_t rescale_multiplier = params.output_multiplier;
  const int rescale_shift = params.output_shift;
  const int32_t clamp_lo = params.quantized_activation_min;
  const int32_t clamp_hi = params.quantized_activation_max;

  const int weight_rank = filter_shape.dimensionsCount();
  const int out_rank = output_shape.dimensionsCount();
  // NOTE(review): flatSizeSkipDim presumably multiplies every output dim except the last one,
  // i.e. yields the number of batch rows - confirm against PALUtils.h.
  const int num_rows = flatSizeSkipDim(output_shape.dimsData(), out_rank - 1, out_rank);
  const int num_units = output_shape.dims(out_rank - 1);
  const int depth = filter_shape.dims(weight_rank - 1);

  // Outputs are produced in exactly the order they are stored, so a running index is enough.
  int out_index = 0;
  for (int row = 0; row < num_rows; ++row)
  {
    for (int unit = 0; unit < num_units; ++unit)
    {
      const InputType *in_row = input_data + row * depth;
      const WeightType *weight_row = filter_data + unit * depth;

      BiasType sum = 0;
      for (int k = 0; k < depth; ++k)
      {
        // Widen to int32 before applying the zero-point offsets.
        const int32_t x = in_row[k];
        const int32_t w = weight_row[k];
        sum += (w + weight_zero_point) * (x + in_zero_point);
      }
      if (bias_data != nullptr)
      {
        sum += bias_data[unit];
      }

      int32_t requantized = multiplyByQuantizedMultiplier(sum, rescale_multiplier, rescale_shift);
      requantized += out_zero_point;
      // Clamp to the fused-activation range.
      if (requantized < clamp_lo)
      {
        requantized = clamp_lo;
      }
      if (clamp_hi < requantized)
      {
        requantized = clamp_hi;
      }
      output_data[out_index] = static_cast<OutputType>(requantized);
      ++out_index;
    }
  }
  return Ok;
}

/**
 * Float specialization of the reference FullyConnected kernel.
 *
 * Each output element is the dot product of one input row with one weight row, plus the
 * optional bias, clamped to [float_activation_min, float_activation_max] from params.
 *
 * @param params        supplies the float activation limits
 * @param input_data    input values, read as input_data[b * accum_depth + d]
 * @param filter_shape  weight shape; its last dimension is used as the accumulation depth
 * @param filter_data   weights, read as filter_data[out_c * accum_depth + d]
 * @param bias_data     optional per-output-channel bias; treated as 0 when null
 * @param output_shape  output shape; its last dimension is the number of output channels
 * @param output_data   destination, written as output_data[out_c + output_depth * b]
 * @return always Ok
 */
template <>
OMStatus inline FullyConnected<float>(const core::FullyConnectedParams &params,
                                      const float *input_data,
                                      const core::OMRuntimeShape &filter_shape,
                                      const float *filter_data, const float *bias_data,
                                      const core::OMRuntimeShape &output_shape, float *output_data)
{
  const float clamp_lo = params.float_activation_min;
  const float clamp_hi = params.float_activation_max;

  // NOTE(review): flatSizeSkipDim presumably multiplies every output dim except the last one,
  // i.e. yields the number of batch rows - confirm against PALUtils.h.
  const int num_rows = flatSizeSkipDim(output_shape.dimsData(), output_shape.dimensionsCount() - 1,
                                       output_shape.dimensionsCount());
  const int num_units = output_shape.dims(output_shape.dimensionsCount() - 1);
  const int depth = filter_shape.dims(filter_shape.dimensionsCount() - 1);

  // Outputs are produced in exactly the order they are stored, so a running index is enough.
  int out_index = 0;
  for (int row = 0; row < num_rows; ++row)
  {
    for (int unit = 0; unit < num_units; ++unit)
    {
      const float *in_row = input_data + row * depth;
      const float *weight_row = filter_data + unit * depth;

      float dot = 0.f;
      for (int k = 0; k < depth; ++k)
      {
        dot += in_row[k] * weight_row[k];
      }

      const float bias_term = bias_data ? bias_data[unit] : 0.0f;

      // Clamp with the same comparisons std::max/std::min would perform.
      float activated = dot + bias_term;
      if (activated < clamp_lo)
      {
        activated = clamp_lo;
      }
      if (clamp_hi < activated)
      {
        activated = clamp_hi;
      }
      output_data[out_index] = activated;
      ++out_index;
    }
  }
  return Ok;
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H
1 change: 1 addition & 0 deletions onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ REGISTER_KERNEL(CONCATENATION, Concatenation)
REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
REGISTER_KERNEL(MUL, Mul)
REGISTER_KERNEL(SUB, Sub)
REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
23 changes: 23 additions & 0 deletions onert-micro/onert-micro/include/pal/mcu/PALFullyConnected.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H
#define ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H

#include "PALFullyConnectedCommon.h"

#endif // ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_H
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H
#define ONERT_MICRO_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H

#include "TestDataFullyConnectedBase.h"

namespace onert_micro
{
namespace test_model
{
namespace fully_connected_float
{

/*
* FullyConnected Kernel:
*
* Input(1, 16) Weight(4, 16) Bias(4)
* \ | /
* \ | /
* FullyConnected
* |
* Output(1, 4)
*/

// Serialized circle model for the FullyConnected graph sketched above, embedded as raw bytes.
// Bytes 4..7 (0x43 0x49 0x52 0x30) are ASCII "CIR0"; the tensor names "in", "weight", "bias",
// "out" and the string "ONE-tflite2circle" also appear as ASCII further down the buffer.
// NOTE(review): presumably emitted by a converter tool rather than written by hand - regenerate
// instead of editing individual bytes; confirm.
const unsigned char test_kernel_model_circle[] = {
0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x60, 0x01, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, 0xc4, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x4c, 0x01, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0xc0, 0x00, 0x00, 0x80, 0x40,
0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
0x00, 0x00, 0xa0, 0x40, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00,
0x54, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff,
0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00,
0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
0x65, 0x00, 0x00, 0x00};

// Values fed to the model input: 16 floats, matching the Input(1, 16) shape in the diagram above.
const std::vector<float> input_data = {
17.491695, 15.660671, 4.7347794, -15.796822, 20.4776, 18.438372, -0.7529831, 10.671711,
10.699566, 3.1682281, -22.776001, 1.527811, -0.1198349, -5.748741, -5.1772327, 20.06879};

// Expected kernel output for input_data: 4 floats, matching the Output(1, 4) shape in the diagram.
const std::vector<float> reference_output_data = {263.84323, 260.84323, 259.84323, 266.84323};

} // namespace fully_connected_float

/**
 * Test data holder for the float FullyConnected kernel test.
 *
 * On construction it fills the members inherited from TestDataFullyConnectedBase<float> with the
 * model buffer, input values and reference output defined in namespace fully_connected_float.
 */
class TestDataFloatFullyConnected : public TestDataFullyConnectedBase<float>
{
public:
  TestDataFloatFullyConnected()
  {
    namespace data = fully_connected_float;

    this->_test_kernel_model_circle = data::test_kernel_model_circle;
    this->_reference_output_data = data::reference_output_data;
    this->_input_data = data::input_data;
  }

  ~TestDataFloatFullyConnected() override = default;
};

} // namespace test_model
} // namespace onert_micro

#endif // ONERT_MICRO_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H
Loading

0 comments on commit c783355

Please sign in to comment.