Skip to content

Commit

Permalink
[onert-micro] Add Add kernel (#12651)
Browse files Browse the repository at this point in the history
This PR adds the first Add kernel, together with common arithmetic functions and common PALUtils.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
  • Loading branch information
BalyshevArtem authored Feb 21, 2024
1 parent 7951b25 commit d511b6d
Show file tree
Hide file tree
Showing 13 changed files with 1,717 additions and 0 deletions.
53 changes: 53 additions & 0 deletions onert-micro/onert-micro/include/pal/common/PALAddCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_ADD_COMMON_H
#define ONERT_MICRO_EXECUTE_PAL_ADD_COMMON_H

#include "PALArithmeticOpCommon.h"

namespace onert_micro
{
namespace execute
{
namespace pal
{

/**
 * @brief Element-wise addition of two flat buffers.
 *
 * Thin wrapper that instantiates ArithmeticOp with AddFn<T>; all arguments are
 * forwarded unchanged.
 *
 * @param params      parameters handed to ArithmeticOp (used there to obtain the clamp range)
 * @param flat_size   number of elements to process
 * @param input1_data left-hand operands
 * @param input2_data right-hand operands
 * @param output_data destination buffer
 * @return the status reported by ArithmeticOp
 */
template <typename T>
OMStatus Add(const core::BinaryArithmeticBroadcastParams &params, const int flat_size,
             const T *input1_data, const T *input2_data, T *output_data)
{
  // Propagate the helper's status rather than discarding it and unconditionally reporting Ok.
  return ArithmeticOp<T, AddFn<T>>(params, flat_size, input1_data, input2_data, output_data);
}

/**
 * @brief Broadcasting addition of two tensors.
 *
 * Thin wrapper that instantiates BroadcastArithmeticOp4DSlow with AddFn<T>; all
 * arguments are forwarded unchanged.
 *
 * @param params       parameters handed to BroadcastArithmeticOp4DSlow
 * @param input1_shape shape of the first input
 * @param input1_data  data of the first input
 * @param input2_shape shape of the second input
 * @param input2_data  data of the second input
 * @param output_shape shape of the output
 * @param output_data  destination buffer
 * @return the status reported by BroadcastArithmeticOp4DSlow
 */
template <typename T>
OMStatus BroadcastAdd4DSlow(const core::BinaryArithmeticBroadcastParams &params,
                            const core::OMRuntimeShape &input1_shape, const T *input1_data,
                            const core::OMRuntimeShape &input2_shape, const T *input2_data,
                            const core::OMRuntimeShape &output_shape, T *output_data)
{
  // Propagate the helper's status rather than discarding it and unconditionally reporting Ok.
  return BroadcastArithmeticOp4DSlow<T, AddFn<T>>(params, input1_shape, input1_data, input2_shape,
                                                  input2_data, output_shape, output_data);
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_ADD_COMMON_H
134 changes: 134 additions & 0 deletions onert-micro/onert-micro/include/pal/common/PALArithmeticOpCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_ARITHMETIC_OP_COMMON_H
#define ONERT_MICRO_EXECUTE_PAL_ARITHMETIC_OP_COMMON_H

#include "PALUtils.h"
#include "ProcessBroadcastShapes.h"

#include "core/OMKernelData.h"

namespace onert_micro
{
namespace execute
{
namespace pal
{

// Binary functor returning lhs + rhs for operands of type T.
template <typename T> struct AddFn
{
  // const-qualified: the functor is stateless, so it must also be callable through
  // const objects and const references.
  T operator()(T lhs, T rhs) const { return lhs + rhs; }
};
// Binary functor returning lhs - rhs for operands of type T.
template <typename T> struct SubFn
{
  // const-qualified: the functor is stateless, so it must also be callable through
  // const objects and const references.
  T operator()(T lhs, T rhs) const { return lhs - rhs; }
};
// Binary functor returning lhs * rhs for operands of type T.
template <typename T> struct MulFn
{
  // const-qualified: the functor is stateless, so it must also be callable through
  // const objects and const references.
  T operator()(T lhs, T rhs) const { return lhs * rhs; }
};
// Binary functor returning lhs / rhs for operands of type T.
// No check is made on rhs; the division is performed exactly as written for T.
template <typename T> struct DivFn
{
  // const-qualified: the functor is stateless, so it must also be callable through
  // const objects and const references.
  T operator()(T lhs, T rhs) const { return lhs / rhs; }
};

/**
 * @brief Applies the binary functor Fn to two flat buffers, element by element,
 *        and clamps every result before storing it.
 *
 * The clamp range is obtained from @p params through getActivationParams.
 *
 * @param params      source of the clamp range
 * @param flat_size   number of elements to process
 * @param input1_data left-hand operands
 * @param input2_data right-hand operands
 * @param output_data destination buffer
 * @return Ok
 */
template <typename T, typename Fn>
OMStatus ArithmeticOp(const core::BinaryArithmeticBroadcastParams &params, const int flat_size,
                      const T *input1_data, const T *input2_data, T *output_data)
{
  T clamp_low;
  T clamp_high;
  getActivationParams(params, &clamp_low, &clamp_high);

  Fn binary_op;
  int idx = 0;
  while (idx < flat_size)
  {
    const T raw = binary_op(input1_data[idx], input2_data[idx]);
    // Raise to the lower bound first, then cap at the upper bound.
    const T raised = std::max(raw, clamp_low);
    output_data[idx] = std::min(raised, clamp_high);
    ++idx;
  }

  return Ok;
}

/**
 * @brief Applies the binary functor Fn between every element of a flat buffer
 *        and a single scalar, clamping every result before storing it.
 *
 * The clamp range is obtained from @p params through getActivationParams.
 *
 * @param params       source of the clamp range
 * @param flat_size    number of elements to process
 * @param input_data   left-hand operands
 * @param scalar_value right-hand operand shared by all elements
 * @param output_data  destination buffer
 */
template <typename T, typename Fn>
inline void ArithmeticOpScalar(const core::BinaryArithmeticBroadcastParams &params,
                               const int flat_size, const T *input_data, const T scalar_value,
                               T *output_data)
{
  T activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  // The functor instance was never declared here, so `func` was an unresolved name
  // as soon as the template was instantiated.
  Fn func;
  for (int i = 0; i < flat_size; ++i)
    output_data[i] =
      std::min(std::max(func(input_data[i], scalar_value), activation_min), activation_max);
}

/**
 * @brief Applies the binary functor Fn to two inputs with broadcasting over a
 *        4D-extended output shape, clamping every result before storing it.
 *
 * Broadcast descriptors for both inputs are built with
 * NdArrayDescsForElementwiseBroadcast, and input elements are located through
 * subscriptToIndex. The clamp range is obtained from @p params through
 * getActivationParams.
 *
 * @param params       source of the clamp range
 * @param input1_shape shape of the first input
 * @param input1_data  data of the first input
 * @param input2_shape shape of the second input
 * @param input2_data  data of the second input
 * @param output_shape shape of the output (extended to 4 dimensions internally)
 * @param output_data  destination buffer
 * @return Ok
 */
template <typename T, typename Fn>
OMStatus BroadcastArithmeticOp4DSlow(const core::BinaryArithmeticBroadcastParams &params,
                                     const core::OMRuntimeShape &input1_shape, const T *input1_data,
                                     const core::OMRuntimeShape &input2_shape, const T *input2_data,
                                     const core::OMRuntimeShape &output_shape, T *output_data)
{
  NdArrayDesc<4> lhs_desc;
  NdArrayDesc<4> rhs_desc;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &lhs_desc, &rhs_desc);

  const core::OMRuntimeShape out_shape_4d = core::OMRuntimeShape::extendedShape(4, output_shape);

  T clamp_low;
  T clamp_high;
  getActivationParams(params, &clamp_low, &clamp_high);

  // Output extents, named after the TensorFlow convention
  // (batch, row, column, channel). The channel index varies fastest, so it is
  // walked by the innermost loop to keep output writes contiguous.
  const auto batches = out_shape_4d.dims(0);
  const auto height = out_shape_4d.dims(1);
  const auto width = out_shape_4d.dims(2);
  const auto depth = out_shape_4d.dims(3);

  Fn binary_op;
  for (int b = 0; b < batches; ++b)
  {
    for (int y = 0; y < height; ++y)
    {
      for (int x = 0; x < width; ++x)
      {
        // Flat offset of channel 0 at position (b, y, x) in the output.
        const auto pixel_base = ((b * height + y) * width + x) * depth;

        for (int c = 0; c < depth; ++c)
        {
          const int out_offset = pixel_base + c;

          const T lhs = input1_data[subscriptToIndex(lhs_desc, b, y, x, c)];
          const T rhs = input2_data[subscriptToIndex(rhs_desc, b, y, x, c)];

          // Raise to the lower bound first, then cap at the upper bound.
          const T raised = std::max(binary_op(lhs, rhs), clamp_low);
          output_data[out_offset] = std::min(raised, clamp_high);
        }
      }
    }
  }

  return Ok;
}

} // namespace pal
} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_ARITHMETIC_OP_COMMON_H
Loading

0 comments on commit d511b6d

Please sign in to comment.