diff --git a/onert-micro/onert-micro/include/pal/common/PALSoftmaxInputGrad.h b/onert-micro/onert-micro/include/pal/common/PALSoftmaxInputGrad.h new file mode 100644 index 00000000000..172a19c770d --- /dev/null +++ b/onert-micro/onert-micro/include/pal/common/PALSoftmaxInputGrad.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H +#define ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H + +#include "OMStatus.h" +#include "PALUtils.h" + +#include + +namespace onert_micro +{ +namespace train +{ +namespace pal +{ + +void inline SoftmaxInputGrad(const float *dloss_doutput_data, + const core::OMRuntimeShape &dloss_doutput_shape, + const float *calculated_data, float *jacobian_row_data, + float *dloss_dinput_data) +{ + assert(dloss_doutput_shape.dimensionsCount() == 2); + assert(dloss_doutput_shape.dims(0) == 1); + const uint32_t output_dim = dloss_doutput_shape.dims(dloss_doutput_shape.dimensionsCount() - 1); + for (int i = 0; i < output_dim; ++i) + { + for (int j = 0; j < output_dim; ++j) + { + jacobian_row_data[j] = -calculated_data[i] * calculated_data[j]; + } + jacobian_row_data[i] += calculated_data[i]; + float total = 0.f; + for (int j = 0; j < output_dim; ++j) + { + total += jacobian_row_data[j] * dloss_doutput_data[j]; + } + dloss_dinput_data[i] = total; + } +} + +} // namespace pal +} // namespace train +} // namespace onert_micro + +#endif // ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H diff --git a/onert-micro/onert-micro/src/train/kernels/Softmax.cpp b/onert-micro/onert-micro/src/train/kernels/Softmax.cpp new file mode 100644 index 00000000000..b2ab791fa42 --- /dev/null +++ b/onert-micro/onert-micro/src/train/kernels/Softmax.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OMStatus.h" +#include "core/OMUtils.h" +#include "core/OMDataType.h" +#include "train/OMBackpropExecutionBuilder.h" +#include "execute/OMRuntimeKernel.h" +#include "core/memory/OMMemoryManager.h" +#include "PALSoftmaxInputGrad.h" + +using namespace onert_micro; +using namespace onert_micro::core; +using namespace onert_micro::train; + +namespace +{ + +constexpr uint32_t inputTensorIdx = 0; +constexpr uint32_t outputTensorIdx = 0; + +} // namespace + +/* + * - Calculate input gradient - Optional (not required if it is last op) + */ +OMStatus onert_micro::train::train_kernel_CircleSoftmax(const OMBackpropExecuteArgs &args) +{ + // Check is it last layer for training + if (args.is_last_layer) + { + return Ok; + } + + core::OMRuntimeStorage &forward_storage = args.forward_storage; + core::OMRuntimeStorage &backward_storage = args.backward_storage; + core::OMRuntimeContext &context = args.backward_context; + uint16_t op_index = args.kernel_index; + + const circle::Tensor *input; + const circle::Tensor *output; + + uint8_t *dloss_dinput_data; + + uint8_t *output_data; + uint8_t *dloss_doutput_data; + + // Read kernel + { + execute::OMRuntimeKernel runtime_kernel; + runtime_kernel.readKernel(op_index, context); + + input = runtime_kernel.inputs[inputTensorIdx]; + output = runtime_kernel.outputs[outputTensorIdx]; + assert(input != nullptr); + assert(output != nullptr); + + // Read forward storage + { + runtime_kernel.getDataFromStorage(op_index, forward_storage, context); + + output_data = runtime_kernel.outputs_data[outputTensorIdx]; + assert(output_data != nullptr); + } + + // Read backward storage + { + runtime_kernel.getDataFromStorage(op_index, backward_storage, context); + + dloss_dinput_data = runtime_kernel.inputs_data[inputTensorIdx]; + dloss_doutput_data = runtime_kernel.outputs_data[outputTensorIdx]; + + assert(dloss_dinput_data != nullptr); + assert(dloss_doutput_data != nullptr); + } + } + + OMRuntimeShape input_shape(input); + OMRuntimeShape output_shape(output); + + // Check Softmax output and input shape + assert(output_shape.dimensionsCount() == 2); + assert(output_shape.dims(0) == 1); + if (output_shape.dimensionsCount() != 2 or output_shape.dims(0) != 1) + return UnsupportedType; + + // Allocate temporary buffer to save Jacobian row + uint8_t *jacobian_row_data = nullptr; + OMStatus status = core::memory::OMMemoryManager::allocateMemory( + output_shape.flatSize() * sizeof(OMDataType(output->type())), &jacobian_row_data); + assert(status == Ok); + if (status != Ok) + return status; + + // Calculate input grad + pal::SoftmaxInputGrad(core::utils::castInputData(dloss_doutput_data), output_shape, + core::utils::castInputData(output_data), + core::utils::castOutputData(jacobian_row_data), + core::utils::castOutputData(dloss_dinput_data)); + + // Deallocate temporary buffer with Jacobian row + status = core::memory::OMMemoryManager::deallocateMemory(jacobian_row_data); + + return status; +}