Skip to content

Commit

Permalink
[onert-micro] Add Softmax training kernel
Browse files Browse the repository at this point in the history
This PR adds a Softmax training kernel.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
  • Loading branch information
Artem Balyshev committed Jun 18, 2024
1 parent da89844 commit cd9c10b
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 0 deletions.
61 changes: 61 additions & 0 deletions onert-micro/onert-micro/include/pal/common/PALSoftmaxInputGrad.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H
#define ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H

#include "OMStatus.h"
#include "PALUtils.h"

#include <cmath>

namespace onert_micro
{
namespace train
{
namespace pal
{

/**
 * @brief Propagates a loss gradient backwards through a single-row Softmax.
 *
 * With s = calculated_data and g = dloss_doutput_data, for every index i the
 * i-th row of the Softmax Jacobian
 *
 *   J[i][j] = -s[i] * s[j]   (+ s[i] when j == i)
 *
 * is written into jacobian_row_data and then reduced against g:
 *
 *   dloss_dinput_data[i] = sum_j J[i][j] * g[j]
 *
 * @param dloss_doutput_data  gradient of the loss w.r.t. the Softmax output (read only)
 * @param dloss_doutput_shape shape of that gradient; asserted to be 2-D with dims(0) == 1
 * @param calculated_data     values used as the Softmax result s (read only)
 * @param jacobian_row_data   scratch buffer holding at least dims(last) floats; overwritten
 * @param dloss_dinput_data   receives dims(last) gradient values w.r.t. the Softmax input
 *
 * @note dloss_dinput_data must not alias dloss_doutput_data: every element of
 *       dloss_doutput_data is re-read for each row after earlier results were stored.
 * @note The shape requirements are enforced with assert only, so they are not
 *       checked when asserts are compiled out.
 */
void inline SoftmaxInputGrad(const float *dloss_doutput_data,
                             const core::OMRuntimeShape &dloss_doutput_shape,
                             const float *calculated_data, float *jacobian_row_data,
                             float *dloss_dinput_data)
{
  assert(dloss_doutput_shape.dimensionsCount() == 2);
  assert(dloss_doutput_shape.dims(0) == 1);
  const uint32_t output_dim = dloss_doutput_shape.dims(dloss_doutput_shape.dimensionsCount() - 1);

  // Indices share the unsigned type of output_dim to avoid signed/unsigned comparison
  for (uint32_t i = 0; i < output_dim; ++i)
  {
    // Off-diagonal part of Jacobian row i: -s[i] * s[j]
    for (uint32_t j = 0; j < output_dim; ++j)
    {
      jacobian_row_data[j] = -calculated_data[i] * calculated_data[j];
    }
    // Diagonal correction: J[i][i] = s[i] - s[i] * s[i]
    jacobian_row_data[i] += calculated_data[i];

    // Dot product of the Jacobian row with the incoming gradient
    float total = 0.f;
    for (uint32_t j = 0; j < output_dim; ++j)
    {
      total += jacobian_row_data[j] * dloss_doutput_data[j];
    }
    dloss_dinput_data[i] = total;
  }
}

} // namespace pal
} // namespace train
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_PAL_COMMON_SOFTMAX_INPUT_GRAD_H
118 changes: 118 additions & 0 deletions onert-micro/onert-micro/src/train/kernels/Softmax.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "OMStatus.h"
#include "core/OMUtils.h"
#include "core/OMDataType.h"
#include "train/OMBackpropExecutionBuilder.h"
#include "execute/OMRuntimeKernel.h"
#include "core/memory/OMMemoryManager.h"
#include "PALSoftmaxInputGrad.h"

using namespace onert_micro;
using namespace onert_micro::core;
using namespace onert_micro::train;

namespace
{

// Slot of the Softmax input tensor in the runtime kernel's input arrays
constexpr uint32_t inputTensorIdx{0};
// Slot of the Softmax output tensor in the runtime kernel's output arrays
constexpr uint32_t outputTensorIdx{0};

} // namespace

/*
 * Softmax backward (training) kernel.
 *
 * - Calculate input gradient - Optional (not required if it is last op)
 *
 * Reads the forward Softmax output and the gradient w.r.t. that output, and
 * writes the gradient w.r.t. the Softmax input into the backward storage.
 *
 * Returns:
 *  - Ok when args.is_last_layer is set (nothing is computed) or on success
 *  - UnsupportedType when the output is not 2-D with dims(0) == 1
 *  - the status reported by OMMemoryManager when allocating or releasing the
 *    temporary Jacobian-row buffer fails
 *
 * Tensor data is accessed as float; the tensor type itself is not inspected here.
 */
OMStatus onert_micro::train::train_kernel_CircleSoftmax(const OMBackpropExecuteArgs &args)
{
  // Check whether it is the last layer for training: no input gradient is needed then
  if (args.is_last_layer)
  {
    return Ok;
  }

  core::OMRuntimeStorage &forward_storage = args.forward_storage;
  core::OMRuntimeStorage &backward_storage = args.backward_storage;
  core::OMRuntimeContext &context = args.backward_context;
  uint16_t op_index = args.kernel_index;

  const circle::Tensor *input = nullptr;
  const circle::Tensor *output = nullptr;

  uint8_t *dloss_dinput_data = nullptr;

  uint8_t *output_data = nullptr;
  uint8_t *dloss_doutput_data = nullptr;

  // Read kernel
  {
    execute::OMRuntimeKernel runtime_kernel;
    runtime_kernel.readKernel(op_index, context);

    input = runtime_kernel.inputs[inputTensorIdx];
    output = runtime_kernel.outputs[outputTensorIdx];
    assert(input != nullptr);
    assert(output != nullptr);

    // Read forward storage: the Softmax result computed during the forward pass
    {
      runtime_kernel.getDataFromStorage(op_index, forward_storage, context);

      output_data = runtime_kernel.outputs_data[outputTensorIdx];
      assert(output_data != nullptr);
    }

    // Read backward storage: the same tensor slots now refer to gradient buffers
    {
      runtime_kernel.getDataFromStorage(op_index, backward_storage, context);

      dloss_dinput_data = runtime_kernel.inputs_data[inputTensorIdx];
      dloss_doutput_data = runtime_kernel.outputs_data[outputTensorIdx];

      assert(dloss_dinput_data != nullptr);
      assert(dloss_doutput_data != nullptr);
    }
  }

  OMRuntimeShape output_shape(output);

  // Check Softmax output shape: only a single 2-D row is supported
  assert(output_shape.dimensionsCount() == 2);
  assert(output_shape.dims(0) == 1);
  if (output_shape.dimensionsCount() != 2 or output_shape.dims(0) != 1)
    return UnsupportedType;

  // Allocate temporary buffer to save one Jacobian row.
  // The buffer is read and written as float below, so it is sized in floats.
  uint8_t *jacobian_row_data = nullptr;
  OMStatus status = core::memory::OMMemoryManager::allocateMemory(
    output_shape.flatSize() * sizeof(float), &jacobian_row_data);
  assert(status == Ok);
  if (status != Ok)
    return status;

  // Calculate input grad
  pal::SoftmaxInputGrad(core::utils::castInputData<float>(dloss_doutput_data), output_shape,
                        core::utils::castInputData<float>(output_data),
                        core::utils::castOutputData<float>(jacobian_row_data),
                        core::utils::castOutputData<float>(dloss_dinput_data));

  // Deallocate temporary buffer with Jacobian row
  status = core::memory::OMMemoryManager::deallocateMemory(jacobian_row_data);

  return status;
}

0 comments on commit cd9c10b

Please sign in to comment.