[onert-micro] Support GRU

This PR adds support for the Circle GRU op. ONE-DCO-1.0-Signed-off-by: Artem Balyshev <a.balyshev@samsung.com>
Samsung · BalyshevArtem · Aug 12, 2024 · Aug 13, 2024 · Aug 12, 2024 · acc086cbc48eec0899dc4bb0b3932a44d2575982
commit acc086cbc48eec0899dc4bb0b3932a44d2575982
diff --git a/onert-micro/CMakeLists.txt b/onert-micro/CMakeLists.txt
@@ -70,7 +70,7 @@ else ()
 
     message(STATUS "FOUND FlatBuffers")
 
-    set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.6/circle_schema.fbs")
+    set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.8/circle_schema.fbs")
 
     # NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
     add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"

diff --git a/onert-micro/onert-micro/include/core/reader/OMCircleReader.h b/onert-micro/onert-micro/include/core/reader/OMCircleReader.h
@@ -55,7 +55,6 @@ class OMCircleReader
   const CircleOperators *operators() const { return _current_subgraph->operators(); }
   const CircleValues *inputs() const { return _current_subgraph->inputs(); }
   const CircleValues *outputs() const { return _current_subgraph->outputs(); }
-  const circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
   const CircleMetadataSet *metadata() const { return _model->metadata(); }
 
   uint32_t num_subgraph() const { return _model->subgraphs()->size(); }

diff --git a/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h b/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h
@@ -23,7 +23,7 @@
 
 #include <cstdint>
 
-constexpr static uint32_t maxInputSize = 5;
+constexpr static uint32_t maxInputSize = 6;
 constexpr static uint32_t maxOutputSize = 5;
 
 namespace onert_micro

diff --git a/onert-micro/onert-micro/include/pal/common/PALGRUCommon.h b/onert-micro/onert-micro/include/pal/common/PALGRUCommon.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_EXECUTE_PAL_GRU_COMMON_H
+#define ONERT_MICRO_EXECUTE_PAL_GRU_COMMON_H
+
+#include "OMStatus.h"
+#include "core/OMRuntimeShape.h"
+
+#include "PALUtils.h"
+#include "ProcessBroadcastShapes.h"
+#include "PALFullyConnected.h"
+#include "PALLogistic.h"
+
+namespace onert_micro
+{
+namespace execute
+{
+namespace pal
+{
+/**
+ * @brief Performs one GRU time step on float data, updating the state in place.
+ *
+ * Both scratch buffers are treated as three consecutive chunks of
+ * `output_shape_fc.dims(1) / 3` elements each. They are filled by two FullyConnected
+ * calls and then combined element-wise into @p output_data.
+ *
+ * @note Declared `inline` rather than placed in an unnamed namespace: this is a header,
+ *       and an unnamed namespace would give every including translation unit its own
+ *       private copy of the helper.
+ *
+ * @param input_data           input values of the current time step
+ * @param weight_input_data    weights of the FC applied to the current state (@p output_data)
+ * @param weight_hidden_data   weights of the FC applied to @p input_data
+ * @param bias_input_data      bias of the FC applied to the current state
+ * @param bias_hidden_data     bias of the FC applied to @p input_data
+ * @param output_data          in: current state; out: state after this step
+ * @param input_shape          unused, kept for interface stability
+ * @param output_shape         unused, kept for interface stability
+ * @param weight_input_shape   shape of @p weight_input_data
+ * @param weight_hidden_shape  shape of @p weight_hidden_data
+ * @param output_input_data    scratch buffer receiving the FC result over the state
+ * @param output_hidden_data   scratch buffer receiving the FC result over the input
+ * @param output_shape_fc      output shape shared by both FC calls
+ */
+inline void calculateGRU(const float *input_data, const float *weight_input_data,
+                         const float *weight_hidden_data, const float *bias_input_data,
+                         const float *bias_hidden_data, float *output_data,
+                         const core::OMRuntimeShape &input_shape,
+                         const core::OMRuntimeShape &output_shape,
+                         const core::OMRuntimeShape &weight_input_shape,
+                         const core::OMRuntimeShape &weight_hidden_shape, float *output_input_data,
+                         float *output_hidden_data, const core::OMRuntimeShape &output_shape_fc)
+{
+  core::FullyConnectedParams op_params{};
+  // The FC nodes inside GRU have no activation, so leave the clamp range fully open
+  op_params.float_activation_min = std::numeric_limits<float>::lowest();
+  op_params.float_activation_max = std::numeric_limits<float>::max();
+
+  // FC over the current state (output_data still holds the previous state here)
+  FullyConnected(op_params, output_data, weight_input_shape, weight_input_data, bias_input_data,
+                 output_shape_fc, output_input_data);
+
+  // FC over the input of the current time step
+  FullyConnected(op_params, input_data, weight_hidden_shape, weight_hidden_data, bias_hidden_data,
+                 output_shape_fc, output_hidden_data);
+
+  // Each FC result is split into three equally sized chunks
+  const int num_elements = output_shape_fc.dims(1) / 3;
+
+  float *second_hidden_part = output_hidden_data + num_elements;
+  float *second_input_part = output_input_data + num_elements;
+
+  float *third_hidden_part = second_hidden_part + num_elements;
+  float *third_input_part = second_input_part + num_elements;
+
+  // First chunk: sigmoid(state FC + input FC)
+  // NOTE(review): presumably the update gate - confirm against the Circle GRU definition
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_input_data[i] += output_hidden_data[i];
+  }
+
+  Logistic(num_elements, output_input_data, output_input_data);
+
+  // Scale the previous state by the first-chunk gate, then turn the gate into (1 - gate)
+  // so it can weight the candidate below. Both names alias the same scratch chunk.
+  float *most_left_part_final = output_input_data;
+  float *first_part = output_input_data;
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_data[i] *= most_left_part_final[i];
+    first_part[i] = 1.0f - first_part[i];
+  }
+
+  // Second chunk: sigmoid(input FC + state FC)
+  // NOTE(review): presumably the reset gate - confirm against the Circle GRU definition
+  for (int i = 0; i < num_elements; ++i)
+  {
+    second_hidden_part[i] += second_input_part[i];
+  }
+  Logistic(num_elements, second_hidden_part, second_hidden_part);
+
+  // Third chunk: candidate = tanh(gate2 * state FC + input FC); blend it into the state
+  for (int i = 0; i < num_elements; ++i)
+  {
+    second_hidden_part[i] *= third_input_part[i];
+    second_hidden_part[i] += third_hidden_part[i];
+    second_hidden_part[i] = std::tanh(second_hidden_part[i]);
+    second_hidden_part[i] *= first_part[i];
+    output_data[i] += second_hidden_part[i];
+  }
+}
+
+/**
+ * @brief Runs the float GRU over every time step of the input sequence.
+ *
+ * The state in @p output_data is seeded from @p hidden_state_data, then calculateGRU is
+ * invoked `input_shape.dims(0)` times while the input pointer advances by
+ * `input_shape.dims(2)` elements per step. After the call @p output_data holds the state
+ * produced by the last step.
+ *
+ * @note Marked `inline` because the definition lives in a header; without it, including
+ *       this header from more than one translation unit yields duplicate definitions.
+ *
+ * @param input_data           input sequence
+ * @param weight_input_data    weights of the FC applied to the state
+ * @param weight_hidden_data   weights of the FC applied to the input
+ * @param bias_input_data      bias of the FC applied to the state
+ * @param bias_hidden_data     bias of the FC applied to the input
+ * @param hidden_state_data    initial state; the innermost dimension of @p output_shape
+ *                             gives the number of floats copied from it
+ * @param output_data          receives the final state
+ * @param output_input_data    scratch buffer for the state FC result
+ * @param output_hidden_data   scratch buffer for the input FC result
+ * @param input_shape          shape of @p input_data; dims(0) and dims(2) are read
+ * @param output_shape         shape of @p output_data
+ * @param weight_input_shape   shape of @p weight_input_data
+ * @param weight_hidden_shape  shape of @p weight_hidden_data; dims(0) is the FC output size
+ * @return Ok
+ */
+inline OMStatus GRU(const float *input_data, const float *weight_input_data,
+                    const float *weight_hidden_data, const float *bias_input_data,
+                    const float *bias_hidden_data, const float *hidden_state_data,
+                    float *output_data, float *output_input_data, float *output_hidden_data,
+                    const core::OMRuntimeShape &input_shape,
+                    const core::OMRuntimeShape &output_shape,
+                    const core::OMRuntimeShape &weight_input_shape,
+                    const core::OMRuntimeShape &weight_hidden_shape)
+{
+  const int32_t time = input_shape.dims(0);
+
+  // Both FC calls produce a single row of weight_hidden_shape.dims(0) values
+  core::OMRuntimeShape output_shape_fc(2);
+  output_shape_fc.setDim(0, 1);
+  output_shape_fc.setDim(1, weight_hidden_shape.dims(0));
+
+  // Seed the running state with the initial hidden state
+  std::memcpy(output_data, hidden_state_data,
+              output_shape.dims(output_shape.dimensionsCount() - 1) * sizeof(float));
+
+  for (int i = 0; i < time; ++i)
+  {
+    calculateGRU(input_data, weight_input_data, weight_hidden_data, bias_input_data,
+                 bias_hidden_data, output_data, input_shape, output_shape, weight_input_shape,
+                 weight_hidden_shape, output_input_data, output_hidden_data, output_shape_fc);
+    // Advance to the input slice of the next time step
+    input_data += input_shape.dims(2);
+  }
+  return Ok;
+}
+
+} // namespace pal
+} // namespace execute
+} // namespace onert_micro
+
+#endif // ONERT_MICRO_EXECUTE_PAL_GRU_COMMON_H
diff --git a/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst b/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst
@@ -23,6 +23,7 @@ REGISTER_KERNEL(GATHER_ND, GatherND)
 REGISTER_KERNEL(EXP, Exp)
 REGISTER_KERNEL(GREATER, Greater)
 REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(GRU, GRU)
 REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
 REGISTER_KERNEL(ELU, Elu)
 REGISTER_KERNEL(EQUAL, Equal)

diff --git a/onert-micro/onert-micro/include/pal/mcu/PALGRU.h b/onert-micro/onert-micro/include/pal/mcu/PALGRU.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_EXECUTE_PAL_GRU_H
+#define ONERT_MICRO_EXECUTE_PAL_GRU_H
+
+#include "PALGRUCommon.h"
+
+#endif // ONERT_MICRO_EXECUTE_PAL_GRU_H
diff --git a/onert-micro/onert-micro/src/execute/kernels/GRU.cpp b/onert-micro/onert-micro/src/execute/kernels/GRU.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <core/OMDataType.h>
+#include "OMStatus.h"
+
+#include "core/OMUtils.h"
+#include "core/OMKernelData.h"
+#include "core/memory/OMMemoryManager.h"
+
+#include "execute/OMKernelExecutionBuilder.h"
+#include "execute/OMUtils.h"
+#include "execute/OMRuntimeKernel.h"
+
+#include "PALGRU.h"
+
+using namespace onert_micro;
+using namespace onert_micro::core;
+using namespace onert_micro::execute;
+
+namespace
+{
+
+// Positions used to index runtime_kernel.inputs / runtime_kernel.inputs_data
+constexpr uint32_t inputTensorIdx{0};
+constexpr uint32_t hiddenHiddenTensorIdx{1};
+constexpr uint32_t hiddenHiddenBiasTensorIdx{2};
+constexpr uint32_t hiddenInputTensorIdx{3};
+constexpr uint32_t hiddenInputBiasTensorIdx{4};
+constexpr uint32_t stateTensorIdx{5};
+
+// Position used to index runtime_kernel.outputs / runtime_kernel.outputs_data
+constexpr uint32_t outputTensorIdx{0};
+
+} // namespace
+
+// NOTE: dynamic shapes are not currently supported
+/**
+ * Executes the Circle GRU operator identified by execute_args.kernel_index.
+ *
+ * Reads the six operator inputs and the single output, allocates two scratch buffers for
+ * the intermediate FullyConnected results, runs pal::GRU for FLOAT32 input and releases
+ * the scratch buffers again.
+ *
+ * @param execute_args  runtime context, runtime storage and kernel index of the operator
+ * @return the status of a failed scratch allocation, otherwise the status reported by
+ *         pal::GRU; UnsupportedType when the input type is not handled
+ */
+OMStatus onert_micro::execute::execute_kernel_CircleGRU(const OMExecuteArgs &execute_args)
+{
+  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
+  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
+  uint16_t op_index = execute_args.kernel_index;
+
+  const circle::Tensor *input = nullptr;
+  const circle::Tensor *hidden_hidden = nullptr;
+  const circle::Tensor *hidden_input = nullptr;
+  const circle::Tensor *state = nullptr;
+
+  const circle::Tensor *output = nullptr;
+
+  uint8_t *input_data = nullptr;
+  uint8_t *hidden_hidden_data = nullptr;
+  uint8_t *hidden_hidden_bias_data = nullptr;
+  uint8_t *hidden_input_data = nullptr;
+  uint8_t *hidden_input_bias_data = nullptr;
+  uint8_t *state_data = nullptr;
+  uint8_t *output_data = nullptr;
+
+  // Read kernel (scoped so that runtime_kernel is gone before the computation starts)
+  {
+    execute::OMRuntimeKernel runtime_kernel;
+    runtime_kernel.readKernel(op_index, runtime_context);
+
+    input = runtime_kernel.inputs[inputTensorIdx];
+    hidden_hidden = runtime_kernel.inputs[hiddenHiddenTensorIdx];
+    hidden_input = runtime_kernel.inputs[hiddenInputTensorIdx];
+    state = runtime_kernel.inputs[stateTensorIdx];
+
+    output = runtime_kernel.outputs[outputTensorIdx];
+    assert(input != nullptr);
+    assert(hidden_hidden != nullptr);
+    assert(hidden_input != nullptr);
+    assert(state != nullptr);
+    // Bias tensors can be nullptr
+    assert(output != nullptr);
+
+    runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
+
+    input_data = runtime_kernel.inputs_data[inputTensorIdx];
+    hidden_hidden_data = runtime_kernel.inputs_data[hiddenHiddenTensorIdx];
+    hidden_hidden_bias_data = runtime_kernel.inputs_data[hiddenHiddenBiasTensorIdx];
+    hidden_input_data = runtime_kernel.inputs_data[hiddenInputTensorIdx];
+    hidden_input_bias_data = runtime_kernel.inputs_data[hiddenInputBiasTensorIdx];
+    state_data = runtime_kernel.inputs_data[stateTensorIdx];
+
+    output_data = runtime_kernel.outputs_data[outputTensorIdx];
+    assert(input_data != nullptr);
+    assert(hidden_hidden_data != nullptr);
+    assert(hidden_input_data != nullptr);
+    assert(state_data != nullptr);
+    // Bias data can be nullptr
+    assert(output_data != nullptr);
+  }
+
+  OMStatus status = Ok;
+
+  switch (input->type())
+  {
+#ifndef DIS_FLOAT
+    case circle::TensorType_FLOAT32:
+    {
+      // Scratch buffers for the two FullyConnected results computed inside pal::GRU.
+      // They hold floats, so they are sized with sizeof(float); applying sizeof to an
+      // OMDataType value would measure the enum itself, not the element width.
+      // NOTE(review): pal::GRU appears to need only dims(0) elements per buffer, so
+      // flatSize() over-allocates - confirm before shrinking.
+      uint8_t *output_hidden_data = nullptr;
+      uint8_t *output_input_data = nullptr;
+
+      status = core::memory::OMMemoryManager::allocateMemory(
+        core::OMRuntimeShape(hidden_hidden).flatSize() * sizeof(float), &output_hidden_data);
+      if (status != Ok)
+        return status;
+
+      status = core::memory::OMMemoryManager::allocateMemory(
+        core::OMRuntimeShape(hidden_input).flatSize() * sizeof(float), &output_input_data);
+      if (status != Ok)
+      {
+        // Do not leak the first scratch buffer when the second allocation fails
+        core::memory::OMMemoryManager::deallocateMemory(output_hidden_data);
+        return status;
+      }
+
+      status = pal::GRU(core::utils::castInputData<float>(input_data),
+                        core::utils::castInputData<float>(hidden_input_data),
+                        core::utils::castInputData<float>(hidden_hidden_data),
+                        core::utils::castInputData<float>(hidden_input_bias_data),
+                        core::utils::castInputData<float>(hidden_hidden_bias_data),
+                        core::utils::castInputData<float>(state_data),
+                        core::utils::castOutputData<float>(output_data),
+                        core::utils::castOutputData<float>(output_input_data),
+                        core::utils::castOutputData<float>(output_hidden_data),
+                        core::OMRuntimeShape(input), core::OMRuntimeShape(output),
+                        core::OMRuntimeShape(hidden_input), core::OMRuntimeShape(hidden_hidden));
+
+      core::memory::OMMemoryManager::deallocateMemory(output_input_data);
+      core::memory::OMMemoryManager::deallocateMemory(output_hidden_data);
+    }
+    break;
+#endif // DIS_FLOAT
+    default:
+    {
+      status = UnsupportedType;
+      assert(false && "Unsupported type.");
+    }
+  }
+
+  return status;
+}
diff --git a/onert-micro/onert-micro/src/execute/kernels/tests/GRU.test.cpp b/onert-micro/onert-micro/src/execute/kernels/tests/GRU.test.cpp
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO add tests
diff --git a/onert-micro/onert-micro/src/import/kernels/GRU.cpp b/onert-micro/onert-micro/src/import/kernels/GRU.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OMStatus.h"
+
+#include "import/OMKernelConfigureBuilder.h"
+
+#include "core/OMUtils.h"
+#include "core/OMKernelData.h"
+
+#include "execute/OMRuntimeKernel.h"
+
+using namespace onert_micro;
+using namespace onert_micro::core;
+
+namespace
+{
+
+// Positions used to index runtime_kernel.inputs
+constexpr uint32_t inputTensorIdx{0};
+constexpr uint32_t hiddenHiddenTensorIdx{1};
+constexpr uint32_t hiddenHiddenBiasTensorIdx{2};
+constexpr uint32_t hiddenInputTensorIdx{3};
+constexpr uint32_t hiddenInputBiasTensorIdx{4};
+constexpr uint32_t stateTensorIdx{5};
+
+// Position used to index runtime_kernel.outputs
+constexpr uint32_t outputTensorIdx{0};
+
+} // namespace
+
+/**
+ * Validates the operand shapes of the Circle GRU operator selected by
+ * config_args.kernel_index.
+ *
+ * Three conditions are checked, in this order, and the first failing one is reported:
+ *  1. both weight tensors have the same dims(0);
+ *  2. that dims(0) equals three times the innermost output dimension;
+ *  3. the innermost dimensions of output and state are equal.
+ *
+ * @param config_args  runtime context and kernel index of the operator
+ * @return the status of the first failing utils::checkCondition call, or of the last
+ *         check when the earlier ones pass
+ */
+OMStatus onert_micro::import::configure_kernel_CircleGRU(const OMConfigureArgs &config_args)
+{
+  core::OMRuntimeContext &context = config_args.runtime_context;
+  const uint16_t kernel_idx = config_args.kernel_index;
+
+  // Load the operator's tensors
+  execute::OMRuntimeKernel kernel;
+  kernel.readKernel(kernel_idx, context);
+
+  const circle::Tensor *const input_tensor = kernel.inputs[inputTensorIdx];
+  const circle::Tensor *const hidden_hidden_tensor = kernel.inputs[hiddenHiddenTensorIdx];
+  const circle::Tensor *const hidden_input_tensor = kernel.inputs[hiddenInputTensorIdx];
+  const circle::Tensor *const state_tensor = kernel.inputs[stateTensorIdx];
+  const circle::Tensor *const output_tensor = kernel.outputs[outputTensorIdx];
+
+  // The bias tensors are optional, so only the remaining operands are required
+  assert(input_tensor != nullptr);
+  assert(hidden_hidden_tensor != nullptr);
+  assert(hidden_input_tensor != nullptr);
+  assert(state_tensor != nullptr);
+  assert(output_tensor != nullptr);
+
+  OMRuntimeShape hidden_hidden_dims(hidden_hidden_tensor);
+  OMRuntimeShape hidden_input_dims(hidden_input_tensor);
+  OMRuntimeShape output_dims(output_tensor);
+  OMRuntimeShape state_dims(state_tensor);
+
+  // Size of the innermost dimension of a shape
+  const auto innermost = [](const OMRuntimeShape &shape) {
+    return shape.dims(shape.dimensionsCount() - 1);
+  };
+
+  // 1. both weight tensors must agree on their first dimension
+  OMStatus result = utils::checkCondition(hidden_hidden_dims.dims(0) == hidden_input_dims.dims(0));
+  if (result != Ok)
+    return result;
+
+  // 2. the weights' first dimension is three times the innermost output dimension
+  constexpr int32_t chunk_count = 3;
+  result =
+    utils::checkCondition(hidden_hidden_dims.dims(0) == (chunk_count * innermost(output_dims)));
+  if (result != Ok)
+    return result;
+
+  // 3. output and state share the innermost dimension
+  return utils::checkCondition(innermost(output_dims) == innermost(state_dims));
+}