[onert] Support float type I/O setting API on quantized model
This commit updates API implementation to support float type I/O setting.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh committed Jul 12, 2024
1 parent e68b46e commit c13eff7
Showing 5 changed files with 46 additions and 25 deletions.
7 changes: 7 additions & 0 deletions runtime/onert/api/nnfw/include/nnfw.h
@@ -255,6 +255,7 @@ NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
* When it is called after calling {@link nnfw_prepare} or even after {@link nnfw_run}, this info
* is used on the next call to {@link nnfw_run}, and the shapes of the tensors are determined on the fly.
* If this function is called multiple times for the same index, the previous setting is overwritten.
* The dtype field of \p tensor_info is ignored; only the shape is applied.
*
* @param[in] session Session to which the input tensor info is to be set
* @param[in] index Index of input to be set (0-indexed)
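Because the dtype field is ignored, only the shape in the tensor info takes effect; the buffer type is chosen later via nnfw_set_input/nnfw_set_output. A minimal sketch under assumptions (hypothetical index and shape; nnfw_set_input_tensorinfo and nnfw_tensorinfo are the existing nnfw API):

#include <nnfw.h>

// Assumes `session` has already been created and a model loaded.
NNFW_STATUS resize_input0_to_1x4(nnfw_session *session)
{
  nnfw_tensorinfo ti;
  ti.dtype = NNFW_TYPE_TENSOR_FLOAT32; // ignored by nnfw_set_input_tensorinfo
  ti.rank = 2;
  ti.dims[0] = 1;
  ti.dims[1] = 4;
  // Only the shape {1, 4} takes effect; the buffer type is given to nnfw_set_input instead.
  return nnfw_set_input_tensorinfo(session, 0, &ti);
}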
@@ -331,6 +332,9 @@ NNFW_STATUS nnfw_await(nnfw_session *session);
* reused for many inferences. \p length must be greater than or equal to the size the operand
* requires. To specify an optional input, you can either not call this for that input or call
* this with \p buffer of NULL and \p length of 0.
* If you pass {@link NNFW_TYPE_TENSOR_FLOAT32} as \p type and the model input at the given
* index has a quantized type, the runtime converts the float buffer data internally and feeds
* the quantized data to the model input.
*
* @param[in] session Session to which the input is to be set
* @param[in] index Index of input to be set (0-indexed)
@@ -350,6 +354,9 @@ NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type
* reused for many inferences. \p length must be greater than or equal to the size the operand
* requires. An output operand can have an unspecified shape that is deduced dynamically during
* execution, so you must provide a \p buffer that is large enough.
* If you pass {@link NNFW_TYPE_TENSOR_FLOAT32} as \p type and the model output at the given
* index has a quantized type, the runtime dequantizes the model output internally and writes
* the float data to the buffer.
*
* @param[in] session Session from which the inference output is to be extracted
* @param[in] index Index of output to be set (0-indexed)
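Taken together, the new doc text means a fully quantized model can now be driven with float buffers end to end. A minimal sketch under assumed conditions (single input and output of 4 float elements each, hypothetical model path; all nnfw_* calls are the existing C API):

#include <nnfw.h>
#include <cstdio>

int run_float_io_on_quantized_model()
{
  nnfw_session *session = nullptr;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return -1;

  // Hypothetical quantized model (e.g. uint8 I/O); the path is illustrative.
  nnfw_load_model_from_file(session, "quantized_model.circle");
  nnfw_prepare(session);

  // Float buffers, even though the model I/O itself is quantized: the runtime
  // quantizes the input and dequantizes the output internally.
  float input[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  float output[4] = {0.f, 0.f, 0.f, 0.f};
  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output, sizeof(output));

  if (nnfw_run(session) != NNFW_STATUS_NO_ERROR)
    std::printf("inference failed\n");

  nnfw_close_session(session);
  return 0;
}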
13 changes: 10 additions & 3 deletions runtime/onert/api/nnfw/src/nnfw_api_internal.cc
@@ -548,7 +548,7 @@ NNFW_STATUS nnfw_session::await()
return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const void *buffer,
NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE type, const void *buffer,
size_t length)
{
if (!isStatePreparedOrFinishedRun())
@@ -567,6 +567,10 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo

try
{
// Only float is accepted as an override type; if the model input is quantized,
// the runtime quantizes the float buffer data internally
if (type == NNFW_TYPE_TENSOR_FLOAT32)
_execution->setInputType(onert::ir::IOIndex(index),
onert::ir::TypeInfo(onert::ir::DataType::FLOAT32));
_execution->setInput(onert::ir::IOIndex(index), buffer, length);
}
catch (const std::exception &e)
@@ -577,8 +581,7 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *buffer,
size_t length)
NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE type, void *buffer, size_t length)
{
if (!isStatePreparedOrFinishedRun())
{
@@ -596,6 +599,10 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b

try
{
// Only float is accepted as an override type; if the model output is quantized,
// the runtime dequantizes it into the float buffer internally
if (type == NNFW_TYPE_TENSOR_FLOAT32)
_execution->setOutputType(onert::ir::IOIndex(index),
onert::ir::TypeInfo(onert::ir::DataType::FLOAT32));
_execution->setOutput(onert::ir::IOIndex(index), buffer, length);
}
catch (const std::exception &e)
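The setInputType/setOutputType calls tell the executor which type the user buffer holds; the actual conversion uses the model's quantization parameters. For readers unfamiliar with the scheme, here is an illustrative sketch of standard affine (asymmetric) uint8 quantization — not onert's actual implementation; scale and zero_point would come from the model:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a float value to uint8: q = clamp(round(x / scale) + zero_point, 0, 255)
inline uint8_t quantize(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::lround(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::clamp<int32_t>(q, 0, 255));
}

// Dequantize a uint8 value back to float: x = scale * (q - zero_point)
inline float dequantize(uint8_t q, float scale, int32_t zero_point)
{
  return scale * static_cast<float>(static_cast<int32_t>(q) - zero_point);
}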
37 changes: 21 additions & 16 deletions runtime/onert/core/src/exec/Execution.cc
@@ -63,17 +63,8 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length)
{
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor

// check if size enough for input is passed
// if input_shape_sig is set, input_shape_sig overrides shape in info
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
// Length validation in execute(): datatype can be changed by API call
auto &input_desc = _ctx.desc.inputs.at(index.value());
if (length < input_desc->info.total_size())
{
throw std::runtime_error{"Too small length"};
}

input_desc->buffer = buffer;
input_desc->size = length;
}
@@ -87,13 +78,10 @@ void Execution::setInput(const ir::IOIndex &index, const ir::Shape &shape, const

void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length)
{
// Length validation in execute()
// - datatype can be changed by API call
// - shape can be changed by dynamic shape inference
auto &output_desc = _ctx.desc.outputs.at(index.value());
// Check length when output shape is valid
if (!_ctx.shape_updated && length < output_desc->info.total_size())
{
throw std::runtime_error{"Too small length"};
}

output_desc->buffer = buffer;
output_desc->size = length;
}
@@ -133,6 +121,23 @@ void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;

// Input length validation check
for (const auto &input : _ctx.desc.inputs)
{
if (input->info.total_size() > input->size)
throw std::runtime_error{"Too small input buffer length"};
}

// Output length validation check
if (!_ctx.shape_updated)
{
for (const auto &output : _ctx.desc.outputs)
{
if (output->info.total_size() > output->size)
throw std::runtime_error{"Too small output buffer length"};
}
}

_executors->execute(_ctx);
finished = true;

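The length checks move out of setInput/setOutput and into execute() because the effective I/O data type is no longer fixed at set time: set_input/set_output may switch a quantized model I/O to float right before attaching the buffer, which changes the required byte count. A small hedged sketch of the size arithmetic (the shape is hypothetical):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Required buffer size depends on the *effective* element type, which may be
// switched to float after prepare(); hence the check belongs in execute().
std::size_t required_bytes(const std::vector<std::size_t> &shape, std::size_t elem_size)
{
  return std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                         std::multiplies<std::size_t>()) * elem_size;
}

// Example with a hypothetical shape {1, 2, 2, 1}:
//   required_bytes({1, 2, 2, 1}, sizeof(uint8_t)) ==  4  // model's quantized type
//   required_bytes({1, 2, 2, 1}, sizeof(float))   == 16  // after float I/O is requested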
@@ -778,8 +778,9 @@ TEST_F(CombinationTest1, neg_combination_of_set_input_tensorinfo_and_nnfw_run)
cast_in_buf = {10};
reshape_shape_in_buf = {1, 4};
expected = {10, 11, 12, 13};
setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual);
// This should throw an error
EXPECT_ANY_THROW(setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual));
EXPECT_EQ(nnfw_run(session), NNFW_STATUS_ERROR);

NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
@@ -989,7 +990,8 @@ TEST_F(CombinationTest2, neg_combination_set_input_tensorinfo_for_two_inputs)
expected = {110}; // wrong
expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {1}}; // wrong
actual.resize(1); // wrong
EXPECT_ANY_THROW(run_WITHOUT_set_input_tensorinfo(in0, in1, expected_ti, expected, actual));
setInputOutput(session, in0, in1, actual);
EXPECT_EQ(nnfw_run(session), NNFW_STATUS_ERROR);

NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
8 changes: 4 additions & 4 deletions tests/nnfw_api/src/NNPackageTests/AddSessionPrepared.test.cc
@@ -136,8 +136,8 @@ TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
TEST_F(ValidationTestAddSessionPrepared, neg_set_input_002)
{
char input[1]; // buffer size is too small
ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
NNFW_STATUS_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)));
EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
}

TEST_F(ValidationTestAddSessionPrepared, set_output_001)
@@ -155,8 +155,8 @@ TEST_F(ValidationTestAddSessionPrepared, neg_set_output_001)
TEST_F(ValidationTestAddSessionPrepared, neg_set_output_002)
{
char input[1]; // buffer size is too small
ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
NNFW_STATUS_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)));
EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
}

TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
