[onert] Support float type I/O setting API on quantized model
This commit updates API implementation to support float type I/O setting.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh committed Jul 12, 2024
1 parent e68b46e commit c13eff7
Showing 5 changed files with 46 additions and 25 deletions.
7 changes: 7 additions & 0 deletions runtime/onert/api/nnfw/include/nnfw.h
@@ -255,6 +255,7 @@ NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
* When it is called after calling {@link nnfw_prepare} or even after {@link nnfw_run}, this info
* is used on the next call to {@link nnfw_run}, and the shapes of the tensors are determined on the fly.
* If this function is called multiple times for the same index, the previous setting is overwritten.
* The dtype field of \p tensor_info is ignored; only the shape is applied.
*
* @param[in] session Session to which the input tensor info is to be set
* @param[in] index Index of input to be set (0-indexed)
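Because the dtype field is ignored, only the shape in the tensor info takes effect; the buffer type is chosen later via nnfw_set_input/nnfw_set_output. A minimal sketch under assumptions (hypothetical index and shape; nnfw_set_input_tensorinfo and nnfw_tensorinfo are the existing nnfw API):

#include <nnfw.h>

// Assumes `session` has already been created and a model loaded.
NNFW_STATUS resize_input0_to_1x4(nnfw_session *session)
{
  nnfw_tensorinfo ti;
  ti.dtype = NNFW_TYPE_TENSOR_FLOAT32; // ignored by nnfw_set_input_tensorinfo
  ti.rank = 2;
  ti.dims[0] = 1;
  ti.dims[1] = 4;
  // Only the shape {1, 4} takes effect; the buffer type is given to nnfw_set_input instead.
  return nnfw_set_input_tensorinfo(session, 0, &ti);
}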
@@ -331,6 +332,9 @@ NNFW_STATUS nnfw_await(nnfw_session *session);
* reused for many inferences. \p length must be greater than or equal to the size the operand
* requires. To specify an optional input, you can either not call this for that input or call
* this with \p buffer of NULL and \p length of 0.
* If you pass {@link NNFW_TYPE_TENSOR_FLOAT32} as \p type and the model input at the given
* index has a quantized type, the runtime converts the float buffer data internally and feeds
* the quantized data to the model input.
*
* @param[in] session Session to which the input is to be set
* @param[in] index Index of input to be set (0-indexed)
@@ -350,6 +354,9 @@ NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type
* reused for many inferences. \p length must be greater than or equal to the size the operand
* requires. An output operand can have an unspecified shape that is deduced dynamically during
* execution, so you must provide a \p buffer that is large enough.
* If you pass {@link NNFW_TYPE_TENSOR_FLOAT32} as \p type and the model output at the given
* index has a quantized type, the runtime dequantizes the model output internally and writes
* the float data to the buffer.
*
* @param[in] session Session from which the inference output is to be extracted
* @param[in] index Index of output to be set (0-indexed)
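Taken together, the new doc text means a fully quantized model can now be driven with float buffers end to end. A minimal sketch under assumed conditions (single input and output of 4 float elements each, hypothetical model path; all nnfw_* calls are the existing C API):

#include <nnfw.h>
#include <cstdio>

int run_float_io_on_quantized_model()
{
  nnfw_session *session = nullptr;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return -1;

  // Hypothetical quantized model (e.g. uint8 I/O); the path is illustrative.
  nnfw_load_model_from_file(session, "quantized_model.circle");
  nnfw_prepare(session);

  // Float buffers, even though the model I/O itself is quantized: the runtime
  // quantizes the input and dequantizes the output internally.
  float input[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  float output[4] = {0.f, 0.f, 0.f, 0.f};
  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output, sizeof(output));

  if (nnfw_run(session) != NNFW_STATUS_NO_ERROR)
    std::printf("inference failed\n");

  nnfw_close_session(session);
  return 0;
}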
13 changes: 10 additions & 3 deletions runtime/onert/api/nnfw/src/nnfw_api_internal.cc
@@ -548,7 +548,7 @@ NNFW_STATUS nnfw_session::await()
return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const void *buffer,
NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE type, const void *buffer,
size_t length)
{
if (!isStatePreparedOrFinishedRun())
@@ -567,6 +567,10 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo

try
{
// Only float is accepted as an override type; if the model input is quantized,
// the runtime quantizes the float buffer data internally
if (type == NNFW_TYPE_TENSOR_FLOAT32)
_execution->setInputType(onert::ir::IOIndex(index),
onert::ir::TypeInfo(onert::ir::DataType::FLOAT32));
_execution->setInput(onert::ir::IOIndex(index), buffer, length);
}
catch (const std::exception &e)
@@ -577,8 +581,7 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *buffer,
size_t length)
NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE type, void *buffer, size_t length)
{
if (!isStatePreparedOrFinishedRun())
{
@@ -596,6 +599,10 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b

try
{
// Only float is accepted as an override type; if the model output is quantized,
// the runtime dequantizes it into the float buffer internally
if (type == NNFW_TYPE_TENSOR_FLOAT32)
_execution->setOutputType(onert::ir::IOIndex(index),
onert::ir::TypeInfo(onert::ir::DataType::FLOAT32));
_execution->setOutput(onert::ir::IOIndex(index), buffer, length);
}
catch (const std::exception &e)
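The setInputType/setOutputType calls tell the executor which type the user buffer holds; the actual conversion uses the model's quantization parameters. For readers unfamiliar with the scheme, here is an illustrative sketch of standard affine (asymmetric) uint8 quantization — not onert's actual implementation; scale and zero_point would come from the model:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a float value to uint8: q = clamp(round(x / scale) + zero_point, 0, 255)
inline uint8_t quantize(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::lround(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::clamp<int32_t>(q, 0, 255));
}

// Dequantize a uint8 value back to float: x = scale * (q - zero_point)
inline float dequantize(uint8_t q, float scale, int32_t zero_point)
{
  return scale * static_cast<float>(static_cast<int32_t>(q) - zero_point);
}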
37 changes: 21 additions & 16 deletions runtime/onert/core/src/exec/Execution.cc
@@ -63,17 +63,8 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length)
{
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor

// check if size enough for input is passed
// if input_shape_sig is set, input_shape_sig overrides shape in info
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
// Length validation in execute(): datatype can be changed by API call
auto &input_desc = _ctx.desc.inputs.at(index.value());
if (length < input_desc->info.total_size())
{
throw std::runtime_error{"Too small length"};
}

input_desc->buffer = buffer;
input_desc->size = length;
}
@@ -87,13 +78,10 @@ void Execution::setInput(const ir::IOIndex &index, const ir::Shape &shape, const

void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length)
{
// Length validation in execute()
// - datatype can be changed by API call
// - shape can be changed by dynamic shape inference
auto &output_desc = _ctx.desc.outputs.at(index.value());
// Check length when output shape is valid
if (!_ctx.shape_updated && length < output_desc->info.total_size())
{
throw std::runtime_error{"Too small length"};
}

output_desc->buffer = buffer;
output_desc->size = length;
}
@@ -133,6 +121,23 @@ void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;

// Input length validation check
for (const auto &input : _ctx.desc.inputs)
{
if (input->info.total_size() > input->size)
throw std::runtime_error{"Too small input buffer length"};
}

// Output length validation check
if (!_ctx.shape_updated)
{
for (const auto &output : _ctx.desc.outputs)
{
if (output->info.total_size() > output->size)
throw std::runtime_error{"Too small output buffer length"};
}
}

_executors->execute(_ctx);
finished = true;

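The length checks move out of setInput/setOutput and into execute() because the effective I/O data type is no longer fixed at set time: set_input/set_output may switch a quantized model I/O to float right before attaching the buffer, which changes the required byte count. A small hedged sketch of the size arithmetic (the shape is hypothetical):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Required buffer size depends on the *effective* element type, which may be
// switched to float after prepare(); hence the check belongs in execute().
std::size_t required_bytes(const std::vector<std::size_t> &shape, std::size_t elem_size)
{
  return std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                         std::multiplies<std::size_t>()) * elem_size;
}

// Example with a hypothetical shape {1, 2, 2, 1}:
//   required_bytes({1, 2, 2, 1}, sizeof(uint8_t)) ==  4  // model's quantized type
//   required_bytes({1, 2, 2, 1}, sizeof(float))   == 16  // after float I/O is requested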
@@ -778,8 +778,9 @@ TEST_F(CombinationTest1, neg_combination_of_set_input_tensorinfo_and_nnfw_run)
cast_in_buf = {10};
reshape_shape_in_buf = {1, 4};
expected = {10, 11, 12, 13};
setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual);
// This should throw an error
EXPECT_ANY_THROW(setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual));
EXPECT_EQ(nnfw_run(session), NNFW_STATUS_ERROR);

NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
@@ -989,7 +990,8 @@ TEST_F(CombinationTest2, neg_combination_set_input_tensorinfo_for_two_inputs)
expected = {110}; // wrong
expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {1}}; // wrong
actual.resize(1); // wrong
EXPECT_ANY_THROW(run_WITHOUT_set_input_tensorinfo(in0, in1, expected_ti, expected, actual));
setInputOutput(session, in0, in1, actual);
EXPECT_EQ(nnfw_run(session), NNFW_STATUS_ERROR);

NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
8 changes: 4 additions & 4 deletions tests/nnfw_api/src/NNPackageTests/AddSessionPrepared.test.cc
@@ -136,8 +136,8 @@ TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
TEST_F(ValidationTestAddSessionPrepared, neg_set_input_002)
{
char input[1]; // buffer size is too small
ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
NNFW_STATUS_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)));
EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
}

TEST_F(ValidationTestAddSessionPrepared, set_output_001)
@@ -155,8 +155,8 @@ TEST_F(ValidationTestAddSessionPrepared, neg_set_output_001)
TEST_F(ValidationTestAddSessionPrepared, neg_set_output_002)
{
char input[1]; // buffer size is too small
ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
NNFW_STATUS_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)));
EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
}

TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
