[onert/odc] Auto-compilation. Add nnfw_run_with_auto_compilation method. #14412

Merged
52 changes: 52 additions & 0 deletions runtime/onert/api/nnfw/include/nnfw_experimental.h
@@ -558,6 +558,58 @@ NNFW_STATUS nnfw_set_codegen_model_path(nnfw_session *session, const char *path)
*/
NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN_PREF pref);

/**
 * @brief Set the MinMax records count for auto compilation mode with the on-device compiler
 *
 * This function sets the number of MinMax records to collect for quantization in auto
 * compilation mode. To enable automatic compilation mode, use
 * {@link nnfw_run_with_auto_compilation}.
 *
 * @param[in] session nnfw_session
 * @param[in] minmax_records_count Number of MinMax records to collect before quantization
 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise @c NNFW_STATUS_ERROR
*/
NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session,
int minmax_records_count);
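
For illustration, a minimal hedged sketch of configuring this threshold (the value 100 is an arbitrary illustration, not a recommended default):

```cpp
// Require 100 recorded inference runs before auto compilation quantizes the model;
// the threshold value here is illustrative only.
if (nnfw_set_odc_param_minmax_records_count(session, 100) != NNFW_STATUS_NO_ERROR)
  std::cerr << "failed to set MinMax records threshold" << std::endl;
```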

/**
 * @brief Delete the MinMax file used by the on-device compiler
*
* @param[in] session nnfw_session
* @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
*/
NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session);

/**
 * @brief Run inference with auto compilation
 *
 * This function runs inference while automatically compiling the model: internally,
 * the original model is replaced with a quantized or compiled one.
 * During inference, MinMax statistics are collected; once enough records are available,
 * quantization is performed.
 * If quantization succeeds, code generation for the target backend is attempted;
 * otherwise the original float model keeps running.
 * If compilation succeeds, the compiled model is run; otherwise the quantized model is run.
 * The on-device compiler (ODC) provides the quantization and compilation functionality.
 * This function should be called after the model is loaded by
 * {@link nnfw_load_model_from_file}, the session is prepared for inference by
 * {@link nnfw_prepare}, and the input and output buffers are set by
 * {@link nnfw_set_input} and {@link nnfw_set_output}.
 *
 * Additionally, the following parameters should be set up:
 * 1. Quantization type: {@link nnfw_set_quantization_type}
 * 2. Quantized model path: {@link nnfw_set_quantized_model_path}
 * 3. MinMax records threshold for quantization: {@link nnfw_set_odc_param_minmax_records_count}
 * 4. Compiled model path: {@link nnfw_set_codegen_model_path}
 * The file with MinMax statistics can be removed with {@link nnfw_odc_delete_minmax_file}.
 *
 * @param[in] session nnfw_session
 * @param[in] target Target backend to generate code for, as in {@link nnfw_codegen}
 * @param[in] pref @c NNFW_CODEGEN_PREF
 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise @c NNFW_STATUS_ERROR
 */
NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target,
NNFW_CODEGEN_PREF pref);
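
For orientation, a minimal usage sketch of the full auto-compilation call sequence. The paths, the "trix-gen" target, the quantization type, the codegen preference, and the records threshold are illustrative assumptions, not recommended values:

```cpp
#include "nnfw.h"
#include "nnfw_experimental.h"
#include <cstddef>

// Sketch only: assumes the session was created with nnfw_create_session() and the
// model already loaded and prepared via nnfw_load_model_from_file() and nnfw_prepare().
NNFW_STATUS run_auto(nnfw_session *session, float *in, size_t in_bytes, float *out,
                     size_t out_bytes)
{
  // One-time auto-compilation setup (enum values and paths are assumptions).
  nnfw_set_quantization_type(session, NNFW_QUANTIZE_TYPE_U8_ASYM);
  nnfw_set_quantized_model_path(session, "/tmp/model.q8.circle");
  nnfw_set_odc_param_minmax_records_count(session, 100);
  nnfw_set_codegen_model_path(session, "/tmp/model.trix");

  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, in, in_bytes);
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, out, out_bytes);

  // Records MinMax while running the float model; once enough records exist, the
  // model is quantized, compiled for the target, and transparently swapped in.
  return nnfw_run_with_auto_compilation(session, "trix-gen", NNFW_CODEGEN_PREF_DEFAULT);
}
```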

//////////////////////////////////////////////
// APIs for configuration
//////////////////////////////////////////////
19 changes: 19 additions & 0 deletions runtime/onert/api/nnfw/src/nnfw_api.cc
@@ -508,6 +508,25 @@ NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN
return session->codegen(target, pref);
}

NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session, int minmax_records_count)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->set_odc_param_minmax_records_count(minmax_records_count);
}

NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->delete_odc_minmax_file();
}

NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target,
NNFW_CODEGEN_PREF pref)
{
NNFW_RETURN_ERROR_IF_NULL(session);
return session->run_with_auto_compilation(target, pref);
}
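
Each wrapper guards against a null session before delegating to the session object. A plausible sketch of that guard, assuming the usual shape of such macros (the real NNFW_RETURN_ERROR_IF_NULL is defined elsewhere in these sources and may differ):

```cpp
// Assumed shape of the null-guard macro used by the wrappers above; the actual
// definition lives elsewhere in the nnfw API sources.
#define NNFW_RETURN_ERROR_IF_NULL(p)      \
  do                                      \
  {                                       \
    if ((p) == NULL)                      \
      return NNFW_STATUS_UNEXPECTED_NULL; \
  } while (0)
```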

// Configuration

NNFW_STATUS nnfw_set_prepare_config(nnfw_session *session, const NNFW_PREPARE_CONFIG key,
297 changes: 297 additions & 0 deletions runtime/onert/api/nnfw/src/nnfw_api_internal.cc
@@ -2028,3 +2028,300 @@ NNFW_STATUS nnfw_session::reset_execute_config()

return NNFW_STATUS_NO_ERROR;
}

NNFW_STATUS nnfw_session::set_odc_param_minmax_records_count(int minmax_records_count)
{
if (isStateInitialized() || isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

if (_quant_manager->setMinMaxRecordsThreshold(minmax_records_count))
return NNFW_STATUS_NO_ERROR;
else
return NNFW_STATUS_ERROR;
}

NNFW_STATUS nnfw_session::delete_odc_minmax_file()
{
if (isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

if (_quant_manager->deleteMinMaxFile())
return NNFW_STATUS_NO_ERROR;
else
return NNFW_STATUS_ERROR;
}

// run with auto compilation
NNFW_STATUS nnfw_session::run_with_auto_compilation(const char *target, NNFW_CODEGEN_PREF pref)
{
  if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
<< "run should be after preparation" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

  // Check quantization and code-generation parameters
  std::string target_str = target ? target : "";
  // The target name must end with "-gen"; checking the length first keeps the
  // substr() call below in range for short strings.
  if (_quant_manager->exportModelPath().empty() || _codegen_manager->exportModelPath().empty() ||
      target_str.size() < 4 || target_str.substr(target_str.size() - 4) != "-gen")
  {
    std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
              << "quantization and code generation parameters should be set" << std::endl;
    return NNFW_STATUS_INVALID_STATE;
  }

  // ODC: auto compilation with a hidden switching mechanism
  // Check whether the model is already quantized or compiled
std::ifstream file_quantized_model(_quant_manager->exportModelPath());
std::ifstream file_compiled_model(_codegen_manager->exportModelPath());

if (!file_quantized_model.good() && !file_compiled_model.good())
{
// Run float model and try to quantize it
{
// Save execution options
auto saved_options = _execution->executionOptions();
// turn on minmax recording
_execution->executionOptions().dump_minmax = true;

try
{
_execution->execute();
}
catch (const onert::InsufficientBufferSizeException &e)
{
// Currently insufficient buffer always means output buffer.
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
}
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_ERROR;
}

_state = State::FINISHED_RUN;

      // restore the minmax option to its user-defined state
      _execution->executionOptions().dump_minmax = saved_options.dump_minmax;

// if enough statistics are collected, then run the quantization
if (_quant_manager->readyForQuantize())
{
try
{
if (isStateInitialized() || isStateRunning())
{
std::cerr << "invalid state" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

auto result = _quant_manager->quantize(_model_path);
if (!result)
return NNFW_STATUS_INVALID_STATE;

// remove minmax file
result = _quant_manager->deleteMinMaxFile();
if (!result)
return NNFW_STATUS_INVALID_STATE;
}
catch (const std::exception &e)
{
std::cerr
<< "Error during nnfw_session::run_with_auto_compilation in quantize operation: "
<< e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
}
}
}
else
{
// run compiled or quantized model
NNFW_STATUS status;

// turn off minmax recording
_execution->executionOptions().dump_minmax = false;

    // save initial buffers if the quantized or compiled model has not been loaded yet
if (_autoCompilationState == nnfw_session::AutoCompilationState::INITIAL_STATE)
{
auto dotidx = _codegen_manager->exportModelPath().rfind('.');
if (dotidx == std::string::npos)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid compiled "
"model path. Please use a "
"path that includes the extension."
<< std::endl;
return NNFW_STATUS_ERROR;
}

std::string compiled_model_type =
_codegen_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot

dotidx = _quant_manager->exportModelPath().rfind('.');
if (dotidx == std::string::npos)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid quantized "
"model path. Please use a "
"path that includes the extension."
<< std::endl;
return NNFW_STATUS_ERROR;
}
std::string quantized_model_type =
_quant_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot

      // Save initial (float) input and output buffers
      auto input_size = _compiler_artifact->_executors->inputSize();
      auto output_size = _compiler_artifact->_executors->outputSize();

      std::vector<const void *> input_buffers;
      std::vector<void *> output_buffers;

      // Save input buffers
      for (size_t input_index = 0; input_index < input_size; input_index++)
      {
        auto io_input_index = onert::ir::IOIndex(input_index);
        input_buffers.push_back(_execution->getInputBuffer(io_input_index));
      }

      // Save output buffers
      for (size_t output_index = 0; output_index < output_size; output_index++)
      {
        auto io_output_index = onert::ir::IOIndex(output_index);
        output_buffers.push_back(_execution->getOutputBuffer(io_output_index));
      }

// Save execution options
auto saved_options = _execution->executionOptions();

      // if there is a compiled model - try to load it
if (file_compiled_model.good())
{
// load compiled model
status = loadModelFile(_codegen_manager->exportModelPath(), compiled_model_type);
if (status == NNFW_STATUS_NO_ERROR)
{
_autoCompilationState = nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED;
}
}
      else // there is no compiled model - try to compile and load it
      {
        // To avoid code duplication, reuse the existing codegen() function, which reads
        // _model_path; point it at the quantized model first.
        // TODO: Change this if codegen() is generalized
        _model_path = _quant_manager->exportModelPath();

// try to compile and load compiled model
status = codegen(target, pref);
if (status == NNFW_STATUS_NO_ERROR)
{
_autoCompilationState = nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED;
// TODO delete quantized model
}
}

      // Loading the compiled model failed - fall back to the quantized model
if (_autoCompilationState != nnfw_session::AutoCompilationState::COMPILED_MODEL_LOADED)
{
// load quantized model
status = loadModelFile(_quant_manager->exportModelPath(), quantized_model_type);
if (status != NNFW_STATUS_NO_ERROR)
return status;
else
_autoCompilationState = nnfw_session::AutoCompilationState::QUANTIZED_MODEL_LOADED;
}

status = prepare();
if (status != NNFW_STATUS_NO_ERROR)
return status;

// Restore execution options
_execution->executionOptions() = saved_options;

      // Restore inputs to the quantized or compiled model
      for (uint32_t input_index = 0; input_index < input_buffers.size(); input_index++)
      {
        nnfw_tensorinfo ti;
        status = input_tensorinfo(input_index, &ti);
        if (status != NNFW_STATUS_NO_ERROR)
          return status;

        ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
        auto input_size_in_bytes = getBufSize(&ti);

        status = set_input(input_index, ti.dtype, input_buffers[input_index], input_size_in_bytes);
        if (status != NNFW_STATUS_NO_ERROR)
          return status;
      }

      // Restore outputs to the quantized or compiled model
      for (uint32_t output_index = 0; output_index < output_buffers.size(); output_index++)
      {
        nnfw_tensorinfo ti;
        status = output_tensorinfo(output_index, &ti);
        if (status != NNFW_STATUS_NO_ERROR)
          return status;

        ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
        uint64_t output_size_in_bytes = getBufSize(&ti);

        status =
          set_output(output_index, ti.dtype, output_buffers[output_index], output_size_in_bytes);
        if (status != NNFW_STATUS_NO_ERROR)
          return status;
      }
}

    // Run the quantized or compiled model
    if (!isStatePreparedOrFinishedRun())
    {
      std::cerr << "Error during nnfw_session::run_with_auto_compilation : "
                << "run should be called after preparation" << std::endl;
return NNFW_STATUS_INVALID_STATE;
}

try
{
_execution->execute();
}
catch (const onert::InsufficientBufferSizeException &e)
{
// Currently insufficient buffer always means output buffer.
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
}
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what()
<< std::endl;
return NNFW_STATUS_ERROR;
}

_state = State::FINISHED_RUN;
}

return NNFW_STATUS_NO_ERROR;
}
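
From the application side the switching is invisible: the same call is simply repeated. A minimal driver sketch under the setup assumed in the header example above ("trix-gen" and the iteration count are illustrative):

```cpp
// The first iterations run the float model and record MinMax; once the threshold set
// via nnfw_set_odc_param_minmax_records_count is reached, quantization and code
// generation happen inside this same call, and later iterations run the faster model.
for (int i = 0; i < 200; ++i)
{
  if (nnfw_run_with_auto_compilation(session, "trix-gen", NNFW_CODEGEN_PREF_DEFAULT) !=
      NNFW_STATUS_NO_ERROR)
  {
    std::cerr << "inference failed at iteration " << i << std::endl;
    break;
  }
}
```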