From c2c1e09173b84df8e948cff28efb898aa5e8c5cc Mon Sep 17 00:00:00 2001 From: Grzegorz Kisala Date: Fri, 10 Jan 2025 17:41:24 +0100 Subject: [PATCH] [windows] Add experimental Windows build This PR adds an experimental Windows build. **Self evaluation:** 1. Build test: [ ]Passed [ ]Failed [X]Skipped 2. Run test: [ ]Passed [ ]Failed [X]Skipped Signed-off-by: Grzegorz Kisala --- Applications/MNIST/jni/main.cpp | 1 + Applications/MNIST/jni/meson.build | 3 +- Applications/ProductRatings/jni/main.cpp | 5 +- Applications/ProductRatings/jni/meson.build | 3 +- Applications/meson.build | 55 ++--- Applications/utils/jni/bitmap_helpers.cpp | 2 +- api/capi/include/nntrainer_internal.h | 10 +- api/ccapi/include/tensor_dim.h | 2 +- meson.build | 218 ++++++++++-------- meson_options.txt | 14 +- nntrainer-disables.ini | 16 ++ nntrainer/app_context.cpp | 77 +++++-- nntrainer/cl_context.cpp | 3 +- nntrainer/compiler/remap_realizer.h | 1 + nntrainer/compiler/tflite_interpreter.cpp | 4 +- nntrainer/dataset/databuffer.cpp | 1 + nntrainer/dataset/dir_data_producers.cpp | 10 +- nntrainer/layers/acti_func.h | 3 + nntrainer/layers/cl_layers/reshape_cl.cpp | 2 + nntrainer/layers/cl_layers/swiglu_cl.cpp | 2 + nntrainer/layers/common_properties.h | 2 +- nntrainer/layers/embedding.cpp | 10 +- nntrainer/layers/layer_node.cpp | 3 +- nntrainer/layers/preprocess_l2norm_layer.cpp | 2 +- nntrainer/meson.build | 25 +- .../models/dynamic_training_optimization.cpp | 1 + nntrainer/models/model_loader.cpp | 6 +- nntrainer/nntrainer_log.h | 16 +- nntrainer/nntrainer_logger.cpp | 2 +- nntrainer/opencl/opencl_loader.cpp | 194 +++++++++------- nntrainer/opencl/opencl_program.cpp | 28 ++- nntrainer/optimizers/lr_scheduler_cosine.cpp | 3 +- nntrainer/tensor/blas_avx.cpp | 2 + .../cl_operations/attention_kernels_fp16.cpp | 5 + .../cl_operations/blas_kernels_fp16.cpp | 5 + nntrainer/tensor/float_tensor.cpp | 3 +- nntrainer/tensor/half_tensor.cpp | 6 +- nntrainer/tensor/half_tensor.h | 4 + nntrainer/tensor/manager.cpp | 54 ++--- nntrainer/tensor/quantizer.cpp | 85 ++++++- nntrainer/tensor/quantizer.h | 2 +- nntrainer/tensor/swap_device.cpp | 26 +-- nntrainer/tensor/swap_device.h | 10 +- nntrainer/tensor/task_executor.h | 1 - nntrainer/tensor/tensor.cpp | 1 + nntrainer/tensor/tensor.h | 2 +- nntrainer/tensor/tensor_base.cpp | 2 +- nntrainer/tensor/tensor_base.h | 2 +- nntrainer/tensor/tensor_dim.cpp | 2 +- nntrainer/tensor/weight.h | 4 +- nntrainer/utils/ini_wrapper.cpp | 6 +- nntrainer/utils/tracer.cpp | 2 +- nntrainer/utils/util_func.h | 4 +- test/ccapi/unittest_ccapi.cpp | 6 +- test/meson.build | 4 +- test/nntrainer_test_util.cpp | 1 + .../compiler/unittest_interpreter.cpp | 6 +- .../compiler/unittest_tflite_export.cpp | 20 +- test/unittest/meson.build | 8 +- test/unittest/models/models_golden_test.cpp | 12 +- test/unittest/unittest_nntrainer_models.cpp | 2 +- .../unittest/unittest_nntrainer_quantizer.cpp | 157 +++++++++++++ third_party/googletest.wrap | 4 + third_party/iniparser.wrap | 4 + 64 files changed, 772 insertions(+), 404 deletions(-) create mode 100644 nntrainer-disables.ini create mode 100644 test/unittest/unittest_nntrainer_quantizer.cpp create mode 100644 third_party/googletest.wrap create mode 100644 third_party/iniparser.wrap diff --git a/Applications/MNIST/jni/main.cpp b/Applications/MNIST/jni/main.cpp index 26b734413f..99736d65ac 100644 --- a/Applications/MNIST/jni/main.cpp +++ b/Applications/MNIST/jni/main.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git
a/Applications/MNIST/jni/meson.build b/Applications/MNIST/jni/meson.build index 3c2d93ec4a..dc808040b3 100644 --- a/Applications/MNIST/jni/meson.build +++ b/Applications/MNIST/jni/meson.build @@ -1,7 +1,8 @@ res_path = meson.current_source_dir() / '..' / 'res' nntr_mnist_resdir = nntr_app_resdir / 'MNIST' -run_command('cp', '-lr', res_path, nntr_mnist_resdir) +#run_command('cp', '-lr', res_path, nntr_mnist_resdir) +run_command('cmd.exe', '/C', 'xcopy', res_path, nntr_mnist_resdir, '/E', '/H', '/I') mnist_sources = [ 'main.cpp' diff --git a/Applications/ProductRatings/jni/main.cpp b/Applications/ProductRatings/jni/main.cpp index 029608889a..6cab0e9796 100644 --- a/Applications/ProductRatings/jni/main.cpp +++ b/Applications/ProductRatings/jni/main.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -81,8 +82,8 @@ bool getData(std::ifstream &F, float *input, float *label, unsigned int id) { return false; std::istringstream buffer(temp); - uint *input_int = (uint *)input; - uint x; + unsigned int *input_int = (unsigned int *)input; + unsigned int x; for (unsigned int j = 0; j < feature_size; ++j) { buffer >> x; input_int[j] = x; diff --git a/Applications/ProductRatings/jni/meson.build b/Applications/ProductRatings/jni/meson.build index 91fc573fe6..5d564cf009 100644 --- a/Applications/ProductRatings/jni/meson.build +++ b/Applications/ProductRatings/jni/meson.build @@ -1,5 +1,6 @@ res_path = meson.current_source_dir() / '..' / 'res' -run_command('cp', '-lr', res_path, nntr_app_resdir / 'ProductRatings') +#run_command('cp', '-lr', res_path, nntr_app_resdir / 'ProductRatings') +run_command('cmd.exe', '/C', 'xcopy', res_path, nntr_app_resdir / 'ProductRatings', '/E', '/H', '/I') e = executable('nntrainer_product_ratings', 'main.cpp', diff --git a/Applications/meson.build b/Applications/meson.build index 843347ceb5..6878e6e888 100644 --- a/Applications/meson.build +++ b/Applications/meson.build @@ -1,33 +1,34 @@ nntr_app_resdir = nntrainer_resdir / 'app' -run_command('mkdir', '-p', nntr_app_resdir) +#run_command('mkdir', '-p', nntr_app_resdir) +run_command('cmd.exe', '/C', 'mkdir', nntr_app_resdir) subdir('utils') -subdir('KNN/jni') -subdir('LogisticRegression/jni') -if enable_ccapi - subdir('MNIST/jni') - if get_option('enable-fp16') - subdir('MixedPrecision/jni') - endif -endif -subdir('VGG/jni') -subdir('Resnet/jni') -subdir('YOLOv2/jni') -subdir('YOLOv3/jni') -subdir('LLaMA/jni') -subdir('Multi_input/jni') -subdir('ReinforcementLearning/DeepQ/jni') -subdir('TransferLearning/CIFAR_Classification/jni') -# if enable_capi -# subdir('TransferLearning/Draw_Classification/jni') +# subdir('KNN/jni') +# subdir('LogisticRegression/jni') + if enable_ccapi + subdir('MNIST/jni') +# if get_option('enable-fp16') +# subdir('MixedPrecision/jni') + endif # endif -subdir('Custom') +# subdir('VGG/jni') +# subdir('Resnet/jni') +# subdir('YOLOv2/jni') +# subdir('YOLOv3/jni') +# subdir('LLaMA/jni') +# subdir('Multi_input/jni') +# subdir('ReinforcementLearning/DeepQ/jni') +# subdir('TransferLearning/CIFAR_Classification/jni') +# # if enable_capi +# # subdir('TransferLearning/Draw_Classification/jni') +# # endif +# subdir('Custom') subdir('ProductRatings/jni') -subdir('AlexNet/jni') -subdir('Layers/jni') -if get_option('enable-tflite-backbone') - subdir('SimpleShot') -endif -subdir('PicoGPT/jni') +# subdir('AlexNet/jni') +# subdir('Layers/jni') +# if get_option('enable-tflite-backbone') +# subdir('SimpleShot') +# endif +# subdir('PicoGPT/jni') -subdir('SimpleFC/jni') +# subdir('SimpleFC/jni') diff --git
a/Applications/utils/jni/bitmap_helpers.cpp b/Applications/utils/jni/bitmap_helpers.cpp index 0fc64acd50..0e1f28aa2b 100644 --- a/Applications/utils/jni/bitmap_helpers.cpp +++ b/Applications/utils/jni/bitmap_helpers.cpp @@ -23,7 +23,7 @@ limitations under the License. #include #include -#include // NOLINT(build/include_order) +//#include // NOLINT(build/include_order) #include "bitmap_helpers.h" diff --git a/api/capi/include/nntrainer_internal.h b/api/capi/include/nntrainer_internal.h index c4c86389f8..e8f65d6468 100644 --- a/api/capi/include/nntrainer_internal.h +++ b/api/capi/include/nntrainer_internal.h @@ -109,7 +109,7 @@ extern "C" { * @note model mutex must be locked before layer lock, if model lock is needed */ typedef struct { - uint magic; /**< magic number */ + unsigned int magic; /**< magic number */ std::shared_ptr layer; /**< layer object */ bool in_use; /**< in_use flag */ std::mutex m; /**< mutex for the optimizer */ @@ -121,7 +121,7 @@ typedef struct { * optimizer lock is needed */ typedef struct { - uint magic; + unsigned int magic; std::shared_ptr lr_scheduler; bool in_use; std::mutex m; @@ -133,7 +133,7 @@ typedef struct { * needed */ typedef struct { - uint magic; + unsigned int magic; std::shared_ptr optimizer; ml_train_lr_scheduler *lr_scheduler; bool in_use; @@ -146,7 +146,7 @@ typedef struct { * @note model mutex must be locked before dataset lock, if model lock is needed */ typedef struct { - uint magic; /**< magic number */ + unsigned int magic; /**< magic number */ std::array, 3> dataset; /**< dataset object */ bool in_use; /**< in_use flag */ @@ -158,7 +158,7 @@ typedef struct { * @since_tizen 6.0 */ typedef struct { - uint magic; /**< magic number */ + unsigned int magic; /**< magic number */ std::shared_ptr model; /**< model object */ std::unordered_map layers_map; /**< layers map */ diff --git a/api/ccapi/include/tensor_dim.h b/api/ccapi/include/tensor_dim.h index 7dbc5cccb4..d99b58bdb5 100644 --- a/api/ccapi/include/tensor_dim.h +++ b/api/ccapi/include/tensor_dim.h @@ -278,7 +278,7 @@ class TensorDim { /** * @brief get data type size */ - uint getDataTypeSize() const; + unsigned int getDataTypeSize() const; /** * @brief Set the Dim Flag to retrieve effective dimension diff --git a/meson.build b/meson.build index 6550087cc9..b52349b795 100644 --- a/meson.build +++ b/meson.build @@ -3,14 +3,36 @@ project('nntrainer', 'c', 'cpp', license: ['apache-2.0'], meson_version: '>=0.50.0', default_options: [ - 'werror=true', + 'werror=false', 'warning_level=1', - 'c_std=gnu89', - 'cpp_std=c++17', + 'c_std=c17', + 'cpp_std=c++20', 'buildtype=release' - ] + ], + subproject_dir: 'third_party' ) +cmake = import('cmake') + +add_project_arguments(['/MT'], language: ['c','cpp']) + +# googletest +googletest_options = cmake.subproject_options() +googletest_options.append_compile_args(['c','cpp'], '/MT') +googletest_options.add_cmake_defines({'BUILD_SHARED_LIBS': false}) +googletest_subproject = cmake.subproject('googletest', options: googletest_options, required: true) +gmock_dep = googletest_subproject.dependency('gmock') +gtest_dep = googletest_subproject.dependency('gtest') +gtest_main_dep = googletest_subproject.dependency('gtest_main') + +# iniparser +iniparser_options = cmake.subproject_options() +iniparser_options.append_compile_args(['c','cpp'], '/MT') +iniparser_options.add_cmake_defines({'BUILD_DOCS': false}) +iniparser_options.add_cmake_defines({'BUILD_SHARED_LIBS': false}) +iniparser_subproject = cmake.subproject('iniparser', options: iniparser_options, required: 
true) +iniparser_dep = iniparser_subproject.dependency('iniparser-static') + # Set version info nntrainer_version = meson.project_version() nntrainer_version_split = nntrainer_version.split('.') @@ -40,33 +62,33 @@ if get_option('enable_encoder') add_project_arguments('-DENABLE_ENCODER=1', language: ['c', 'cpp']) endif -warning_flags = [ - '-Wredundant-decls', - '-Wwrite-strings', - '-Wformat', - '-Wformat-nonliteral', - '-Wformat-security', - '-Winit-self', - '-Waddress', - '-Wvla', - '-Wpointer-arith', - '-Wno-error=varargs', - '-Wdefaulted-function-deleted', - '-ftree-vectorize', - '-Wno-maybe-uninitialized', - '-Wno-unused-variable' -] - -warning_c_flags = [ - '-Wmissing-declarations', - '-Wmissing-include-dirs', - '-Wmissing-prototypes', - '-Wnested-externs', - '-Waggregate-return', - '-Wold-style-definition', - '-Wdeclaration-after-statement', - '-Wno-error=varargs' -] +# warning_flags = [ +# '-Wredundant-decls', +# '-Wwrite-strings', +# '-Wformat', +# '-Wformat-nonliteral', +# '-Wformat-security', +# '-Winit-self', +# '-Waddress', +# '-Wvla', +# '-Wpointer-arith', +# '-Wno-error=varargs', +# '-Wdefaulted-function-deleted', +# '-ftree-vectorize', +# '-Wno-maybe-uninitialized', +# '-Wno-unused-variable' +# ] + +# warning_c_flags = [ +# '-Wmissing-declarations', +# '-Wmissing-include-dirs', +# '-Wmissing-prototypes', +# '-Wnested-externs', +# '-Waggregate-return', +# '-Wold-style-definition', +# '-Wdeclaration-after-statement', +# '-Wno-error=varargs' +# ] arch = host_machine.cpu_family() @@ -169,20 +191,20 @@ if get_option('enable-biqgemm') endif endif # end of enable-biqgemm -foreach extra_arg : warning_flags - if cc.has_argument (extra_arg) - add_project_arguments([extra_arg], language: 'c') - endif - if cxx.has_argument (extra_arg) - add_project_arguments([extra_arg], language: 'cpp') - endif -endforeach - -foreach extra_arg : warning_c_flags - if cc.has_argument (extra_arg) - add_project_arguments([extra_arg], language: 'c') - endif -endforeach +# foreach extra_arg : warning_flags +# if cc.has_argument (extra_arg) +# add_project_arguments([extra_arg], language: 'c') +# endif +# if cxx.has_argument (extra_arg) +# add_project_arguments([extra_arg], language: 'cpp') +# endif +# endforeach + +# foreach extra_arg : warning_c_flags +# if cc.has_argument (extra_arg) +# add_project_arguments([extra_arg], language: 'c') +# endif +# endforeach # Set install path nntrainer_prefix = get_option('prefix') @@ -217,7 +239,8 @@ endif # handle resources nntrainer_resdir = meson.build_root() / 'res' -run_command('mkdir', '-p', nntrainer_resdir) +#run_command('mkdir', '-p', nntrainer_resdir) +#run_command('md', nntrainer_resdir) if get_option('install-app') # add a script to install resources from installs to application_install_dir @@ -325,16 +348,16 @@ if get_option('enable-logging') extra_defines += '-D__LOGGING__=1' endif -gmock_dep = dependency('gmock', static: true, main: false, required: false) -gtest_dep = dependency('gtest', static: true, main: false, required: false) -gtest_main_dep = dependency('gtest', static: true, main: true, required: false) +# gmock_dep = dependency('gmock', static: true, main: false, required: false) +# gtest_dep = dependency('gtest', static: true, main: false, required: false) +# gtest_main_dep = dependency('gtest', static: true, main: true, required: false) benchmark_dep = dependency('benchmark', static : true, main : false, required : false) if get_option('enable-test') # and get_option('platform') != 'android' extra_defines += '-DENABLE_TEST=1' - if 
gtest_dep.version().version_compare('<1.10.0') - extra_defines += '-DGTEST_BACKPORT=1' - endif + # if gtest_dep.version().version_compare('<1.10.0') + # extra_defines += '-DGTEST_BACKPORT=1' + # endif test_timeout = get_option('test-timeout') endif @@ -342,43 +365,50 @@ if get_option('reduce-tolerance') extra_defines += '-DREDUCE_TOLERANCE=1' endif -libm_dep = cxx.find_library('m') # cmath library -libdl_dep = cxx.find_library('dl') # DL library +libm_dep = cxx.find_library('m', required : false) # cmath library +libdl_dep = cxx.find_library('dl', required : false) # DL library thread_dep = dependency('threads') # pthread for tensorflow-lite -iniparser_dep = dependency('iniparser', required : false, version : '>=4.1') # iniparser -if get_option('platform') == 'android' - message('preparing iniparser') - run_command(meson.source_root() / 'jni' / 'prepare_iniparser.sh', meson.build_root(), check: true) - iniparser_root = meson.build_root() / 'iniparser' - iniparser_dep = declare_dependency(include_directories: [ 'iniparser/src' ]) -endif - -if not iniparser_dep.found() - message('falling back to find libiniparser library and header files') - libiniparser_dep = cxx.find_library('iniparser') - sysroot = run_command( - cxx.cmd_array() + ['-print-sysroot'] - ).stdout().split('\n')[0] - - if sysroot.startswith('/') - sysroot_inc_cflags_template = '-I@0@/usr/include@1@' - sysroot_inc = sysroot_inc_cflags_template.format(sysroot, '') - add_project_arguments(sysroot_inc, language: ['c', 'cpp']) - sysroot_inc_cflags_iniparser = sysroot_inc_cflags_template.format(sysroot, - '/iniparser') - else - sysroot_inc_cflags_iniparser = '-I/usr/include/iniparser' - endif - - if libiniparser_dep.found() and cxx.has_header('iniparser.h', \ - args : sysroot_inc_cflags_iniparser) - iniparser_dep = declare_dependency (dependencies : libiniparser_dep, - compile_args : sysroot_inc_cflags_iniparser) - else - error('Failed to resolve dependency on iniparser') - endif -endif +# if host_machine.system() == 'windows' +# iniparser_inc = meson.project_source_root()/'third_party'/'iniparser'/'src' +# message(iniparser_inc) +# _iniparser_dep = declare_dependency(link_with: proj_iniparser.target('iniparser-static'), version: '4.2.4', compile_args: f'-I@iniparser_inc@') +# meson.override_dependency('iniparser', _iniparser_dep) +# endif + +# iniparser_dep = dependency('iniparser', required : false, version : '>=4.1') # iniparser +# if get_option('platform') == 'android' +# message('preparing iniparser') +# run_command(meson.source_root() / 'jni' / 'prepare_iniparser.sh', meson.build_root(), check: true) +# iniparser_root = meson.build_root() / 'iniparser' +# iniparser_dep = declare_dependency(include_directories: [ 'iniparser/src' ]) +# endif + +# if not iniparser_dep.found() +# message('falling back to find libiniparser library and header files') +# libiniparser_dep = cxx.find_library('iniparser') +# sysroot = run_command( +# cxx.cmd_array() + ['-print-sysroot'] +# ).stdout().split('\n')[0] + +# if sysroot.startswith('/') +# sysroot_inc_cflags_template = '-I@0@/usr/include@1@' +# sysroot_inc = sysroot_inc_cflags_template.format(sysroot, '') +# add_project_arguments(sysroot_inc, language: ['c', 'cpp']) +# sysroot_inc_cflags_iniparser = sysroot_inc_cflags_template.format(sysroot, +# '/iniparser') +# else +# sysroot_inc_cflags_iniparser = '-I/usr/include/iniparser' +# endif + +# if libiniparser_dep.found() and cxx.has_header('iniparser.h', \ +# args : sysroot_inc_cflags_iniparser) +# iniparser_dep = declare_dependency (dependencies 
: libiniparser_dep, +# compile_args : sysroot_inc_cflags_iniparser) +# else +# error('Failed to resolve dependency on iniparser') +# endif +# endif if get_option('platform') == 'android' message('preparing ml api') @@ -484,16 +514,17 @@ if get_option('enable-app') if get_option('platform') == 'android' warning('android app is not supported for now, building app skipped') else - # this is needed for reinforcement application. We can move this to reinforecement app dependency - jsoncpp_dep = dependency('jsoncpp') # jsoncpp - libcurl_dep = dependency('libcurl') - if not tflite_dep.found() - error('Tensorflow-Lite dependency not found') - endif + # # this is needed for reinforcement application. We can move this to reinforecement app dependency + # jsoncpp_dep = dependency('jsoncpp') # jsoncpp + # libcurl_dep = dependency('libcurl') + # if not tflite_dep.found() + # error('Tensorflow-Lite dependency not found') + # endif subdir('Applications') endif endif +if false if get_option('platform') != 'android' nnstreamer_dep = dependency('nnstreamer') message('building nnstreamer') @@ -501,6 +532,7 @@ if get_option('platform') != 'android' else warning('android nnstreamer-filter and nnstreamer-trainer are not yet supported, building them is skipped') endif +endif if get_option('platform') == 'android' subdir('jni') diff --git a/meson_options.txt b/meson_options.txt index ee3119b4e8..b3ed4c34fa 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,18 +1,18 @@ option('platform', type: 'combo', choices: ['none', 'tizen', 'yocto', 'android'], value: 'none') option('enable-app', type: 'boolean', value: true) -option('install-app', type: 'boolean', value: true) +option('install-app', type: 'boolean', value: false) option('use_gym', type: 'boolean', value: false) option('enable-capi', type: 'feature', value: 'auto') option('enable-ccapi', type: 'boolean', value: true) -option('enable-test', type: 'boolean', value: true) +option('enable-test', type: 'boolean', value: false) option('enable-logging', type: 'boolean', value: true) option('enable-tizen-feature-check', type: 'boolean', value: true) option('enable-nnstreamer-backbone', type: 'boolean', value: false) -option('enable-tflite-backbone', type: 'boolean', value: true) +option('enable-tflite-backbone', type: 'boolean', value: false) option('enable-profile', type: 'boolean', value: false) option('enable-trace', type: 'boolean', value: false) option('enable-debug', type: 'boolean', value: false) -option('enable-tflite-interpreter', type: 'boolean', value: true) +option('enable-tflite-interpreter', type: 'boolean', value: false) option('enable-memory-swap', type: 'boolean', value: false) option('memory-swap-path', type: 'string', value: '') option('test-timeout', type: 'integer', value: 60) @@ -36,12 +36,12 @@ option('reduce-tolerance', type: 'boolean', value: true) option('enable-long-test', type: 'boolean', value: false) # backend options -option('enable-blas', type: 'boolean', value: true) +option('enable-blas', type: 'boolean', value: false) option('enable-fp16', type: 'boolean', value: false) option('enable-cublas', type: 'boolean', value: false) option('enable-openmp', type: 'boolean', value: true) option('enable-neon', type: 'boolean', value: false) -option('enable-avx', type: 'boolean', value: true) +option('enable-avx', type: 'boolean', value: false) option('enable-opencl', type: 'boolean', value: false) option('enable-biqgemm', type: 'boolean', value: false) option('biqgemm-path', type: 'string', value: '../BiQGEMM') @@ -50,7 +50,7 @@ 
option('enable-benchmarks', type: 'boolean', value : false) # ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api ) # To inter-operate with nnstreamer and ML-API packages, you need to enable this. # If this is disabled, related options (capi-ml-*) are ignored. -option('ml-api-support', type: 'feature', value: 'auto') +option('ml-api-support', type: 'feature', value: 'disabled') # @todo : make them use 'feature' and depend on ml-api-support option('enable-nnstreamer-tensor-filter', type: 'feature', value: 'auto') option('enable-nnstreamer-tensor-trainer', type: 'feature', value: 'auto') diff --git a/nntrainer-disables.ini b/nntrainer-disables.ini new file mode 100644 index 0000000000..c2e56f2824 --- /dev/null +++ b/nntrainer-disables.ini @@ -0,0 +1,16 @@ +[project options] + +enable-blas=false +enable-tflite-backbone=false +enable-memory-swap=false +enable-nnstreamer-backbone=false +enable-tflite-interpreter=false +enable-app=false +ml-api-support='disabled' +enable-test=true +install-app=false +enable-avx=false + +[built-in options] +werror=false + diff --git a/nntrainer/app_context.cpp b/nntrainer/app_context.cpp index 61890fc9d8..1a9d1b8c0f 100644 --- a/nntrainer/app_context.cpp +++ b/nntrainer/app_context.cpp @@ -11,8 +11,13 @@ * @bug No known bugs except for NYI items * */ -#include +//#include +#ifdef _WIN32 +#include "windows.h" +#else #include +#endif +#include #include #include #include @@ -224,7 +229,7 @@ std::mutex factory_mutex; * @brief finalize global context * */ -static void fini_global_context_nntrainer(void) __attribute__((destructor)); +static void fini_global_context_nntrainer(void); static void fini_global_context_nntrainer(void) {} @@ -418,14 +423,11 @@ AppContext &AppContext::Global() { } void AppContext::setWorkingDirectory(const std::string &base) { - DIR *dir = opendir(base.c_str()); - - if (!dir) { + if (!std::filesystem::is_directory(base)) { std::stringstream ss; ss << func_tag << "path is not directory or has no permission: " << base; throw std::invalid_argument(ss.str().c_str()); } - closedir(dir); char *ret = getRealpath(base.c_str(), nullptr); @@ -495,6 +497,21 @@ int AppContext::registerLayer(const std::string &library_path, const std::string &base_path) { const std::string full_path = getFullPath(library_path, base_path); +#if defined(_WIN32) + HMODULE handle = LoadLibraryA(full_path.c_str()); + + NNTR_THROW_IF(handle == nullptr, std::invalid_argument) + << func_tag << "open plugin failed"; + + nntrainer::LayerPluggable *pluggable = + reinterpret_cast( + GetProcAddress((HMODULE)handle, "ml_train_layer_pluggable")); + + auto close_dl = [handle] { FreeLibrary((HINSTANCE)handle); }; + + NNTR_THROW_IF_CLEANUP(pluggable == nullptr, std::invalid_argument, close_dl) + << func_tag << "loading symbol failed"; +#else void *handle = dlopen(full_path.c_str(), RTLD_LAZY | RTLD_LOCAL); const char *error_msg = dlerror(); @@ -505,11 +522,14 @@ int AppContext::registerLayer(const std::string &library_path, reinterpret_cast( dlsym(handle, "ml_train_layer_pluggable")); - error_msg = dlerror(); auto close_dl = [handle] { dlclose(handle); }; + + error_msg = dlerror(); + NNTR_THROW_IF_CLEANUP(error_msg != nullptr || pluggable == nullptr, std::invalid_argument, close_dl) << func_tag << "loading symbol failed, reason: " << error_msg; +#endif auto layer = pluggable->createfunc(); NNTR_THROW_IF_CLEANUP(layer == nullptr, std::invalid_argument, close_dl) @@ -534,6 +554,21 @@ int AppContext::registerOptimizer(const std::string &library_path, const 
std::string &base_path) { const std::string full_path = getFullPath(library_path, base_path); +#if defined(_WIN32) + HMODULE handle = LoadLibraryA(full_path.c_str()); + + NNTR_THROW_IF(handle == nullptr, std::invalid_argument) + << func_tag << "open plugin failed"; + + nntrainer::OptimizerPluggable *pluggable = + reinterpret_cast( + GetProcAddress((HMODULE)handle, "ml_train_optimizer_pluggable")); + + auto close_dl = [handle] { FreeLibrary((HINSTANCE)handle); }; + + NNTR_THROW_IF_CLEANUP(pluggable == nullptr, std::invalid_argument, close_dl) + << func_tag << "loading symbol failed"; +#else void *handle = dlopen(full_path.c_str(), RTLD_LAZY | RTLD_LOCAL); const char *error_msg = dlerror(); @@ -549,6 +584,7 @@ int AppContext::registerOptimizer(const std::string &library_path, NNTR_THROW_IF_CLEANUP(error_msg != nullptr || pluggable == nullptr, std::invalid_argument, close_dl) << func_tag << "loading symbol failed, reason: " << error_msg; +#endif auto optimizer = pluggable->createfunc(); NNTR_THROW_IF_CLEANUP(optimizer == nullptr, std::invalid_argument, close_dl) @@ -571,40 +607,36 @@ int AppContext::registerOptimizer(const std::string &library_path, std::vector AppContext::registerPluggableFromDirectory(const std::string &base_path) { - DIR *dir = opendir(base_path.c_str()); - NNTR_THROW_IF(dir == nullptr, std::invalid_argument) - << func_tag << "failed to open the directory: " << base_path; + bool directory_exist = std::filesystem::is_directory(base_path); - struct dirent *entry; + NNTR_THROW_IF(!directory_exist, std::invalid_argument) + << func_tag << "failed to open the directory: " << base_path; std::vector keys; - while ((entry = readdir(dir)) != NULL) { - if (endswith(entry->d_name, solib_suffix)) { - if (endswith(entry->d_name, layerlib_suffix)) { + for (auto &entry : std::filesystem::directory_iterator(base_path)) { + std::string entry_name = entry.path().string(); + + if (endswith(entry_name, solib_suffix)) { + if (endswith(entry_name, layerlib_suffix)) { try { - int key = registerLayer(entry->d_name, base_path); + int key = registerLayer(entry_name, base_path); keys.emplace_back(key); } catch (std::exception &e) { - closedir(dir); throw; } - } else if (endswith(entry->d_name, optimizerlib_suffix)) { + } else if (endswith(entry_name, optimizerlib_suffix)) { try { - int key = registerOptimizer(entry->d_name, base_path); + int key = registerOptimizer(entry_name, base_path); keys.emplace_back(key); } catch (std::exception &e) { - closedir(dir); throw; } } } } - if (dir != NULL) - closedir(dir); - return keys; } diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp index f09116091a..4d9ecbb1da 100644 --- a/nntrainer/cl_context.cpp +++ b/nntrainer/cl_context.cpp @@ -227,7 +227,7 @@ bool ClContext::clCreateKernel(std::string &kernel_string, size_t binary_size = fs.tellg(); fs.seekg(0, std::ios::beg); - unsigned char chunk[binary_size]; + unsigned char *chunk = new unsigned char[binary_size]; fs.read((char *)chunk, binary_size); result = program.CreateCLProgramWithBinary( @@ -236,6 +236,7 @@ bool ClContext::clCreateKernel(std::string &kernel_string, opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name + "_kernel.bin", ""); + delete[] chunk; } else { result = program.CreateCLProgram(context_inst_.GetContext(), diff --git a/nntrainer/compiler/remap_realizer.h b/nntrainer/compiler/remap_realizer.h index 76e26a0e91..a35e2af684 100644 --- a/nntrainer/compiler/remap_realizer.h +++ b/nntrainer/compiler/remap_realizer.h @@ -14,6 +14,7 @@
#include #include +#include #include #include diff --git a/nntrainer/compiler/tflite_interpreter.cpp b/nntrainer/compiler/tflite_interpreter.cpp index 81132b0cfe..5036131f09 100644 --- a/nntrainer/compiler/tflite_interpreter.cpp +++ b/nntrainer/compiler/tflite_interpreter.cpp @@ -57,11 +57,9 @@ void builder2file(const flatbuffers::FlatBufferBuilder &builder, NNTR_THROW_IF(!tflite::VerifyModelBuffer(v), std::invalid_argument) << FUNC_TAG << "Verifying serialized model failed"; std::ofstream os(out, std::ios_base::binary); - const size_t error_buflen = 100; - char error_buf[error_buflen]; NNTR_THROW_IF(!os.good(), std::invalid_argument) << FUNC_TAG - << "failed to open, reason: " << strerror_r(errno, error_buf, error_buflen); + << "failed to open, reason: " << strerror(errno); std::streamsize sz = static_cast(builder.GetSize()); NNTR_THROW_IF(sz < 0, std::invalid_argument) diff --git a/nntrainer/dataset/databuffer.cpp b/nntrainer/dataset/databuffer.cpp index f58ce05e98..fb7d60972e 100644 --- a/nntrainer/dataset/databuffer.cpp +++ b/nntrainer/dataset/databuffer.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/nntrainer/dataset/dir_data_producers.cpp b/nntrainer/dataset/dir_data_producers.cpp index 3c62923ea2..7ef2484e7e 100644 --- a/nntrainer/dataset/dir_data_producers.cpp +++ b/nntrainer/dataset/dir_data_producers.cpp @@ -35,8 +35,8 @@ * @param width width * @param height height */ -static void readImage(const std::string path, float *input, uint width, - uint height) { +static void readImage(const std::string path, float *input, unsigned int width, + unsigned int height) { FILE *f = fopen(path.c_str(), "rb"); if (f == nullptr) @@ -50,12 +50,12 @@ static void readImage(const std::string path, float *input, uint width, size_t row_padded = (width * 3 + 3) & (~3); unsigned char *data = new unsigned char[row_padded]; - for (uint i = 0; i < height; i++) { + for (unsigned int i = 0; i < height; i++) { result = fread(data, sizeof(unsigned char), row_padded, f); NNTR_THROW_IF(result != row_padded, std::invalid_argument) << "Cannot read bmp pixel data"; - for (uint j = 0; j < width; j++) { + for (unsigned int j = 0; j < width; j++) { input[height * i + j] = (float)data[j * 3 + 2]; @@ -104,7 +104,7 @@ DirDataProducer::finalize(const std::vector &input_dims, const auto &dir_path = std::get(*dir_data_props).get(); for (const auto &entry : std::filesystem::directory_iterator(dir_path)) - class_names.push_back(entry.path()); + class_names.push_back(entry.path().string()); num_class = class_names.size(); diff --git a/nntrainer/layers/acti_func.h b/nntrainer/layers/acti_func.h index 3a8dc0fe0d..02ec4eb31d 100644 --- a/nntrainer/layers/acti_func.h +++ b/nntrainer/layers/acti_func.h @@ -16,6 +16,9 @@ #define __ACTI_FUNC_H__ #ifdef __cplusplus +#define _USE_MATH_DEFINES +#include + #include #include diff --git a/nntrainer/layers/cl_layers/reshape_cl.cpp b/nntrainer/layers/cl_layers/reshape_cl.cpp index 4c7e417573..ab00386029 100644 --- a/nntrainer/layers/cl_layers/reshape_cl.cpp +++ b/nntrainer/layers/cl_layers/reshape_cl.cpp @@ -163,6 +163,7 @@ void ReshapeLayerCl::ReshapeProcess(Tensor const &input, Tensor &output) { } } +#ifdef ENABLE_FP16 void ReshapeLayerCl::copy_cl_fp16(const __fp16 *input, __fp16 *res, unsigned int input_batch_size, unsigned int input_channels, @@ -241,6 +242,7 @@ void ReshapeLayerCl::copy_cl_fp16(const __fp16 *input, __fp16 *res, } while (false); } +#endif void ReshapeLayerCl::copy_cl(const float *input, float *res, unsigned int 
input_batch_size, diff --git a/nntrainer/layers/cl_layers/swiglu_cl.cpp b/nntrainer/layers/cl_layers/swiglu_cl.cpp index cacbcf892a..d3fd5a8e29 100644 --- a/nntrainer/layers/cl_layers/swiglu_cl.cpp +++ b/nntrainer/layers/cl_layers/swiglu_cl.cpp @@ -160,6 +160,7 @@ void SwiGLULayerCl::swiglu_cl(const float *matAdata, const float *vecXdata, } while (false); } +#ifdef ENABLE_FP16 void SwiGLULayerCl::swiglu_cl_fp16(const __fp16 *matAdata, const __fp16 *vecXdata, __fp16 *vecYdata, unsigned int dim1, unsigned int dim2) { @@ -229,6 +230,7 @@ void SwiGLULayerCl::swiglu_cl_fp16(const __fp16 *matAdata, } while (false); } +#endif void SwiGLULayerCl::calcDerivative(nntrainer::RunLayerContext &context) { std::throw_with_nested(std::runtime_error("Training is not supported yet.")); diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h index 462b614bf9..24aeb7bb6f 100644 --- a/nntrainer/layers/common_properties.h +++ b/nntrainer/layers/common_properties.h @@ -591,7 +591,7 @@ class OutDim : public nntrainer::PositiveIntegerProperty { * will be zero * */ -class ZeroIdxMask : public nntrainer::Property { +class ZeroIdxMask : public nntrainer::Property { public: static constexpr const char *key = "zero_idx_mask"; /**< unique key to access */ diff --git a/nntrainer/layers/embedding.cpp b/nntrainer/layers/embedding.cpp index 5ddb6f7f4c..1121d280ce 100644 --- a/nntrainer/layers/embedding.cpp +++ b/nntrainer/layers/embedding.cpp @@ -98,7 +98,7 @@ void EmbeddingLayer::forwarding(RunLayerContext &context, bool training) { Tensor batchsliced_hidden = hidden_.getBatchSlice(b, 1); for (unsigned int i = 0; i < input_.width(); ++i) { - uint embed_idx = static_cast(in_data[i]); + unsigned int embed_idx = static_cast(in_data[i]); if (embed_idx >= in_dim) { throw std::invalid_argument("input word index is greater than in_dim"); } @@ -140,7 +140,7 @@ void EmbeddingLayer::incremental_forwarding(RunLayerContext &context, Tensor batchsliced_hidden = hidden_.getBatchSlice(b, 1); for (unsigned int i = from; i < to; ++i) { - uint embed_idx = static_cast(in_data[i]); + unsigned int embed_idx = static_cast(in_data[i]); if (embed_idx >= in_dim) { throw std::invalid_argument("input word index is greater than in_dim"); } @@ -174,7 +174,7 @@ void EmbeddingLayer::calcGradient(RunLayerContext &context) { // This is to calculate gradient with current implementation of optimizer. // In order to accelerate, we need to better way like using index to weight. - /// @todo + /// @todo // Current nntrainer gradient Tensor shape is identical to its // weight shape. However, this creates a sparse Tensor since we are only using // certain indices of the Tensor that we are interested in. Since we have such @@ -187,7 +187,7 @@ void EmbeddingLayer::calcGradient(RunLayerContext &context) { if (djdw.getDataType() == TensorDim::DataType::FP32) { for (unsigned int i = 0; i < input_.width(); ++i) { - uint embed_idx = ((float *)(in_data))[i]; + unsigned int embed_idx = ((float *)(in_data))[i]; // Assume padding is 0 and index always start from 1. // If in_data[i] - 1 < 0, then it skips. // if (embed_idx == 0) @@ -203,7 +203,7 @@ void EmbeddingLayer::calcGradient(RunLayerContext &context) { } else if (djdw.getDataType() == TensorDim::DataType::FP16) { #ifdef ENABLE_FP16 for (unsigned int i = 0; i < input_.width(); ++i) { - uint embed_idx = ((float *)(in_data))[i]; + unsigned int embed_idx = ((float *)(in_data))[i]; // Assume padding is 0 and index always start from 1. // If in_data[i] - 1 < 0, then it skips. 
// if (embed_idx == 0) diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp index a045c8c7c7..decb2f39e2 100644 --- a/nntrainer/layers/layer_node.cpp +++ b/nntrainer/layers/layer_node.cpp @@ -144,7 +144,8 @@ getComputeEngine(const std::vector &props) { if (nntrainer::istrequal(key, "engine")) { constexpr const auto data = std::data(props::ComputeEngineTypeInfo::EnumList); - for (uint i = 0; i < props::ComputeEngineTypeInfo::EnumList.size(); ++i) { + for (size_t i = 0; i < props::ComputeEngineTypeInfo::EnumList.size(); + ++i) { if (nntrainer::istrequal(value.c_str(), props::ComputeEngineTypeInfo::EnumStr[i])) { return data[i]; diff --git a/nntrainer/layers/preprocess_l2norm_layer.cpp b/nntrainer/layers/preprocess_l2norm_layer.cpp index e8b9dd5eaa..d9f6d77b81 100644 --- a/nntrainer/layers/preprocess_l2norm_layer.cpp +++ b/nntrainer/layers/preprocess_l2norm_layer.cpp @@ -43,7 +43,7 @@ void PreprocessL2NormLayer::forwarding(RunLayerContext &context, auto &input_ = context.getInput(SINGLE_INOUT_IDX); - for (uint b = 0; b < input_.batch(); ++b) { + for (unsigned int b = 0; b < input_.batch(); ++b) { auto input_slice = input_.getBatchSlice(b, 1); auto hidden_slice = hidden_.getBatchSlice(b, 1); input_slice.multiply(1 / input_slice.l2norm(), hidden_slice); diff --git a/nntrainer/meson.build b/nntrainer/meson.build index ed15b8f2a7..2988eccbeb 100644 --- a/nntrainer/meson.build +++ b/nntrainer/meson.build @@ -77,26 +77,29 @@ if get_option('platform') == 'android' nntrainer_dep = declare_dependency(include_directories: nntrainer_inc) else # Build libraries - nntrainer_shared = shared_library('nntrainer', - nntrainer_sources, - dependencies: nntrainer_base_deps, - include_directories: nntrainer_inc, - install: true, - install_dir: nntrainer_libdir - ) + # nntrainer_shared = shared_library('nntrainer', + # nntrainer_sources, + # dependencies: nntrainer_base_deps, + # include_directories: nntrainer_inc, + # install: true, + # install_dir: nntrainer_libdir, + # name_suffix: 'dll' + # ) nntrainer_static = static_library('nntrainer', nntrainer_sources, dependencies: nntrainer_base_deps, include_directories: nntrainer_inc, install: true, - install_dir: nntrainer_libdir + install_dir: nntrainer_libdir, + name_prefix : '', + name_suffix: 'lib' ) - nntrainer_lib = nntrainer_shared - if get_option('default_library') == 'static' + # nntrainer_lib = nntrainer_shared + # if get_option('default_library') == 'static' nntrainer_lib = nntrainer_static - endif + # endif nntrainer_dep = declare_dependency(link_with: nntrainer_lib, dependencies: nntrainer_base_deps, diff --git a/nntrainer/models/dynamic_training_optimization.cpp b/nntrainer/models/dynamic_training_optimization.cpp index a37a569446..365e3cc85b 100644 --- a/nntrainer/models/dynamic_training_optimization.cpp +++ b/nntrainer/models/dynamic_training_optimization.cpp @@ -12,6 +12,7 @@ */ #include +#include #include #include diff --git a/nntrainer/models/model_loader.cpp b/nntrainer/models/model_loader.cpp index 286ed17e43..049895cd1c 100644 --- a/nntrainer/models/model_loader.cpp +++ b/nntrainer/models/model_loader.cpp @@ -472,16 +472,14 @@ int ModelLoader::loadFromConfig(std::string config, NeuralNetwork &model) { auto config_realpath_char = getRealpath(config.c_str(), nullptr); if (config_realpath_char == nullptr) { - const size_t error_buflen = 100; - char error_buf[error_buflen]; ml_loge("failed to resolve config path to absolute path, reason: %s", - strerror_r(errno, error_buf, error_buflen)); + strerror(errno)); return 
ML_ERROR_INVALID_PARAMETER; } std::string config_realpath(config_realpath_char); free(config_realpath_char); - auto pos = config_realpath.find_last_of("/"); + auto pos = config_realpath.find_last_of("/\\"); // handle both separators if (pos == std::string::npos) { ml_loge("resolved model path does not contain any path separator. %s", config_realpath.c_str()); diff --git a/nntrainer/nntrainer_log.h b/nntrainer/nntrainer_log.h index 751cdfe864..164bf6d7a8 100644 --- a/nntrainer/nntrainer_log.h +++ b/nntrainer/nntrainer_log.h @@ -56,27 +56,27 @@ #include #if !defined(ml_logi) -#define ml_logi(format, args...) \ +#define ml_logi(format, ...) \ __nntrainer_log_print(NNTRAINER_LOG_INFO, "(%s:%s:%d) " format, __FILE__, \ - __func__, __LINE__, ##args) + __func__, __LINE__, __VA_ARGS__) #endif #if !defined(ml_logw) -#define ml_logw(format, args...) \ +#define ml_logw(format, ...) \ __nntrainer_log_print(NNTRAINER_LOG_WARN, "(%s:%s:%d) " format, __FILE__, \ - __func__, __LINE__, ##args) + __func__, __LINE__, __VA_ARGS__) #endif #if !defined(ml_loge) -#define ml_loge(format, args...) \ +#define ml_loge(format, ...) \ __nntrainer_log_print(NNTRAINER_LOG_ERROR, "(%s:%s:%d) " format, __FILE__, \ - __func__, __LINE__, ##args) + __func__, __LINE__, __VA_ARGS__) #endif #if !defined(ml_logd) -#define ml_logd(format, args...) \ +#define ml_logd(format, ...) \ __nntrainer_log_print(NNTRAINER_LOG_DEBUG, "(%s:%s:%d) " format, __FILE__, \ - __func__, __LINE__, ##args) + __func__, __LINE__, __VA_ARGS__) #endif #endif diff --git a/nntrainer/nntrainer_logger.cpp b/nntrainer/nntrainer_logger.cpp index fd3ea2c7f9..055545ecff 100644 --- a/nntrainer/nntrainer_logger.cpp +++ b/nntrainer/nntrainer_logger.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +//#include #include namespace nntrainer { diff --git a/nntrainer/opencl/opencl_loader.cpp b/nntrainer/opencl/opencl_loader.cpp index 8b39eace80..e98d2daafc 100644 --- a/nntrainer/opencl/opencl_loader.cpp +++ b/nntrainer/opencl/opencl_loader.cpp @@ -13,113 +13,132 @@ #include "opencl_loader.h" +#ifdef _WIN32 +#include "windows.h" +#else #include +#endif #include #include namespace nntrainer::opencl { +#ifdef _WIN32 +#define LoadFunction(function) \ + function = reinterpret_cast(GetProcAddress((HMODULE)libopencl, #function)); +#else #define LoadFunction(function) \ function = reinterpret_cast(dlsym(libopencl, #function)); +#endif - /** - * @brief Declaration of loading function for OpenCL APIs - * - * @param libopencl - */ - void LoadOpenCLFunctions(void *libopencl); + /** + * @brief Declaration of loading function for OpenCL APIs + * + * @param libopencl + */ + void LoadOpenCLFunctions(void *libopencl); - static bool open_cl_initialized = false; + static bool open_cl_initialized = false; - /** - * @brief Loading OpenCL libraries and required function - * - * @return true if successfull or false otherwise - */ - bool LoadOpenCL() { - // check if already loaded - if (open_cl_initialized) { - return true; - } + /** + * @brief Loading OpenCL libraries and required function + * + * @return true if successful or false otherwise + */ + bool LoadOpenCL() { + // check if already loaded + if (open_cl_initialized) { + return true; + } - void *libopencl = nullptr; - static const char *kClLibName = "libOpenCL.so"; + void *libopencl = nullptr; +#ifdef _WIN32 + static const char *kClLibName = "OpenCL.dll"; + libopencl = LoadLibraryA(kClLibName); +#else + static const char *kClLibName = "libOpenCL.so"; libopencl = dlopen(kClLibName, RTLD_NOW | RTLD_LOCAL); - if (libopencl) { - LoadOpenCLFunctions(libopencl); - open_cl_initialized = true; -
return true; - } +#endif + + if (libopencl) { + LoadOpenCLFunctions(libopencl); + open_cl_initialized = true; + return true; + } +#ifdef _WIN32 + ml_loge("Can not open OpenCL library on this device"); +#else // record error std::string error(dlerror()); ml_loge("Can not open OpenCL library on this device - %s", error.c_str()); - return false; -} +#endif + return false; + } -/** - * @brief Utility to load the required OpenCL APIs - * - * @param libopencl - */ -void LoadOpenCLFunctions(void *libopencl) { - LoadFunction(clGetPlatformIDs); - LoadFunction(clGetDeviceIDs); - LoadFunction(clGetDeviceInfo); - LoadFunction(clCreateContext); - LoadFunction(clCreateCommandQueue); - LoadFunction(clCreateBuffer); - LoadFunction(clEnqueueWriteBuffer); - LoadFunction(clEnqueueReadBuffer); - LoadFunction(clEnqueueMapBuffer); - LoadFunction(clEnqueueUnmapMemObject); - LoadFunction(clEnqueueWriteBufferRect); - LoadFunction(clEnqueueReadBufferRect); - LoadFunction(clCreateProgramWithSource); - LoadFunction(clCreateProgramWithBinary); - LoadFunction(clBuildProgram); - LoadFunction(clGetProgramInfo); - LoadFunction(clGetProgramBuildInfo); - LoadFunction(clRetainProgram); - LoadFunction(clCreateKernel); - LoadFunction(clSetKernelArg); - LoadFunction(clEnqueueNDRangeKernel); - LoadFunction(clGetEventProfilingInfo); - LoadFunction(clRetainContext); - LoadFunction(clReleaseContext); - LoadFunction(clRetainCommandQueue); - LoadFunction(clReleaseCommandQueue); - LoadFunction(clReleaseMemObject); -} + /** + * @brief Utility to load the required OpenCL APIs + * + * @param libopencl + */ + void LoadOpenCLFunctions(void *libopencl) { + LoadFunction(clGetPlatformIDs); + LoadFunction(clGetDeviceIDs); + LoadFunction(clGetDeviceInfo); + LoadFunction(clCreateContext); + LoadFunction(clCreateCommandQueue); + LoadFunction(clCreateBuffer); + LoadFunction(clEnqueueWriteBuffer); + LoadFunction(clEnqueueReadBuffer); + LoadFunction(clEnqueueMapBuffer); + LoadFunction(clEnqueueUnmapMemObject); + LoadFunction(clEnqueueWriteBufferRect); + LoadFunction(clEnqueueReadBufferRect); + LoadFunction(clCreateProgramWithSource); + LoadFunction(clCreateProgramWithBinary); + LoadFunction(clBuildProgram); + LoadFunction(clGetProgramInfo); + LoadFunction(clGetProgramBuildInfo); + LoadFunction(clRetainProgram); + LoadFunction(clCreateKernel); + LoadFunction(clSetKernelArg); + LoadFunction(clEnqueueNDRangeKernel); + LoadFunction(clGetEventProfilingInfo); + LoadFunction(clRetainContext); + LoadFunction(clReleaseContext); + LoadFunction(clRetainCommandQueue); + LoadFunction(clReleaseCommandQueue); + LoadFunction(clReleaseMemObject); + } -PFN_clGetPlatformIDs clGetPlatformIDs; -PFN_clGetDeviceIDs clGetDeviceIDs; -PFN_clGetDeviceInfo clGetDeviceInfo; -PFN_clCreateContext clCreateContext; -PFN_clCreateCommandQueue clCreateCommandQueue; -PFN_clCreateBuffer clCreateBuffer; -PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; -PFN_clEnqueueReadBuffer clEnqueueReadBuffer; -PFN_clEnqueueMapBuffer clEnqueueMapBuffer; -PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; -PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; -PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; -PFN_clCreateProgramWithSource clCreateProgramWithSource; -PFN_clCreateProgramWithBinary clCreateProgramWithBinary; -PFN_clBuildProgram clBuildProgram; -PFN_clGetProgramInfo clGetProgramInfo; -PFN_clGetProgramBuildInfo clGetProgramBuildInfo; -PFN_clRetainProgram clRetainProgram; -PFN_clCreateKernel clCreateKernel; -PFN_clSetKernelArg clSetKernelArg; -PFN_clEnqueueNDRangeKernel 
clEnqueueNDRangeKernel; -PFN_clGetEventProfilingInfo clGetEventProfilingInfo; -PFN_clRetainContext clRetainContext; -PFN_clReleaseContext clReleaseContext; -PFN_clRetainCommandQueue clRetainCommandQueue; -PFN_clReleaseCommandQueue clReleaseCommandQueue; -PFN_clReleaseMemObject clReleaseMemObject; + PFN_clGetPlatformIDs clGetPlatformIDs; + PFN_clGetDeviceIDs clGetDeviceIDs; + PFN_clGetDeviceInfo clGetDeviceInfo; + PFN_clCreateContext clCreateContext; + PFN_clCreateCommandQueue clCreateCommandQueue; + PFN_clCreateBuffer clCreateBuffer; + PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; + PFN_clEnqueueReadBuffer clEnqueueReadBuffer; + PFN_clEnqueueMapBuffer clEnqueueMapBuffer; + PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; + PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; + PFN_clCreateProgramWithSource clCreateProgramWithSource; + PFN_clCreateProgramWithBinary clCreateProgramWithBinary; + PFN_clBuildProgram clBuildProgram; + PFN_clGetProgramInfo clGetProgramInfo; + PFN_clGetProgramBuildInfo clGetProgramBuildInfo; + PFN_clRetainProgram clRetainProgram; + PFN_clCreateKernel clCreateKernel; + PFN_clSetKernelArg clSetKernelArg; + PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; + PFN_clGetEventProfilingInfo clGetEventProfilingInfo; + PFN_clRetainContext clRetainContext; + PFN_clReleaseContext clReleaseContext; + PFN_clRetainCommandQueue clRetainCommandQueue; + PFN_clReleaseCommandQueue clReleaseCommandQueue; + PFN_clReleaseMemObject clReleaseMemObject; -} // namespace nntrainer::opencl + } // namespace nntrainer::opencl diff --git a/nntrainer/opencl/opencl_program.cpp b/nntrainer/opencl/opencl_program.cpp index 3e8d7debf2..64dcbd9c03 100644 --- a/nntrainer/opencl/opencl_program.cpp +++ b/nntrainer/opencl/opencl_program.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include "opencl_loader.h" @@ -70,10 +71,10 @@ bool Program::GetProgramInfo(cl_device_id device_id) { cl_int error_code = CL_SUCCESS; // Read the binary size - size_t binaries_size[num_devices]; - error_code = - clGetProgramInfo(program_, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * num_devices, binaries_size, nullptr); + std::vector binaries_size(num_devices); + error_code = clGetProgramInfo(program_, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * num_devices, + binaries_size.data(), nullptr); if (error_code != CL_SUCCESS) { ml_loge("Failed to get program binary size. OpenCL error code: %d. %s", @@ -95,9 +96,10 @@ bool Program::GetProgramInfo(cl_device_id device_id) { } // getting the kernel names - char kernel_names[kernel_names_size]; - error_code = clGetProgramInfo(program_, CL_PROGRAM_KERNEL_NAMES, - kernel_names_size, kernel_names, nullptr); + std::vector kernel_names(kernel_names_size); + error_code = + clGetProgramInfo(program_, CL_PROGRAM_KERNEL_NAMES, kernel_names_size, + kernel_names.data(), nullptr); if (error_code != CL_SUCCESS) { ml_loge("Failed to get program kernel names. OpenCL error code: %d. 
%s", @@ -105,19 +107,21 @@ bool Program::GetProgramInfo(cl_device_id device_id) { (GetProgramBuildInfo(device_id, CL_PROGRAM_BUILD_LOG)).c_str()); return false; } else { - ml_logi("Saving kernel binary for: %s", std::string(kernel_names).c_str()); + ml_logi("Saving kernel binary for: %s", + std::string(kernel_names.data()).c_str()); } // Read the binary size_t binaries_ptr_alloc_size = sizeof(unsigned char *) * num_devices; - unsigned char *binaries_ptr[num_devices]; + std::vector binaries_ptr(num_devices); for (unsigned int i = 0; i < num_devices; ++i) { binaries_ptr[i] = new unsigned char[binaries_size[i]]; } - error_code = clGetProgramInfo(program_, CL_PROGRAM_BINARIES, - binaries_ptr_alloc_size, binaries_ptr, nullptr); + error_code = + clGetProgramInfo(program_, CL_PROGRAM_BINARIES, binaries_ptr_alloc_size, + binaries_ptr.data(), nullptr); if (error_code != CL_SUCCESS) { ml_loge("Failed to get program binary data. OpenCL error code: %d. %s", @@ -135,7 +139,7 @@ bool Program::GetProgramInfo(cl_device_id device_id) { // All kernels in the program will be saved in the binary file for (unsigned int i = 0; i < num_devices; ++i) { std::ofstream fs(Program::DEFAULT_KERNEL_PATH + "/" + - std::string(kernel_names) + "_kernel.bin", + std::string(kernel_names.data()) + "_kernel.bin", std::ios::out | std::ios::binary | std::ios::app); if (!fs) { ml_loge( diff --git a/nntrainer/optimizers/lr_scheduler_cosine.cpp b/nntrainer/optimizers/lr_scheduler_cosine.cpp index aff4bc47bc..51ce9504b2 100644 --- a/nntrainer/optimizers/lr_scheduler_cosine.cpp +++ b/nntrainer/optimizers/lr_scheduler_cosine.cpp @@ -11,7 +11,8 @@ * */ -#include +#define _USE_MATH_DEFINES +#include #include #include diff --git a/nntrainer/tensor/blas_avx.cpp b/nntrainer/tensor/blas_avx.cpp index 7042dc6c70..6945190c3c 100644 --- a/nntrainer/tensor/blas_avx.cpp +++ b/nntrainer/tensor/blas_avx.cpp @@ -11,6 +11,8 @@ * */ +#include + #include #include #include diff --git a/nntrainer/tensor/cl_operations/attention_kernels_fp16.cpp b/nntrainer/tensor/cl_operations/attention_kernels_fp16.cpp index c1284b0a9c..5355ad31a9 100644 --- a/nntrainer/tensor/cl_operations/attention_kernels_fp16.cpp +++ b/nntrainer/tensor/cl_operations/attention_kernels_fp16.cpp @@ -16,6 +16,8 @@ namespace nntrainer { +#ifdef ENABLE_FP16 + void rotary_emb_cl(__fp16 *in, __fp16 *out, std::vector> freqs_cos, std::vector> freqs_sin, @@ -226,4 +228,7 @@ void rotary_emb_cl(__fp16 *in, __fp16 *out, } while (false); } + +#endif + } // namespace nntrainer diff --git a/nntrainer/tensor/cl_operations/blas_kernels_fp16.cpp b/nntrainer/tensor/cl_operations/blas_kernels_fp16.cpp index bdff42c135..85e2540bdc 100644 --- a/nntrainer/tensor/cl_operations/blas_kernels_fp16.cpp +++ b/nntrainer/tensor/cl_operations/blas_kernels_fp16.cpp @@ -16,6 +16,8 @@ namespace nntrainer { +#ifdef ENABLE_FP16 + void sgemv_cl(const __fp16 *matAdata, const __fp16 *vecXdata, __fp16 *vecYdata, bool TransA, unsigned int dim1, unsigned int dim2, unsigned int lda) { @@ -524,4 +526,7 @@ void transpose_cl_axis(const __fp16 *in, __fp16 *res, } while (false); } + +#endif + } // namespace nntrainer diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp index 9c31c40f2c..548e9a393c 100644 --- a/nntrainer/tensor/float_tensor.cpp +++ b/nntrainer/tensor/float_tensor.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -910,7 +911,7 @@ void FloatTensor::filter_mask(const Tensor &mask_len, bool reverse) { for (unsigned int b = 0; b < batch(); b++) { float *addr = (float 
*)getAddress(getIndex(b, 0, 0, 0)); - const uint *mask_len_val = mask_len.getAddress(b, 0, 0, 0); + const unsigned int *mask_len_val = mask_len.getAddress(b, 0, 0, 0); std::fill(addr, addr + (*mask_len_val), en_mask_val); } } diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp index bdc5090410..68309221a3 100644 --- a/nntrainer/tensor/half_tensor.cpp +++ b/nntrainer/tensor/half_tensor.cpp @@ -19,6 +19,8 @@ namespace nntrainer { +#ifdef ENABLE_FP16 + HalfTensor::HalfTensor(std::string name_, Tformat fm) : TensorBase(name_, fm, Tdatatype::FP16) {} @@ -717,7 +719,7 @@ void HalfTensor::filter_mask(const Tensor &mask_len, bool reverse) { for (unsigned int b = 0; b < batch(); b++) { _FP16 *addr = (_FP16 *)getAddress(getIndex(b, 0, 0, 0)); - const uint *mask_len_val = mask_len.getAddress(b, 0, 0, 0); + const unsigned int *mask_len_val = mask_len.getAddress(b, 0, 0, 0); std::fill(addr, addr + (*mask_len_val), (_FP16)en_mask_val); } } @@ -1180,4 +1182,6 @@ bool HalfTensor::isValid() const { return is_valid(dim.getDataLen(), Tdatatype::FP16, (_FP16 *)getData()); } +#endif + } // namespace nntrainer diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h index 206a8482de..45808d4d47 100644 --- a/nntrainer/tensor/half_tensor.h +++ b/nntrainer/tensor/half_tensor.h @@ -23,6 +23,8 @@ namespace nntrainer { +#ifdef ENABLE_FP16 + /** * @class HalfTensor class * @brief HalfTensor class for 16-bit floating point calculation @@ -505,6 +507,8 @@ class HalfTensor : public TensorBase { bool isValid() const override; }; +#endif + } // namespace nntrainer #endif /* __cplusplus */ diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index 1e0b1ddce2..28452569d0 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -24,9 +24,9 @@ #include #include #include -#include +//#include #include -#include +//#include #include #include @@ -90,19 +90,20 @@ MMapedMemory::MMapedMemory(size_t size, bool allocate_fd_) : buf_ = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); #endif } else { - buf_ = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, - fd_, 0); - } - - if (buf_ == MAP_FAILED) { -#ifdef __ANDROID__ - if (fd_ != -1) { - // unlink / close the given fd here - close(fd_); - } -#endif - - throw std::runtime_error("[MMapedMemory] mmap failed"); + // TODO GK +// buf_ = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, +// fd_, 0); +// } + +// if (buf_ == MAP_FAILED) { +// #ifdef __ANDROID__ +// if (fd_ != -1) { +// // unlink / close the given fd here +// close(fd_); +// } +// #endif + +// throw std::runtime_error("[MMapedMemory] mmap failed"); } fd = fd_; @@ -118,17 +119,18 @@ MMapedMemory::~MMapedMemory() noexcept { assert(buf_size > 0 && fd > 0); #endif - if (fd != -1) { - if (close(fd) < 0) { - ml_logw("[MMapedMemory] closing fd failed on destruction please check"); - } - } - - if (buf != nullptr) { - if (munmap(buf, buf_size) < 0) { - ml_logw("[MMapedMemory] munmap failed on destruction please check"); - } - } + // TODO GK + // if (fd != -1) { + // if (close(fd) < 0) { + // ml_logw("[MMapedMemory] closing fd failed on destruction please check"); + // } + // } + + // if (buf != nullptr) { + // if (munmap(buf, buf_size) < 0) { + // ml_logw("[MMapedMemory] munmap failed on destruction please check"); + // } + // } /// keeping the invariant although this is not necessary as of now fd = -1; diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp index 
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
index 22ef10e0de..08ea039fc5 100644
--- a/nntrainer/tensor/quantizer.cpp
+++ b/nntrainer/tensor/quantizer.cpp
@@ -8,10 +8,24 @@
  * @bug No known bugs except for NYI items
  */

+#include
 #include

 namespace nntrainer {

+/**
+ * @brief Helper function for clipping
+ *
+ * @tparam T data type
+ * @param val value to clip
+ * @param lower lower bound
+ * @param upper upper bound
+ * @return T clipped data
+ */
+template <typename T> T clip(const T &val, const T &lower, const T &upper) {
+  return std::max(lower, std::min(val, upper));
+}
+
 /**
  * @brief PerTensorAffineQuantizer class
  */
@@ -21,20 +35,83 @@ std::unique_ptr PerTensorAffineQuantizer::create() {

 Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
                                           Tdatatype qtype) {
-  /// @todo NYI
-  return input;
+  // Currently only full precision floating point is supported
+  NNTR_THROW_IF(input.getDataType() != Tdatatype::FP32, std::invalid_argument)
+    << "[Quantizer::quantize] Tensor data type is not floating point";
+
+  NNTR_THROW_IF(qtype == Tdatatype::FP32, std::invalid_argument)
+    << "[Quantizer::quantize] Cannot quantize to full precision floating point";
+
+  // 1. Calculate quantization parameters
+  calculateQParams(input, qtype);
+
+  // 2. Create output tensor with same dimension but different data type
+  TensorDim dim = input.getDim();
+  dim.setDataType(qtype);
+  Tensor output(dim);
+
+  /// @todo this is a naive impl. need optimization
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(
+            b, c, h, w,
+            clip(std::lround(input.getValue(b, c, h, w) / scale + zero_point),
+                 quant_min, quant_max));
+        }
+      }
+    }
+  }
+
+  return output;
 }

 Tensor PerTensorAffineQuantizer::dequantize(const Tensor &input,
                                             Tdatatype dtype) {
-  /// @todo NYI
-  return input;
+  Tensor output = input.clone(dtype);
+
+  /// @todo this is a naive impl. need optimization
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(b, c, h, w,
+                          (input.getValue(b, c, h, w) - zero_point) *
+                            scale);
+        }
+      }
+    }
+  }
+
+  return output;
 }

 QScheme PerTensorAffineQuantizer::qscheme() const {
   return QScheme::PER_TENSOR_AFFINE;
 }

+void PerTensorAffineQuantizer::calculateQParams(const Tensor &input,
+                                                Tdatatype qtype) {
+  unsigned int N;
+
+  if (qtype == Tdatatype::QINT8) {
+    N = 8;
+  } else if (qtype == Tdatatype::QINT4) {
+    N = 4;
+  } else {
+    throw std::invalid_argument("Error: Unsupported data type.");
+  }
+
+  quant_max = std::pow(2, N - 1) - 1;
+  quant_min = -std::pow(2, N - 1);
+
+  /// @todo for quint8, zero point calculation should be added
+  float max_val = input.max_abs();
+  scale = max_val / ((quant_max - quant_min) / 2.0f);
+  scale = std::max(scale, std::numeric_limits<float>::epsilon());
+}
+
 /**
  * @brief PerChannelAffineQuantizer class
  */
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
index dcd6a9baed..8ec075c840 100644
--- a/nntrainer/tensor/quantizer.h
+++ b/nntrainer/tensor/quantizer.h
@@ -193,7 +193,7 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input, Tdatatype qtype) override;
 };

 /**
diff --git a/nntrainer/tensor/swap_device.cpp b/nntrainer/tensor/swap_device.cpp
index c5f4c2a7b3..23ba84a709 100644
--- a/nntrainer/tensor/swap_device.cpp
+++ b/nntrainer/tensor/swap_device.cpp
@@ -16,22 +16,28 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include

+#if defined(_WIN32)
+#include
+#define O_SYNC 0UL
+#else
+#include
+#include
+#endif
+
 namespace nntrainer {

 void SwapDevice::start(size_t size) {
   if (fd > 0)
     return;

-  fd =
-    open(dev_path.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_SYNC, (mode_t)0666);
+  fd = open(dev_path.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0666UL);
+
   NNTR_THROW_IF(fd < 0, std::runtime_error)
     << "SwapDevice: open file: " << dev_path;

@@ -64,11 +70,8 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) {
   char *ptr = static_cast<char *>(
     mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, off));

-  const size_t error_buflen = 100;
-  char error_buf[error_buflen];
   NNTR_THROW_IF(ptr == (void *)-1, std::runtime_error)
-    << "SwapDevice: mmap: "
-    << std::string(strerror_r(errno, error_buf, error_buflen));
+    << "SwapDevice: mmap: " << std::string(strerror(errno));

   void *buf = static_cast<void *>(ptr + diff);
   mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size);
@@ -125,11 +128,8 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) {
   }

   ret = munmap(std::get(info), std::get(info));
-  const size_t error_buflen = 100;
-  char error_buf[error_buflen];
   NNTR_THROW_IF(ret == -1, std::runtime_error)
-    << "SwapDevice: munmap: "
-    << std::string(strerror_r(errno, error_buf, error_buflen));
+    << "SwapDevice: munmap: " << std::string(strerror(errno));

   mapped.erase(ptr);

@@ -159,7 +159,7 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) {
   free(ptr);
   allocated.erase(ptr);

-#ifndef __ANDROID__
+#if !defined(__ANDROID__) && !defined(_WIN32)
   malloc_trim(0);
 #endif
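Note on the PerTensorAffineQuantizer math in quantizer.cpp above: for QINT8 (N = 8, symmetric, zero_point = 0) the parameters work out exactly as the first positive test case further down expects. A minimal self-contained check of one value, using only numbers taken from that test:

#include <cassert>
#include <cmath>

int main() {
  // quant_max = 2^(N-1) - 1 = 127, quant_min = -2^(N-1) = -128
  const float max_abs = 0.46354777f; // max |x| of the sample input below
  const float scale = max_abs / ((127 - (-128)) / 2.0f); // ~= 0.00363567
  const long q = std::lround(0.44809300f / scale);       // quantize one value
  assert(q == 123);                  // matches the expected QINT8 data below
  const float x = q * scale;         // dequantize: ~= 0.4471874
  (void)x;
  return 0;
}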
diff --git a/nntrainer/tensor/swap_device.h b/nntrainer/tensor/swap_device.h
index 720c8d04b3..6fce40b8d8 100644
--- a/nntrainer/tensor/swap_device.h
+++ b/nntrainer/tensor/swap_device.h
@@ -18,18 +18,22 @@
 #include
 #include
 #include
-#include
+//#include
 #include
 #include
 #include
-#include
+//#include
 #include

 /* Uncomment this to use mmap for swap data */
-#define USE_MMAP
+//#define USE_MMAP

 namespace nntrainer {

+#if defined(_WIN32)
+using ssize_t = std::make_signed_t<size_t>;
+#endif
+
 /**
  * @class   SwapDevice
  * @brief   A device used to storing data with long access time
diff --git a/nntrainer/tensor/task_executor.h b/nntrainer/tensor/task_executor.h
index 35f9fd9c14..8461b277a1 100644
--- a/nntrainer/tensor/task_executor.h
+++ b/nntrainer/tensor/task_executor.h
@@ -22,7 +22,6 @@
 #include
 #include
 #include
-#include

 #include
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index b0cbae110d..42a1fff644 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -9,6 +9,7 @@
  * @bug No known bugs except for NYI items
  */

+#include
 #include
 #include
 #include
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 0e79ff10e6..85fc2ea8f4 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -1669,7 +1669,7 @@ class Tensor {
    */
   bool isValid() const { return itensor->isValid(); };

-  static constexpr float epsilon = 1e-5;
+  static constexpr float epsilon = 1e-5f;

 private:
   std::shared_ptr itensor;
diff --git a/nntrainer/tensor/tensor_base.cpp b/nntrainer/tensor/tensor_base.cpp
index 0711504f8b..dad9d6e15d 100644
--- a/nntrainer/tensor/tensor_base.cpp
+++ b/nntrainer/tensor/tensor_base.cpp
@@ -186,7 +186,7 @@ TensorBase::computeBroadcastInfo(const Tensor &m) const {
   BroadcastInfo e;
   e.tensor_type = getTensorType();

-  uint continuity[4] = {0, 1, 2, 3};
+  unsigned int continuity[4] = {0, 1, 2, 3};
   if (getFormat() == Tformat::NHWC) {
     continuity[1] = 2;
     continuity[2] = 3;
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index cc6ad0c2b8..58598a1684 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -692,7 +692,7 @@ class TensorBase {
    */
   virtual bool isValid() const = 0;

-  static constexpr float epsilon = 1e-5;
+  static constexpr float epsilon = 1e-5f;

 protected:
   TensorDim dim;
diff --git a/nntrainer/tensor/tensor_dim.cpp b/nntrainer/tensor/tensor_dim.cpp
index cf403b9af1..7dc0c59dad 100644
--- a/nntrainer/tensor/tensor_dim.cpp
+++ b/nntrainer/tensor/tensor_dim.cpp
@@ -145,7 +145,7 @@ TensorDim &TensorDim::operator=(TensorDim &&rhs) noexcept {
   return *this;
 }

-uint TensorDim::getDataTypeSize() const {
+unsigned int TensorDim::getDataTypeSize() const {
   switch (t_type.data_type) {
   case TensorDim::DataType::FP16:
 #ifdef ENABLE_FP16
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h
index 4db4b106ed..ef001cc02b 100644
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -364,9 +364,9 @@ class Weight : public Var_Grad {
   const float getLossScale() { return loss_scale; };

 private:
-  static constexpr float epsilon = 1e-6; /**< epsilon for zero comparison */
+  static constexpr float epsilon = 1e-6f; /**< epsilon for zero comparison */
   static constexpr float epsilon_decay =
-    1e-8; /**< epsilon for zero comparison */
+    1e-8f; /**< epsilon for zero comparison */

   WeightRegularizer regularizer;    /**< regularizer for this variable */
   float regularizer_constant;       /**< constant factor for regularization */
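The `ssize_t` alias added to swap_device.h above is the standard shim for this POSIX-only type; in isolation it amounts to the following (the alias name here is hypothetical, for illustration only):

#include <cstddef>
#include <type_traits>

// Signed counterpart of size_t: matches POSIX ssize_t in width
// (long on LP64 Linux, long long on 64-bit Windows).
using my_ssize_t = std::make_signed_t<std::size_t>;
static_assert(sizeof(my_ssize_t) == sizeof(std::size_t), "same width as size_t");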
diff --git a/nntrainer/utils/ini_wrapper.cpp b/nntrainer/utils/ini_wrapper.cpp
index 7e1fb5374a..ab37fc23cb 100644
--- a/nntrainer/utils/ini_wrapper.cpp
+++ b/nntrainer/utils/ini_wrapper.cpp
@@ -128,10 +128,8 @@ void IniWrapper::save_ini(const std::string &ini_name) const {

 void IniWrapper::erase_ini() const noexcept {
   if (remove(getIniName().c_str())) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
-    std::cerr << "remove ini " << getIniName() << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+    std::cerr << "remove ini " << getIniName()
+              << "failed, reason: " << strerror(errno);
   }
 }
diff --git a/nntrainer/utils/tracer.cpp b/nntrainer/utils/tracer.cpp
index e437fd250c..152740ffa0 100644
--- a/nntrainer/utils/tracer.cpp
+++ b/nntrainer/utils/tracer.cpp
@@ -21,7 +21,7 @@
 #include
 #include
 #include
-#include
+//#include

 namespace {
diff --git a/nntrainer/utils/util_func.h b/nntrainer/utils/util_func.h
index 2ab6409f4c..aad33a203e 100644
--- a/nntrainer/utils/util_func.h
+++ b/nntrainer/utils/util_func.h
@@ -200,11 +200,9 @@ template T checkedOpenStream(const std::string &path,
                              std::ios_base::openmode mode) {
   T model_file(path, mode);
   if (!model_file.good()) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
     std::stringstream ss;
     ss << "[parseutil] requested file not opened, file path: " << path
-       << " reason: " << strerror_r(errno, error_buf, error_buflen);
+       << " reason: " << strerror(errno);
     if (errno == EPERM || errno == EACCES) {
       throw nntrainer::exception::permission_denied(ss.str().c_str());
     } else {
diff --git a/test/ccapi/unittest_ccapi.cpp b/test/ccapi/unittest_ccapi.cpp
index a8d3fb4bd6..7e6a1d017c 100644
--- a/test/ccapi/unittest_ccapi.cpp
+++ b/test/ccapi/unittest_ccapi.cpp
@@ -449,10 +449,8 @@ TEST(nntrainer_ccapi, save_ini_p) {
   EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
   auto saved_ini_name = s.getIniName() + "_saved";
   if (remove(saved_ini_name.c_str())) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
-    std::cerr << "remove ini " << saved_ini_name << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+    std::cerr << "remove ini " << saved_ini_name
+              << "failed, reason: " << strerror(errno);
   }

   model->save(saved_ini_name, ml::train::ModelFormat::MODEL_FORMAT_INI);
diff --git a/test/meson.build b/test/meson.build
index b3d99e92cf..f9bbda8a8c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -1,5 +1,7 @@
 nntrainer_test_resdir = nntrainer_resdir / 'test'
-run_command('mkdir', '-p', nntrainer_test_resdir)
+#run_command('mkdir', '-p', nntrainer_test_resdir)
+run_command('cmd.exe', '/C', 'mkdir', nntrainer_test_resdir)
+

 nntrainer_test_inc = include_directories('./include')
diff --git a/test/nntrainer_test_util.cpp b/test/nntrainer_test_util.cpp
index 7ff307558d..2d6d86834d 100644
--- a/test/nntrainer_test_util.cpp
+++ b/test/nntrainer_test_util.cpp
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/test/unittest/compiler/unittest_interpreter.cpp b/test/unittest/compiler/unittest_interpreter.cpp
index 23c631d1c5..9aeeb1f13a 100644
--- a/test/unittest/compiler/unittest_interpreter.cpp
+++ b/test/unittest/compiler/unittest_interpreter.cpp
@@ -98,11 +98,7 @@ TEST_P(nntrainerInterpreterTest, graphSerializeAfterDeserialize) {

   graphEqual(g, new_g);

-  const size_t error_buflen = 100;
-  char error_buf[error_buflen];
-
-  EXPECT_EQ(remove(out_file_path.c_str()), 0)
-    << strerror_r(errno, error_buf, error_buflen);
+  EXPECT_EQ(remove(out_file_path.c_str()), 0) << strerror(errno);
 }

 TEST_P(nntrainerInterpreterTest, deserialize_01_n) {
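The strerror_r -> strerror changes above trade thread safety for portability, since strerror_r is unavailable on Windows. A portable, thread-safe alternative (a sketch only, not part of this patch) is to let the standard library format errno:

#include <cerrno>
#include <string>
#include <system_error>

// Formats an errno value as a human-readable message without the
// thread-safety caveats of strerror(); works on Linux and Windows alike.
std::string errno_message(int err = errno) {
  return std::error_code(err, std::generic_category()).message();
}

Usage would mirror the call sites above, e.g. `std::cerr << "remove ini " << name << " failed, reason: " << errno_message();`.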
diff --git a/test/unittest/compiler/unittest_tflite_export.cpp b/test/unittest/compiler/unittest_tflite_export.cpp
index 27e27826b1..3423fcc970 100644
--- a/test/unittest/compiler/unittest_tflite_export.cpp
+++ b/test/unittest/compiler/unittest_tflite_export.cpp
@@ -177,12 +177,9 @@ TEST(nntrainerInterpreterTflite, simple_fc) {
     EXPECT_NEAR(out[i], ans[i], 0.000001f);

   if (remove("simple_fc.tflite")) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
     std::cerr << "remove tflite "
               << "simple_fc.tflite"
-              << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+              << "failed, reason: " << strerror(errno);
   }
 }

@@ -234,12 +231,9 @@ TEST(nntrainerInterpreterTflite, flatten_test) {
     EXPECT_NEAR(out[i], ans[i], 0.000001f);

   if (remove("flatten_test.tflite")) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
     std::cerr << "remove tflite "
               << "flatten_test.tflite"
-              << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+              << "failed, reason: " << strerror(errno);
   }
 }

@@ -307,12 +301,9 @@ TEST(nntrainerInterpreterTflite, part_of_resnet_0) {
     EXPECT_NEAR(out[i], ans[i], 0.000001f);

   if (remove("part_of_resnet.tflite")) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
     std::cerr << "remove ini "
               << "part_of_resnet.tflite"
-              << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+              << "failed, reason: " << strerror(errno);
   }
 }

@@ -389,11 +380,8 @@ TEST(nntrainerInterpreterTflite, MNIST_FULL_TEST) {
     std::cout << "out : " << out[i] << " ans : " << ans[i] << std::endl;
   }
   if (remove("MNIST_FULL_TEST.tflite")) {
-    const size_t error_buflen = 100;
-    char error_buf[error_buflen];
     std::cerr << "remove tflite "
               << "MNIST_FULL_TEST.tflite"
-              << "failed, reason: "
-              << strerror_r(errno, error_buf, error_buflen);
+              << "failed, reason: " << strerror(errno);
   }
 }
diff --git a/test/unittest/meson.build b/test/unittest/meson.build
index 7e114ba3e7..864f426010 100644
--- a/test/unittest/meson.build
+++ b/test/unittest/meson.build
@@ -27,11 +27,13 @@ dest_path = nntrainer_test_resdir

 foreach target: unzip_target
   _src_path = src_path / target[0]
-  run_command('mkdir', '-p', dest_path / target[1])
+  #run_command('mkdir', '-p', dest_path / target[1])
+  run_command('cmd.exe', '/C', 'mkdir', dest_path / target[1])
   run_command(['tar', 'xzf', _src_path, '-C', dest_path / target[1]])
 endforeach

-run_command(['cp', '-l', src_path / 'label.dat', dest_path / 'label.dat'])
+#run_command(['cp', '-l', src_path / 'label.dat', dest_path / 'label.dat'])
+run_command('cmd.exe', '/C', 'copy', src_path / 'label.dat', dest_path / 'label.dat')

 test_target = [
   ['unittest_nntrainer_activations', []],
@@ -39,7 +41,7 @@
   ['unittest_nntrainer_internal', []],
   ['unittest_nntrainer_lazy_tensor', []],
   ['unittest_nntrainer_tensor', []],
-  ['unittest_nntrainer_tensor_nhwc', []],
+  ['unittest_nntrainer_quantizer', []],
   ['unittest_util_func', []],
   ['unittest_nntrainer_modelfile', []],
   ['unittest_nntrainer_models', [
diff --git a/test/unittest/models/models_golden_test.cpp b/test/unittest/models/models_golden_test.cpp
index 58445b38f3..e279d93b54 100644
--- a/test/unittest/models/models_golden_test.cpp
+++ b/test/unittest/models/models_golden_test.cpp
@@ -65,10 +65,8 @@ TEST_P(nntrainerModelTest, model_test_save_load_compare) {
       new nntrainer::NeuralNetwork());
     nn->load(saved_ini_name, ml::train::ModelFormat::MODEL_FORMAT_INI);
     if (remove(saved_ini_name.c_str())) {
-      const size_t error_buflen = 100;
-      char error_buf[error_buflen];
-      std::cerr << "remove ini " << saved_ini_name << "failed, reason: "
-                << strerror_r(errno, error_buf, error_buflen);
+      std::cerr << "remove ini " << saved_ini_name
+                << "failed, reason: " << strerror(errno);
     }
     return nn;
   };
@@ -97,10 +95,8 @@ TEST_P(nntrainerModelTest, model_test_save_load_verify) {
       new nntrainer::NeuralNetwork());
     nn->load(saved_ini_name, ml::train::ModelFormat::MODEL_FORMAT_INI);
     if (remove(saved_ini_name.c_str())) {
-      const size_t error_buflen = 100;
-      char error_buf[error_buflen];
-      std::cerr << "remove ini " << saved_ini_name << "failed, reason: "
-                << strerror_r(errno, error_buf, error_buflen);
+      std::cerr << "remove ini " << saved_ini_name
+                << "failed, reason: " << strerror(errno);
     }
     return nn;
   };
diff --git a/test/unittest/unittest_nntrainer_models.cpp b/test/unittest/unittest_nntrainer_models.cpp
index 55306a8263..d27141df4d 100644
--- a/test/unittest/unittest_nntrainer_models.cpp
+++ b/test/unittest/unittest_nntrainer_models.cpp
@@ -958,7 +958,7 @@ auto mkResNet18Tc(const unsigned int iteration,
     nntrainer::IniWrapper("ResNet18", layers),
     nntrainer::TensorDim({batch_size, 1,1, num_class}), iteration, options);
 }
-GTEST_PARAMETER_TEST(
+GTEST_PARAMETER_TEST (
   nntrainerModelAutoTests, nntrainerModelTest, ::testing::ValuesIn(
 {
   mkModelIniTc(fc_sigmoid_mse, "3:1:1:10", 10, ModelTestOption::ALL),
diff --git a/test/unittest/unittest_nntrainer_quantizer.cpp b/test/unittest/unittest_nntrainer_quantizer.cpp
new file mode 100644
index 0000000000..ab4a1b6fd3
--- /dev/null
+++ b/test/unittest/unittest_nntrainer_quantizer.cpp
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Donghyeon Jeong
+ *
+ * @file        unittest_nntrainer_quantizer.cpp
+ * @date        16 December 2024
+ * @brief       Unit test utility for quantizer.
+ * @see         https://github.com/nnstreamer/nntrainer
+ * @author      Donghyeon Jeong
+ * @bug         No known bugs
+ */
+#include
+
+#include "nntrainer_test_util.h"
+#include "util_func.h"
+#include
+#include
+#include
+#include
+
+TEST(nntrainer_Quantizer, per_tensor_affine_01_n) {
+  nntrainer::Tensor input(3, 2, 4, 5);
+  input.setRandNormal(1.235f, 0.04f);
+
+  std::unique_ptr quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  EXPECT_THROW(quantizer->quantize(input, nntrainer::Tdatatype::FP32),
+               std::invalid_argument);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_02_n) {
+  nntrainer::Tensor input(3, 3, 24, 24);
+  input.setRandNormal(3.812f, 0.15f);
+
+  std::unique_ptr quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+
+  EXPECT_THROW(quantizer->dequantize(input, nntrainer::Tdatatype::QINT8),
+               std::invalid_argument);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_03_p) {
+  float input_data[] = {-0.16924214, -0.10338581, 0.31561565, -0.00533330,
+                        0.44809300,  -0.15348488, 0.14003623, -0.07908171,
+                        -0.21415669, -0.35267806, 0.46354777, -0.35009885,
+                        -0.07760239, -0.28348053, -0.37242615, 0.30941701};
+  nntrainer::Tensor input({1, 1, 4, 4}, input_data);
+
+  int8_t qdata[] = {-47, -28, 87,  -1,  123, -42, 39,   -22,
+                    -59, -97, 127, -96, -21, -78, -102, 85};
+  nntrainer::Tensor quant_answer(
+    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+
+  float output_data[] = {-0.17087643, -0.10179872, 0.31630316, -0.00363567,
+                         0.44718724,  -0.15269808, 0.14179108, -0.07998471,
+                         -0.21450445, -0.35265985, 0.46172991, -0.34902418,
+                         -0.07634904, -0.28358215, -0.37083820, 0.30903184};
+  nntrainer::Tensor float_answer({1, 1, 4, 4}, output_data);
+
+  // Per tensor affine quantizer
+  std::unique_ptr quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  // Perform Quantization
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+  ASSERT_EQ(quantized_tensor, quant_answer);
+
+  // Perform Dequantization
+  nntrainer::Tensor output =
+    quantizer->dequantize(quantized_tensor, nntrainer::Tdatatype::FP32);
+  ASSERT_EQ(output, float_answer);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_04_p) {
+  float input_data[] = {
+    -0.29562217, 0.02348283,  0.04334664,  0.03752254,  0.17764580,
+    0.04449826,  0.15144463,  -0.15716791, -0.07842141, 0.34517670,
+    0.16458672,  -0.09487095, -0.28020513, 0.32698259,  -0.24903688,
+    -0.33132783, 0.13940062,  0.18400775,  -0.26359966, 0.30900121,
+    0.08309542,  -0.09066082, 0.08950174,  -0.29709017, -0.26397359,
+    -0.16240828, -0.18758762, -0.31878781, 0.06728745,  -0.04749811,
+    0.16789703,  0.02212419,  0.10671097,  -0.28938687, 0.16250020,
+    -0.09017495, 0.24699482,  -0.26789218, 0.16414545,  0.22879964,
+    -0.15821624, -0.23149055, 0.26526868,  -0.11006282, -0.20480227,
+    0.29863110,  0.24005184,  -0.09062263, 0.22294718,  0.32583672,
+    -0.10362835, 0.03243832,  0.24707781,  0.27685603,  0.03360258,
+    -0.00209959, 0.27976128,  -0.24468939, -0.19273037, -0.25921509,
+    -0.20489319, 0.33036807,  0.27226517,  -0.25207010};
+  nntrainer::Tensor input({1, 1, 8, 8}, input_data);
+
+  int8_t qdata[] = {-109, 9,    16,  14,  66,   16,  56,  -58, -29, 127, 61,
+                    -35,  -104, 121, -92, -122, 51,  68,  -97, 114, 31,  -33,
+                    33,   -110, -98, -60, -69,  -118, 25, -18, 62,  8,   39,
+                    -107, 60,   -33, 91,  -99,  61,  85,  -58, -86, 98,  -41,
+                    -76,  110,  89,  -33, 82,   120, -38, 12,  91,  102, 12,
+                    -1,   103,  -90, -71, -96,  -76, 122, 101, -93};
+  nntrainer::Tensor quant_answer(
+    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+
+  float output_data[] = {
+    -0.29509223, 0.02436541,  0.04331629,  0.03790175,  0.17867969,
+    0.04331629,  0.15160701,  -0.15702155, -0.07851078, 0.34382305,
+    0.16514336,  -0.09475438, -0.28155589, 0.32757944,  -0.24906866,
+    -0.33028671, 0.13807067,  0.18409424,  -0.26260501, 0.30862856,
+    0.08392531,  -0.08933984, 0.08933984,  -0.29779950, -0.26531228,
+    -0.16243608, -0.18680149, -0.31945765, 0.06768170,  -0.04873083,
+    0.16785063,  0.02165814,  0.10558346,  -0.28967768, 0.16243608,
+    -0.08933984, 0.24636140,  -0.26801956, 0.16514336,  0.23011778,
+    -0.15702155, -0.23282506, 0.26531228,  -0.11099799, -0.20575237,
+    0.29779950,  0.24094686,  -0.08933984, 0.22199598,  0.32487217,
+    -0.10287619, 0.03248722,  0.24636140,  0.27614135,  0.03248722,
+    -0.00270727, 0.27884862,  -0.24365413, -0.19221604, -0.25989774,
+    -0.20575237, 0.33028671,  0.27343407,  -0.25177592};
+  nntrainer::Tensor float_answer({1, 1, 8, 8}, output_data);
+
+  // Per tensor affine quantizer
+  std::unique_ptr quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  // Perform Quantization
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+  ASSERT_EQ(quantized_tensor, quant_answer);
+
+  // Perform Dequantization
+  nntrainer::Tensor output =
+    quantizer->dequantize(quantized_tensor, nntrainer::Tdatatype::FP32);
+  ASSERT_EQ(output, float_answer);
+}
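The ASSERT_EQ comparisons above rely on bit-exact float reproduction, which holds here because the golden values were generated by the same code path. A tolerance-based variant (a hypothetical helper, assuming flat-index getValue/size accessors on Tensor) would be more robust across compilers:

// Sketch: compare two same-shaped FP32 tensors elementwise with a
// tolerance instead of exact equality.
static void expectTensorNear(const nntrainer::Tensor &actual,
                             const nntrainer::Tensor &expected,
                             float tol = 1e-6f) {
  ASSERT_EQ(actual.getDim(), expected.getDim());
  for (unsigned int i = 0; i < actual.size(); ++i)
    EXPECT_NEAR(actual.getValue(i), expected.getValue(i), tol);
}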
+
+int main(int argc, char **argv) {
+  int result = -1;
+
+  try {
+    testing::InitGoogleTest(&argc, argv);
+  } catch (...) {
+    std::cerr << "Error during InitGoogleTest" << std::endl;
+    return 0;
+  }
+
+  try {
+    result = RUN_ALL_TESTS();
+  } catch (...) {
+    std::cerr << "Error during RUN_ALL_TESTS()" << std::endl;
+  }
+
+  return result;
+}
diff --git a/third_party/googletest.wrap b/third_party/googletest.wrap
new file mode 100644
index 0000000000..ab3f9aa147
--- /dev/null
+++ b/third_party/googletest.wrap
@@ -0,0 +1,4 @@
+[wrap-git]
+url = https://github.com/google/googletest.git
+directory=googletest
+revision = v1.15.2
\ No newline at end of file
diff --git a/third_party/iniparser.wrap b/third_party/iniparser.wrap
new file mode 100644
index 0000000000..2110853354
--- /dev/null
+++ b/third_party/iniparser.wrap
@@ -0,0 +1,4 @@
+[wrap-git]
+url = https://github.com/ndevilla/iniparser.git
+directory=iniparser
+revision = v4.2.4
\ No newline at end of file
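Both wrap files end without a trailing newline and mix `key=value` with `key = value` spacing. A cleaned-up form, shown only as a suggestion (the `depth` shallow-clone option is an addition, not part of this patch), would be:

[wrap-git]
url = https://github.com/google/googletest.git
directory = googletest
revision = v1.15.2
depth = 1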