[Tizen7.0] Tizen7.0 Backporting
- This commit adds updates for Tizen7.0 backporting.
- A type mismatch bug is fixed.
- An unused variable is removed.
- Missing header files are added to the spec file.

Self evaluation:

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
EunjuYang committed Aug 28, 2024
1 parent 4500005 commit 96bb3df
Showing 7 changed files with 16 additions and 10 deletions.
1 change: 1 addition & 0 deletions meson.build
@@ -88,6 +88,7 @@ if get_option('enable-fp16')
# compatible with armv8.0 machines.
if cxx.has_argument('-mfp16-format=ieee')
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
+ add_project_arguments('-march=armv8.2-a+fp16', language: ['c', 'cpp'])
else
message ('The compiler does not support -mfp16-format=ieee. However, according to https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Half-Precision.html, gcc may use IEEE fp16 anyway. Thus, we will proceed without the option for FP16 support.')
endif
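For context, -mfp16-format=ieee asks GCC to treat __fp16 as an IEEE half-precision type, and the newly added -march=armv8.2-a+fp16 targets ARMv8.2-A with the half-precision extension so native FP16 arithmetic instructions can be emitted. A minimal sketch of the kind of code this build option is meant to support (hypothetical example, not nntrainer code; which flags a given compiler accepts depends on the target, which is why the branch above first probes cxx.has_argument('-mfp16-format=ieee')):

// fp16_scale.cpp -- hypothetical example of __fp16 usage.
#include <cstddef>

// Scales a buffer of IEEE half-precision values in place.
void scale_fp16(__fp16 *data, std::size_t len, float factor) {
  for (std::size_t i = 0; i < len; ++i) {
    // __fp16 promotes to float for arithmetic; the result is converted
    // back to half precision on the store.
    data[i] = static_cast<__fp16>(static_cast<float>(data[i]) * factor);
  }
}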
2 changes: 1 addition & 1 deletion meson_options.txt
@@ -40,7 +40,7 @@ option('enable-blas', type: 'boolean', value: true)
option('enable-fp16', type: 'boolean', value: false)
option('enable-cublas', type: 'boolean', value: false)
option('enable-openmp', type: 'boolean', value: true)
- option('enable-neon', type: 'boolean', value: false)
+ option('enable-neon', type: 'boolean', value: true)
option('enable-avx', type: 'boolean', value: false)
option('enable-opencl', type: 'boolean', value: false)

5 changes: 3 additions & 2 deletions nntrainer/layers/layer_context.cpp
@@ -541,9 +541,10 @@ bool RunLayerContext::validate(bool skip_input, bool skip_label) {
} else if (val->getVariableRef().getTensorType().data_type ==
TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- tensor_map[val->getName()] = val->getVariableRef().getData<_FP16>();
+ tensor_map[val->getName()] =
+   val->getVariableRef().template getData<_FP16>();
tensor_map[val->getGradientName()] =
-   val->getGradientRef().getData<_FP16>();
+   val->getGradientRef().template getData<_FP16>();
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
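The fix above adds the template keyword before the member-template call getData<_FP16>(). When such a call is made on an expression whose type depends on a template parameter, C++ requires the disambiguator; without it the < is parsed as a less-than operator, and stricter compilers reject the line, which is presumably why the Tizen 7.0 toolchain needed the change. A minimal sketch of the language rule with hypothetical names (not the actual nntrainer classes):

// template_disambiguator.cpp -- hypothetical illustration, not nntrainer code.
#include <vector>

struct TensorRef {
  // Member function template, analogous to getData<_FP16>() above.
  template <typename T>
  T *getData() { return nullptr; }
};

template <typename Ref>
void collect(std::vector<void *> &out, Ref &ref) {
  // `ref` has a dependent type, so the member-template call needs the
  // `template` keyword; plain `ref.getData<float>()` would be parsed as
  // (ref.getData < float) > (), which does not compile.
  out.push_back(ref.template getData<float>());
}

int main() {
  std::vector<void *> ptrs;
  TensorRef r;
  collect(ptrs, r);
  return 0;
}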
3 changes: 0 additions & 3 deletions nntrainer/layers/layer_context.h
@@ -438,9 +438,6 @@ class RunLayerContext {
d.setDataType(o_t);
w = Tensor(d, true);
}
- unsigned int o_ax = getWeightObject(idx).getOutputAxis();

- // t_w.dequantize(w, o_ax);

return;
}
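The removed o_ax was only consumed by the already commented-out dequantize call, so it can trigger unused-variable warnings, and builds that treat warnings as errors then fail. A minimal reproduction of the pattern (hypothetical code and flags, not the actual nntrainer build configuration):

// unused_variable.cpp -- hypothetical example; a build such as
//   g++ -Wall -Werror -c unused_variable.cpp
// typically fails here with an unused-variable diagnostic.
unsigned int getOutputAxis() { return 3; }

void applyWeight() {
  unsigned int o_ax = getOutputAxis(); // set but never read
  // dequantize(w, o_ax);              // its only consumer is commented out
}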
1 change: 1 addition & 0 deletions nntrainer/layers/meson.build
@@ -51,6 +51,7 @@ layer_headers = [
'layer_context.h',
'layer_devel.h',
'layer_impl.h',
+ 'acti_func.h',
'common_properties.h',
]

8 changes: 4 additions & 4 deletions nntrainer/tensor/hgemm/hgemm_pack.cpp
@@ -367,10 +367,10 @@ void packing_B8(unsigned int K, unsigned int N, const __fp16 *src,
unsigned int ldb, const __fp16 *dst) {
assert(K != 0 && N != 0 && N % 8 == 0);

- for (int i = 0; i < K; i++) {
+ for (unsigned int i = 0; i < K; i++) {
const __fp16 *a_off = src + i * ldb;
__fp16 *b_off = (__fp16 *)dst + i * 8;
- for (int j = 0; j < N; j += 8) {
+ for (unsigned int j = 0; j < N; j += 8) {
float16x8_t v = vld1q_f16(a_off);
a_off += 8;

@@ -384,10 +384,10 @@ void packing_B16(unsigned int K, unsigned int N, const __fp16 *src,
unsigned int ldb, const __fp16 *dst) {
assert(K != 0 && N != 0 && N % 16 == 0);

- for (int i = 0; i < K; i++) {
+ for (unsigned int i = 0; i < K; i++) {
const __fp16 *a_off = src + i * ldb;
__fp16 *b_off = (__fp16 *)dst + i * 16;
- for (int j = 0; j < N; j += 16) {
+ for (unsigned int j = 0; j < N; j += 16) {
float16x8_t v0_7 = vld1q_f16(a_off);
float16x8_t v8_15 = vld1q_f16(a_off + 8);
a_off += 16;
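The loop-index changes above address a signed/unsigned mismatch: K and N are unsigned int, so comparing them against a signed int index raises -Wsign-compare, which breaks builds that treat warnings as errors. A reduced sketch of the pattern, with a hypothetical function rather than the real packing kernel:

// sign_compare.cpp -- hypothetical reduced example of the packing loops.
void pack_rows(unsigned int K, unsigned int N, const float *src, float *dst) {
  // Declaring the indices as `int` would compare signed values against the
  // unsigned bounds K and N and emit -Wsign-compare; with warnings treated
  // as errors the build fails, so the indices are unsigned as well.
  for (unsigned int i = 0; i < K; ++i) {
    for (unsigned int j = 0; j < N; ++j) {
      dst[i * N + j] = src[i * N + j];
    }
  }
}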
6 changes: 6 additions & 0 deletions packaging/nntrainer.spec
@@ -567,8 +567,14 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
%{_includedir}/nntrainer/util_simd.h
%if 0%{?enable_fp16}
%{_includedir}/nntrainer/util_simd_neon.h
+ %{_includedir}/nntrainer/blas_neon.h
+ %{_includedir}/nntrainer/hgemm.h
+ %{_includedir}/nntrainer/hgemm_util.h
%endif

+ %{_includedir}/nntrainer/acti_func.h


%files devel-static
%{_libdir}/libnntrainer*.a
%exclude %{_libdir}/libcapi*.a
