[Tizen7.0] Tizen7.0 Backporting
- This commit adds updates for Tizen7.0 backporting.
- A type mismatch bug is fixed.
- An unused variable is removed.
- Missing header files are added to the spec file.

Self evaluation:

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
EunjuYang committed Aug 28, 2024
1 parent 4500005 commit 96bb3df
Showing 7 changed files with 16 additions and 10 deletions.
1 change: 1 addition & 0 deletions meson.build
@@ -88,6 +88,7 @@ if get_option('enable-fp16')
# compatible with armv8.0 machines.
if cxx.has_argument('-mfp16-format=ieee')
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
+ add_project_arguments('-march=armv8.2-a+fp16', language: ['c', 'cpp'])
else
message ('The compiler does not support -mfp16-format=ieee. However, according to https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Half-Precision.html, gcc may use IEEE fp16 anyway. Thus, we will proceed without the option for FP16 support.')
endif
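For context, -mfp16-format=ieee asks GCC to treat __fp16 as an IEEE half-precision type, and the newly added -march=armv8.2-a+fp16 targets ARMv8.2-A with the half-precision extension so native FP16 arithmetic instructions can be emitted. A minimal sketch of the kind of code this build option is meant to support (hypothetical example, not nntrainer code; which flags a given compiler accepts depends on the target, which is why the branch above first probes cxx.has_argument('-mfp16-format=ieee')):

// fp16_scale.cpp -- hypothetical example of __fp16 usage.
#include <cstddef>

// Scales a buffer of IEEE half-precision values in place.
void scale_fp16(__fp16 *data, std::size_t len, float factor) {
  for (std::size_t i = 0; i < len; ++i) {
    // __fp16 promotes to float for arithmetic; the result is converted
    // back to half precision on the store.
    data[i] = static_cast<__fp16>(static_cast<float>(data[i]) * factor);
  }
}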
2 changes: 1 addition & 1 deletion meson_options.txt
@@ -40,7 +40,7 @@ option('enable-blas', type: 'boolean', value: true)
option('enable-fp16', type: 'boolean', value: false)
option('enable-cublas', type: 'boolean', value: false)
option('enable-openmp', type: 'boolean', value: true)
- option('enable-neon', type: 'boolean', value: false)
+ option('enable-neon', type: 'boolean', value: true)
option('enable-avx', type: 'boolean', value: false)
option('enable-opencl', type: 'boolean', value: false)

5 changes: 3 additions & 2 deletions nntrainer/layers/layer_context.cpp
@@ -541,9 +541,10 @@ bool RunLayerContext::validate(bool skip_input, bool skip_label) {
} else if (val->getVariableRef().getTensorType().data_type ==
TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- tensor_map[val->getName()] = val->getVariableRef().getData<_FP16>();
+ tensor_map[val->getName()] =
+   val->getVariableRef().template getData<_FP16>();
tensor_map[val->getGradientName()] =
-   val->getGradientRef().getData<_FP16>();
+   val->getGradientRef().template getData<_FP16>();
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
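The fix above adds the template keyword before the member-template call getData<_FP16>(). When such a call is made on an expression whose type depends on a template parameter, C++ requires the disambiguator; without it the < is parsed as a less-than operator, and stricter compilers reject the line, which is presumably why the Tizen 7.0 toolchain needed the change. A minimal sketch of the language rule with hypothetical names (not the actual nntrainer classes):

// template_disambiguator.cpp -- hypothetical illustration, not nntrainer code.
#include <vector>

struct TensorRef {
  // Member function template, analogous to getData<_FP16>() above.
  template <typename T>
  T *getData() { return nullptr; }
};

template <typename Ref>
void collect(std::vector<void *> &out, Ref &ref) {
  // `ref` has a dependent type, so the member-template call needs the
  // `template` keyword; plain `ref.getData<float>()` would be parsed as
  // (ref.getData < float) > (), which does not compile.
  out.push_back(ref.template getData<float>());
}

int main() {
  std::vector<void *> ptrs;
  TensorRef r;
  collect(ptrs, r);
  return 0;
}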
3 changes: 0 additions & 3 deletions nntrainer/layers/layer_context.h
@@ -438,9 +438,6 @@ class RunLayerContext {
d.setDataType(o_t);
w = Tensor(d, true);
}
- unsigned int o_ax = getWeightObject(idx).getOutputAxis();

- // t_w.dequantize(w, o_ax);

return;
}
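The removed o_ax was only consumed by the already commented-out dequantize call, so it can trigger unused-variable warnings, and builds that treat warnings as errors then fail. A minimal reproduction of the pattern (hypothetical code and flags, not the actual nntrainer build configuration):

// unused_variable.cpp -- hypothetical example; a build such as
//   g++ -Wall -Werror -c unused_variable.cpp
// typically fails here with an unused-variable diagnostic.
unsigned int getOutputAxis() { return 3; }

void applyWeight() {
  unsigned int o_ax = getOutputAxis(); // set but never read
  // dequantize(w, o_ax);              // its only consumer is commented out
}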
1 change: 1 addition & 0 deletions nntrainer/layers/meson.build
@@ -51,6 +51,7 @@ layer_headers = [
'layer_context.h',
'layer_devel.h',
'layer_impl.h',
+ 'acti_func.h',
'common_properties.h',
]

8 changes: 4 additions & 4 deletions nntrainer/tensor/hgemm/hgemm_pack.cpp
@@ -367,10 +367,10 @@ void packing_B8(unsigned int K, unsigned int N, const __fp16 *src,
unsigned int ldb, const __fp16 *dst) {
assert(K != 0 && N != 0 && N % 8 == 0);

- for (int i = 0; i < K; i++) {
+ for (unsigned int i = 0; i < K; i++) {
const __fp16 *a_off = src + i * ldb;
__fp16 *b_off = (__fp16 *)dst + i * 8;
- for (int j = 0; j < N; j += 8) {
+ for (unsigned int j = 0; j < N; j += 8) {
float16x8_t v = vld1q_f16(a_off);
a_off += 8;

@@ -384,10 +384,10 @@ void packing_B16(unsigned int K, unsigned int N, const __fp16 *src,
unsigned int ldb, const __fp16 *dst) {
assert(K != 0 && N != 0 && N % 16 == 0);

- for (int i = 0; i < K; i++) {
+ for (unsigned int i = 0; i < K; i++) {
const __fp16 *a_off = src + i * ldb;
__fp16 *b_off = (__fp16 *)dst + i * 16;
- for (int j = 0; j < N; j += 16) {
+ for (unsigned int j = 0; j < N; j += 16) {
float16x8_t v0_7 = vld1q_f16(a_off);
float16x8_t v8_15 = vld1q_f16(a_off + 8);
a_off += 16;
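The loop-index changes above address a signed/unsigned mismatch: K and N are unsigned int, so comparing them against a signed int index raises -Wsign-compare, which breaks builds that treat warnings as errors. A reduced sketch of the pattern, with a hypothetical function rather than the real packing kernel:

// sign_compare.cpp -- hypothetical reduced example of the packing loops.
void pack_rows(unsigned int K, unsigned int N, const float *src, float *dst) {
  // Declaring the indices as `int` would compare signed values against the
  // unsigned bounds K and N and emit -Wsign-compare; with warnings treated
  // as errors the build fails, so the indices are unsigned as well.
  for (unsigned int i = 0; i < K; ++i) {
    for (unsigned int j = 0; j < N; ++j) {
      dst[i * N + j] = src[i * N + j];
    }
  }
}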
6 changes: 6 additions & 0 deletions packaging/nntrainer.spec
@@ -567,8 +567,14 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
%{_includedir}/nntrainer/util_simd.h
%if 0%{?enable_fp16}
%{_includedir}/nntrainer/util_simd_neon.h
+ %{_includedir}/nntrainer/blas_neon.h
+ %{_includedir}/nntrainer/hgemm.h
+ %{_includedir}/nntrainer/hgemm_util.h
%endif

+ %{_includedir}/nntrainer/acti_func.h


%files devel-static
%{_libdir}/libnntrainer*.a
%exclude %{_libdir}/libcapi*.a
