From 31015a298d75fa2bac2580ce7f3de033474051a5 Mon Sep 17 00:00:00 2001 From: sh-mug <52068539+sh-mug@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:44:22 +0900 Subject: [PATCH] implement tanh activation function --- nngen/onnx/__init__.py | 1 + nngen/onnx/act_func.py | 3 + nngen/operator/__init__.py | 1 + nngen/operator/tanh.py | 118 ++++++++ nngen/quantizer/__init__.py | 2 + nngen/quantizer/tanh.py | 8 + nngen/verify/tanh.py | 51 ++++ ...st_matrix_conv2d_int16_3x3_stride1_tanh.py | 88 ++++++ ...st_matrix_conv2d_int32_3x3_stride1_tanh.py | 88 ++++++ ...est_matrix_conv2d_int8_3x3_stride1_tanh.py | 88 ++++++ ...nx_matrix_conv2d_tanh_int16_3x3_stride1.py | 67 +++++ ...nx_matrix_conv2d_tanh_int32_3x3_stride1.py | 67 +++++ ...nnx_matrix_conv2d_tanh_int8_3x3_stride1.py | 67 +++++ tests/onnx_matrix_tanh/onnx_matrix_tanh.py | 284 ++++++++++++++++++ .../test_onnx_matrix_tanh_int16.py | 49 +++ .../test_onnx_matrix_tanh_int32.py | 49 +++ .../test_onnx_matrix_tanh_int8.py | 49 +++ 17 files changed, 1080 insertions(+) create mode 100644 nngen/operator/tanh.py create mode 100644 nngen/quantizer/tanh.py create mode 100644 nngen/verify/tanh.py create mode 100644 tests/matrix_conv2d/test_matrix_conv2d_int16_3x3_stride1_tanh.py create mode 100644 tests/matrix_conv2d/test_matrix_conv2d_int32_3x3_stride1_tanh.py create mode 100644 tests/matrix_conv2d/test_matrix_conv2d_int8_3x3_stride1_tanh.py create mode 100644 tests/onnx_matrix_conv2d/test_onnx_matrix_conv2d_tanh_int16_3x3_stride1.py create mode 100644 tests/onnx_matrix_conv2d/test_onnx_matrix_conv2d_tanh_int32_3x3_stride1.py create mode 100644 tests/onnx_matrix_conv2d/test_onnx_matrix_conv2d_tanh_int8_3x3_stride1.py create mode 100644 tests/onnx_matrix_tanh/onnx_matrix_tanh.py create mode 100644 tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int16.py create mode 100644 tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int32.py create mode 100644 tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int8.py diff --git a/nngen/onnx/__init__.py 
class tanh(bt._ActFuncOperator):
    """LUT-based hyperbolic-tangent activation operator.

    The input is rescaled so that a real-valued input of ``lut_clip`` lands on
    LUT address ``2 ** (lut_addrwidth - 1)``; the low ``lut_addrwidth`` bits of
    the rescaled value index a table of pre-computed ``tanh`` samples.  Inputs
    beyond the table range saturate to ``+th_scale`` / ``-th_scale``.

    Args:
        features: Input operator/tensor.
        lut_addrwidth: Address width of the lookup table.  Overridden by the
            input width when the input dtype is narrower than 8 bits.
        lut_clip: Magnitude of the real-valued input mapped to the end of the
            positive half of the table.
        range_rate: Fraction of the output integer range used to represent
            ``tanh(x) == 1``.
        dtype, name, par: Forwarded to ``bt._ActFuncOperator``.
    """

    def __init__(self, features,
                 lut_addrwidth=8, lut_clip=6.0, range_rate=0.95,
                 dtype=None, name=None, par=1):

        shape = None
        # A table wider than the input itself would never be fully addressed.
        if features.dtype is not None and features.dtype.width < 8:
            lut_addrwidth = features.dtype.width

        self.lut_addrwidth = lut_addrwidth
        self.lut_clip = lut_clip
        self.range_rate = range_rate
        bt._ActFuncOperator.__init__(self, features,
                                     dtype=dtype, shape=shape, name=name, par=par)

    def _get_expected_scale_factor(self):
        """Return the number of LUT address steps per unit of real input."""
        return (2 ** (self.lut_addrwidth - 1)) / self.lut_clip

    def _get_features_scale_shamt(self):
        """Return ``(scale, shamt)`` used to rescale the input to LUT addresses.

        The ratio between the scale factor the table expects and the scale
        factor of the input is quantized with ``util.quantize_linear_scale``;
        the result is an integer multiplier and a right-shift amount
        (``round(log2(scale_factor))``).
        """
        expected_scale_factor = self._get_expected_scale_factor()

        features_scale = np.array([expected_scale_factor / self.args[0].scale_factor])
        q_features_scale, scale_factor = util.quantize_linear_scale(features_scale, 32)
        q_features_scale = int(q_features_scale[0])
        q_features_shamt = round(math.log(scale_factor, 2))
        return q_features_scale, q_features_shamt

    def get_local_control_param_values(self):
        """Return the control-parameter values (multiplier and shift amount)."""
        q_features_scale, q_features_shamt = self._get_features_scale_shamt()
        return OrderedDict([('features_scale_cparam', q_features_scale),
                            ('features_shamt_cparam', q_features_shamt)])

    def get_stream_hash(self):
        """Extend the base stream hash with the parameters that shape the LUT."""
        base = bt._ActFuncOperator.get_stream_hash(self)
        return (base, self.lut_addrwidth, self.lut_clip, self.range_rate)

    def op(self, strm, *args, **kwargs):
        """Build the stream datapath: rescale, look up, saturate."""
        features_signed = self.args[0].get_signed()

        # input * scale >> shamt  ->  value expressed in LUT address steps
        features_scale = strm.ReinterpretCast(self.features_scale_cparam,
                                              width=self.features_scale_cparam.width + 1,
                                              signed=features_signed)
        mul = strm.Times(args[0], features_scale)
        mul.width = mul.width + features_scale.width

        features_shamt = strm.ReinterpretCast(self.features_shamt_cparam,
                                              width=self.features_shamt_cparam.width,
                                              signed=False)
        sra = strm.Sra(mul, features_shamt)
        lut_addr = strm.Slice(sra, self.lut_addrwidth - 1, 0)

        out_width = self.dtype.width
        out_point = self.dtype.point
        out_signed = self.dtype.signed
        # NOTE(review): tanh is negative for negative inputs; an unsigned
        # output dtype cannot hold the negative half -- confirm whether
        # unsigned outputs are meant to be supported at all.
        if out_signed:
            out_scale = round((2 ** (out_width - 1)) * self.range_rate)
        else:
            out_scale = round((2 ** out_width) * self.range_rate)

        def _tanh(x):
            # Truncating conversion; the software model must truncate the
            # same way to stay bit-exact with the table.
            return int((np.tanh(x) * out_scale).astype(np.int64))

        half_depth = 2 ** (self.lut_addrwidth - 1)
        addr_scale = 1 / self._get_expected_scale_factor()

        # Addresses 0 .. half_depth-1 hold tanh of non-negative inputs.
        patterns_p = [_tanh(i * addr_scale) for i in range(half_depth)]
        # Addresses half_depth .. 2*half_depth-1 are the two's-complement
        # images of -half_depth .. -1, hence the reversal.
        patterns_n = [_tanh((-i - 1) * addr_scale) for i in range(half_depth)]
        patterns_n.reverse()

        patterns = patterns_p + patterns_n

        lut = strm.LUT(lut_addr, patterns, out_width, out_point, out_signed)

        p_th = half_depth - 1
        n_th = -1 * p_th

        if out_point == 0:
            th_scale = out_scale
        elif out_point > 0:
            th_scale = out_scale >> out_point
        else:
            th_scale = out_scale << (-1 * out_point)

        # tanh is odd: it saturates to +1 on the right and -1 on the left.
        # (The negative side used to saturate to 0, which is the sigmoid
        # limit, not the tanh limit.)  The software reference model used for
        # verification has to saturate to the same values.
        p = strm.Mux(sra > p_th, th_scale, lut)
        n = strm.Mux(sra < n_th, -th_scale, lut)
        out = strm.Mux(sra >= 0, p, n)

        return out

    def get_eval_method(self):
        """Return the software model from ``nngen.verify`` bound to this op.

        Raises:
            TypeError: If ``nngen.verify`` exposes no callable named after
                this class.
        """
        import nngen.verify as verify

        name = self.__class__.__name__
        method = getattr(verify, name, None)

        # NOTE(review): the lookup only succeeds if nngen.verify exports a
        # function called 'tanh' -- confirm that nngen/verify/__init__.py
        # imports it.  functools.partial would raise an opaque TypeError on
        # None (or on a bare submodule), so fail with a readable one instead.
        if not callable(method):
            raise TypeError(
                "nngen.verify provides no callable software model named '%s'" % name)

        features_scale, features_shamt = self._get_features_scale_shamt()

        method = functools.partial(method,
                                   lut_addrwidth=self.lut_addrwidth,
                                   lut_clip=self.lut_clip,
                                   range_rate=self.range_rate,
                                   features_dtype=self.args[0].dtype,
                                   features_scale=features_scale,
                                   features_shamt=features_shamt)
        return method
# ---------------------------------------------------------------------------
# nngen/quantizer/tanh.py
# ---------------------------------------------------------------------------

def tanh(visitor, node):
    """Quantize a tanh node by delegating to the sigmoid quantizer.

    ``from . import sigmoid`` binds the *submodule*; the quantizer function is
    its ``sigmoid`` attribute.  Calling the module object itself raises
    ``TypeError: 'module' object is not callable``.
    """
    # Function-scope import keeps this definition self-contained.
    from . import sigmoid as sigmoid_quantizer

    return sigmoid_quantizer.sigmoid(visitor, node)


# ---------------------------------------------------------------------------
# nngen/verify/tanh.py
# ---------------------------------------------------------------------------

def tanh(features,
         lut_addrwidth=8, lut_clip=6.0, range_rate=0.95,
         dtype=None, name=None, par=1,
         features_dtype=None, features_scale=1, features_shamt=0):
    """Software reference model of the LUT-based tanh operator.

    Args:
        features: Integer input value(s) (NumPy integer array or int).
        lut_addrwidth: Address width of the modelled lookup table.
        lut_clip: Real input magnitude mapped to ``2 ** (lut_addrwidth - 1)``
            address steps.
        range_rate: Fraction of the output integer range that represents 1.0.
        dtype: Output dtype; must provide ``width``, ``point`` and ``signed``.
        name, par, features_dtype: Accepted for signature compatibility with
            the operator; not used by the computation.
        features_scale: Integer multiplier applied to ``features``.
        features_shamt: Right-shift applied after the multiplication.

    Returns:
        ``tanh`` of the rescaled input scaled to the output range, saturated
        to ``+th_scale`` / ``-th_scale`` outside the table range.

    Raises:
        ValueError: If ``dtype`` is None.
    """
    if dtype is None:
        raise ValueError('tanh requires dtype to determine the value range.')

    # Rescale the input to LUT address steps: (x * scale) >> shamt.
    mul = features * features_scale
    sra = mul >> features_shamt

    out_width = dtype.width
    out_point = dtype.point
    out_signed = dtype.signed
    if out_signed:
        out_scale = round((2 ** (out_width - 1)) * range_rate)
    else:
        out_scale = round((2 ** out_width) * range_rate)

    def _tanh(x):
        # astype truncates toward zero, like int() on the table entries.
        return (np.tanh(x) * out_scale).astype(np.int64)

    addr_scale = lut_clip / (2 ** (lut_addrwidth - 1))
    lut = _tanh(sra * addr_scale)

    p_th = 2 ** (lut_addrwidth - 1) - 1
    n_th = -1 * p_th

    if out_point == 0:
        th_scale = out_scale
    elif out_point > 0:
        th_scale = out_scale >> out_point
    else:
        th_scale = out_scale << (-1 * out_point)

    # tanh is odd: saturate to +th_scale on the right and -th_scale on the
    # left.  (The left side used to return 0, which is the sigmoid limit.)
    p = np.where(sra > p_th, th_scale, lut)
    n = np.where(sra < n_th, -th_scale, lut)
    out = np.where(sra >= 0, p, n)

    return out
after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import matrix_conv2d + + +act_shape = (1, 7, 7, 15) +weight_shape = (7, 3, 3, 15) +bias_shape = None +scale_shape = None +act_dtype = ng.int16 +weight_dtype = ng.int16 +bias_dtype = ng.int32 +scale_dtype = ng.int16 +out_dtype = ng.int16 +stride = (1, 1, 1, 1) +rshift_mul = None +rshift_sum = None +rshift_out = None +act_func = ng.tanh +par_ich = 1 +par_och = 1 +par_col = 1 +par_row = 1 +concur_och = None +stationary = 'filter' +input_ram_size = None +filter_ram_size = None +bias_ram_size = None +scale_ram_size = None +out_ram_size = None +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = matrix_conv2d.run(act_shape, weight_shape, + bias_shape, scale_shape, + act_dtype, weight_dtype, + bias_dtype, scale_dtype, + out_dtype, + stride, + rshift_mul, rshift_sum, rshift_out, + act_func, + par_ich, par_och, par_col, par_row, + concur_och, stationary, + input_ram_size, filter_ram_size, + bias_ram_size, scale_ram_size, + out_ram_size, + axi_datawidth, silent, + filename=None, simtype=simtype, + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + + verify_rslt = rslt.splitlines()[-1] + assert(verify_rslt == '# verify: PASSED') + + +if __name__ == '__main__': + rslt = matrix_conv2d.run(act_shape, weight_shape, + bias_shape, scale_shape, + act_dtype, weight_dtype, + bias_dtype, scale_dtype, + out_dtype, + stride, + rshift_mul, rshift_sum, rshift_out, + act_func, + par_ich, par_och, par_col, par_row, + concur_och, stationary, + input_ram_size, filter_ram_size, + bias_ram_size, scale_ram_size, + out_ram_size, + axi_datawidth, silent=False, + filename='tmp.v', + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + print(rslt) diff --git 
a/tests/matrix_conv2d/test_matrix_conv2d_int32_3x3_stride1_tanh.py b/tests/matrix_conv2d/test_matrix_conv2d_int32_3x3_stride1_tanh.py new file mode 100644 index 00000000..3437e930 --- /dev/null +++ b/tests/matrix_conv2d/test_matrix_conv2d_int32_3x3_stride1_tanh.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys + +# the next line can be removed after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import matrix_conv2d + + +act_shape = (1, 7, 7, 15) +weight_shape = (7, 3, 3, 15) +bias_shape = None +scale_shape = None +act_dtype = ng.int32 +weight_dtype = ng.int32 +bias_dtype = ng.int32 +scale_dtype = ng.int32 +out_dtype = ng.int32 +stride = (1, 1, 1, 1) +rshift_mul = None +rshift_sum = None +rshift_out = None +act_func = ng.tanh +par_ich = 1 +par_och = 1 +par_col = 1 +par_row = 1 +concur_och = None +stationary = 'filter' +input_ram_size = None +filter_ram_size = None +bias_ram_size = None +scale_ram_size = None +out_ram_size = None +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = matrix_conv2d.run(act_shape, weight_shape, + bias_shape, scale_shape, + act_dtype, weight_dtype, + bias_dtype, scale_dtype, + out_dtype, + stride, + rshift_mul, rshift_sum, rshift_out, + act_func, + par_ich, par_och, par_col, par_row, + concur_och, stationary, + input_ram_size, filter_ram_size, + bias_ram_size, scale_ram_size, + out_ram_size, + axi_datawidth, silent, + filename=None, simtype=simtype, + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + + verify_rslt = rslt.splitlines()[-1] + assert(verify_rslt == '# verify: PASSED') + + +if __name__ == '__main__': + rslt = matrix_conv2d.run(act_shape, weight_shape, + bias_shape, scale_shape, + act_dtype, weight_dtype, + bias_dtype, scale_dtype, + 
out_dtype, + stride, + rshift_mul, rshift_sum, rshift_out, + act_func, + par_ich, par_och, par_col, par_row, + concur_och, stationary, + input_ram_size, filter_ram_size, + bias_ram_size, scale_ram_size, + out_ram_size, + axi_datawidth, silent=False, + filename='tmp.v', + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + print(rslt) diff --git a/tests/matrix_conv2d/test_matrix_conv2d_int8_3x3_stride1_tanh.py b/tests/matrix_conv2d/test_matrix_conv2d_int8_3x3_stride1_tanh.py new file mode 100644 index 00000000..a1855fe4 --- /dev/null +++ b/tests/matrix_conv2d/test_matrix_conv2d_int8_3x3_stride1_tanh.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys + +# the next line can be removed after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import matrix_conv2d + + +act_shape = (1, 7, 7, 15) +weight_shape = (7, 3, 3, 15) +bias_shape = None +scale_shape = None +act_dtype = ng.int8 +weight_dtype = ng.int8 +bias_dtype = ng.int32 +scale_dtype = ng.int8 +out_dtype = ng.int8 +stride = (1, 1, 1, 1) +rshift_mul = None +rshift_sum = None +rshift_out = None +act_func = ng.tanh +par_ich = 1 +par_och = 1 +par_col = 1 +par_row = 1 +concur_och = None +stationary = 'filter' +input_ram_size = None +filter_ram_size = None +bias_ram_size = None +scale_ram_size = None +out_ram_size = None +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = matrix_conv2d.run(act_shape, weight_shape, + bias_shape, scale_shape, + act_dtype, weight_dtype, + bias_dtype, scale_dtype, + out_dtype, + stride, + rshift_mul, rshift_sum, rshift_out, + act_func, + par_ich, par_och, par_col, par_row, + concur_och, stationary, + input_ram_size, filter_ram_size, + bias_ram_size, scale_ram_size, + out_ram_size, + axi_datawidth, silent, + 
# Configuration of the ONNX conv2d + tanh test (int16 activations/weights).
act_shape = (1, 7, 7, 3)
weight_shape = (9, 3, 3, 3)
act_dtype = ng.int16
weight_dtype = ng.int16
stride = 1
padding = 0
with_batchnorm = False
# This is the tanh variant of the test (see the file name); the value was
# 'Sigmoid', so the test silently re-ran the sigmoid case.
# NOTE(review): assumes onnx_matrix_conv2d.run() accepts 'Tanh' the same way
# it accepts 'Sigmoid' -- confirm against the helper.
act_func = 'Tanh'
disable_fusion = False
par_ich = 1
par_och = 1
par_col = 1
par_row = 1
concur_och = None
stationary = 'filter'
chunk_size = 64
axi_datawidth = 32
# Configuration of the ONNX conv2d + tanh test (int32 activations/weights).
act_shape = (1, 7, 7, 3)
weight_shape = (9, 3, 3, 3)
act_dtype = ng.int32
weight_dtype = ng.int32
stride = 1
padding = 0
with_batchnorm = False
# This is the tanh variant of the test (see the file name); the value was
# 'Sigmoid', so the test silently re-ran the sigmoid case.
# NOTE(review): assumes onnx_matrix_conv2d.run() accepts 'Tanh' the same way
# it accepts 'Sigmoid' -- confirm against the helper.
act_func = 'Tanh'
disable_fusion = False
par_ich = 1
par_och = 1
par_col = 1
par_row = 1
concur_och = None
stationary = 'filter'
chunk_size = 64
axi_datawidth = 32
# Configuration of the ONNX conv2d + tanh test (int8 activations/weights).
act_shape = (1, 7, 7, 3)
weight_shape = (9, 3, 3, 3)
act_dtype = ng.int8
weight_dtype = ng.int8
stride = 1
padding = 0
with_batchnorm = False
# This is the tanh variant of the test (see the file name); the value was
# 'Sigmoid', so the test silently re-ran the sigmoid case.
# NOTE(review): assumes onnx_matrix_conv2d.run() accepts 'Tanh' the same way
# it accepts 'Sigmoid' -- confirm against the helper.
act_func = 'Tanh'
disable_fusion = False
par_ich = 1
par_och = 1
par_col = 1
par_row = 1
concur_och = None
stationary = 'filter'
chunk_size = 64
axi_datawidth = 32
class MatrixSigmoid(nn.Module):
    """Minimal PyTorch model that applies tanh element-wise.

    The class name is a leftover from the sigmoid test this file was derived
    from; it is kept unchanged because ``run()`` instantiates
    ``MatrixSigmoid``.
    """

    def __init__(self):
        super(MatrixSigmoid, self).__init__()
        # Was nn.Sigmoid(): the exported ONNX graph then contained a Sigmoid
        # node, so the tanh operator was never generated or verified.
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return ``tanh(x)`` computed element-wise."""
        return self.tanh(x)
-------------------- + # (1) Represent a DNN model as a dataflow by NNgen operators + # -------------------- + + # ONNX to NNgen + dtypes = {'act': act_dtype, + 'out': act_dtype} + + (outputs, placeholders, variables, + constants, operators) = ng.from_onnx(onnx_filename, + value_dtypes=dtypes, + default_placeholder_dtype=act_dtype, + default_variable_dtype=ng.int32, + default_constant_dtype=ng.int32, + default_operator_dtype=act_dtype, + default_scale_dtype=ng.int32, + default_bias_dtype=ng.int32, + disable_fusion=False) + + # -------------------- + # (2) Assign quantized weights to the NNgen operators + # -------------------- + + if act_dtype.width > 8: + act_scale_factor = 128 + else: + act_scale_factor = int(round(2 ** (act_dtype.width - 1) * 0.5)) + + input_scale_factors = {'act': act_scale_factor} + + ng.quantize(outputs, input_scale_factors) + + # -------------------- + # (3) Assign hardware attributes + # -------------------- + + for op in operators.values(): + if isinstance(op, ng.tanh): + op.attribute(par=par) + + # -------------------- + # (4) Verify the DNN model behavior by executing the NNgen dataflow as a software + # -------------------- + + act = placeholders['act'] + out = outputs['out'] + + # verification data + # random data + #std = 0.2 + #mean = 0.5 + std = 3.0 + mean = 0.0 + img = np.random.normal(size=act.length).astype(np.float32).reshape(act.shape) + img = img * std + mean + + # execution on pytorch + model_input = img + + if act.perm is not None: + model_input = np.transpose(model_input, act.reversed_perm) + + model.eval() + model_out = model(torch.from_numpy(model_input)).detach().numpy() + if act.perm is not None and len(model_out.shape) == len(act.shape): + model_out = np.transpose(model_out, act.perm) + scaled_model_out = model_out * out.scale_factor + + # software-based verification + vact = img * act_scale_factor + vact = np.clip(vact, + -1.0 * (2 ** (act.dtype.width - 1) - 1), + 1.0 * (2 ** (act.dtype.width - 1) - 1)) + vact = 
np.round(vact).astype(np.int64) + + eval_outs = ng.eval([out], act=vact) + vout = eval_outs[0] + + mean_square_error = np.sum((vout - scaled_model_out) ** 2) / vout.size + corrcoef = np.corrcoef(model_out.reshape([-1]), vout.reshape([-1])) + + # breakpoint() + + # -------------------- + # (5) Convert the NNgen dataflow to a hardware description (Verilog HDL and IP-XACT) + # -------------------- + + targ = ng.to_veriloggen([out], 'onnx_matrix_tanh', silent=silent, + config={'maxi_datawidth': axi_datawidth}) + + # -------------------- + # (6) Simulate the generated hardware by Veriloggen and Verilog simulator + # -------------------- + + if simtype is None: + sys.exit() + + # to memory image + param_data = ng.export_ndarray([out]) + param_bytes = len(param_data) + + variable_addr = int(math.ceil((act.addr + act.memory_size) / chunk_size)) * chunk_size + check_addr = int(math.ceil((variable_addr + param_bytes) / chunk_size)) * chunk_size + tmp_addr = int(math.ceil((check_addr + out.memory_size) / chunk_size)) * chunk_size + + memimg_datawidth = 32 + mem = np.zeros([1024 * 1024 * 8 // (memimg_datawidth // 8)], dtype=np.int64) + mem = mem + [100] + + # placeholder + axi.set_memory(mem, vact, memimg_datawidth, + act_dtype.width, act.addr, + max(int(math.ceil(axi_datawidth / act_dtype.width)), par)) + + # parameters (variable and constant) + axi.set_memory(mem, param_data, memimg_datawidth, + 8, variable_addr) + + # verification data + axi.set_memory(mem, vout, memimg_datawidth, + act_dtype.width, check_addr, + max(int(math.ceil(axi_datawidth / act_dtype.width)), par)) + + # test controller + m = Module('test') + params = m.copy_params(targ) + ports = m.copy_sim_ports(targ) + clk = ports['CLK'] + resetn = ports['RESETN'] + rst = m.Wire('RST') + rst.assign(Not(resetn)) + + # AXI memory model + if outputfile is None: + outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out' + + memimg_name = 'memimg_' + outputfile + + memory = axi.AxiMemoryModel(m, 'memory', 
clk, rst, + datawidth=axi_datawidth, + memimg=mem, memimg_name=memimg_name, + memimg_datawidth=memimg_datawidth) + memory.connect(ports, 'maxi') + + # AXI-Slave controller + _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True) + _saxi.connect(ports, 'saxi') + + # timer + time_counter = m.Reg('time_counter', 32, initval=0) + seq = Seq(m, 'seq', clk, rst) + seq( + time_counter.inc() + ) + + num_rep = functools.reduce(lambda x, y: x * y, out.shape[:-1], 1) + + def ctrl(): + for i in range(100): + pass + + ng.sim.set_global_addrs(_saxi, tmp_addr) + + start_time = time_counter.value + ng.sim.start(_saxi) + + print('# start') + + ng.sim.wait(_saxi) + end_time = time_counter.value + + print('# end') + print('# execution cycles: %d' % (end_time - start_time)) + + # verify + ok = True + for i in range(num_rep): + for j in range(out.shape[-1]): + orig = memory.read_word(i * out.aligned_shape[-1] + j, + out.addr, act_dtype.width) + check = memory.read_word(i * out.aligned_shape[-1] + j, + check_addr, act_dtype.width) + + if vthread.verilog.NotEql(orig, check): + print('NG', i, j, orig, check) + ok = False + # else: + # print('OK', i, j, orig, check) + + if ok: + print('# verify: PASSED') + else: + print('# verify: FAILED') + + vthread.finish() + + th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl) + fsm = th.start() + + uut = m.Instance(targ, 'uut', + params=m.connect_params(targ), + ports=m.connect_ports(targ)) + + # simulation.setup_waveform(m, uut) + simulation.setup_clock(m, clk, hperiod=5) + init = simulation.setup_reset(m, resetn, m.make_reset(), period=100, polarity='low') + + init.add( + Delay(1000000), + Systask('finish'), + ) + + # output source code + if filename is not None: + m.to_verilog(filename) + + # run simulation + sim = simulation.Simulator(m, sim=simtype) + rslt = sim.run(outputfile=outputfile) + lines = rslt.splitlines() + if simtype == 'verilator' and lines[-1].startswith('-'): + rslt = '\n'.join(lines[:-1]) + return rslt + + +if __name__ == 
'__main__': + rslt = run(silent=False, filename='tmp.v') + print(rslt) diff --git a/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int16.py b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int16.py new file mode 100644 index 00000000..810ae56b --- /dev/null +++ b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int16.py @@ -0,0 +1,49 @@ +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys + +# the next line can be removed after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import onnx_matrix_tanh + + +act_shape = (15, 31) +act_dtype = ng.int16 +par = 1 +chunk_size = 64 +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent, + filename=None, simtype=simtype, + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + + verify_rslt = rslt.splitlines()[-1] + assert(verify_rslt == '# verify: PASSED') + + +if __name__ == '__main__': + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent=False, + filename='tmp.v', + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + print(rslt) diff --git a/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int32.py b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int32.py new file mode 100644 index 00000000..5d9e1212 --- /dev/null +++ b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int32.py @@ -0,0 +1,49 @@ +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys + +# the next line can be removed after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import onnx_matrix_tanh + + +act_shape = (15, 
31) +act_dtype = ng.int32 +par = 1 +chunk_size = 64 +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent, + filename=None, simtype=simtype, + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + + verify_rslt = rslt.splitlines()[-1] + assert(verify_rslt == '# verify: PASSED') + + +if __name__ == '__main__': + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent=False, + filename='tmp.v', + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + print(rslt) diff --git a/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int8.py b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int8.py new file mode 100644 index 00000000..40f335c8 --- /dev/null +++ b/tests/onnx_matrix_tanh/test_onnx_matrix_tanh_int8.py @@ -0,0 +1,49 @@ +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys + +# the next line can be removed after installation +sys.path.insert(0, os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))) + +import nngen as ng +import veriloggen + +import onnx_matrix_tanh + + +act_shape = (15, 31) +act_dtype = ng.int8 +par = 1 +chunk_size = 64 +axi_datawidth = 32 + + +def test(request, silent=True): + veriloggen.reset() + + simtype = request.config.getoption('--sim') + + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent, + filename=None, simtype=simtype, + outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + + verify_rslt = rslt.splitlines()[-1] + assert(verify_rslt == '# verify: PASSED') + + +if __name__ == '__main__': + rslt = onnx_matrix_tanh.run(act_shape, + act_dtype, + par, + chunk_size, + axi_datawidth, silent=False, + filename='tmp.v', + 
outputfile=os.path.splitext(os.path.basename(__file__))[0] + '.out') + print(rslt)