diff --git a/nntrainer/tensor/char_tensor.cpp b/nntrainer/tensor/char_tensor.cpp
index 921e8413e..2038547d4 100644
--- a/nntrainer/tensor/char_tensor.cpp
+++ b/nntrainer/tensor/char_tensor.cpp
@@ -64,7 +64,6 @@ CharTensor::CharTensor(
   NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
     << "invalid scale factor size " << scales.size();
 
-  /// @note 4 * scale_size() assumes scale factors are in full-precision fp.
   MemoryData *mem_data = new MemoryData(
     (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
@@ -268,6 +267,56 @@ void CharTensor::initialize(Initializer init) {
   initialize();
 }
 
+int CharTensor::multiply_i(float const &value) {
+  // multiply value to scale factors
+  float *g_scale = (float *)getScale();
+
+  sscal(scale_size(), value, g_scale, 1);
+  return ML_ERROR_NONE;
+}
+
+Tensor &CharTensor::multiply(Tensor const &input, Tensor &output,
+                             const float scale) const {
+  NNTR_THROW_IF(input.getFormat() != this->getFormat(), std::invalid_argument)
+    << "Tensor Format of " << getName() << ":"
+    << ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " is not match. ("
+    << ((bool)(input.getFormat()) ? "NHWC" : "NCHW") << ")";
+
+  NNTR_THROW_IF(!contiguous || !input.getContiguous() ||
+                  !output.getContiguous(),
+                std::invalid_argument)
+    << getName() << " is not contiguous, cannot multiply";
+
+  float lhs_scale = *(float *)getScale();
+  float rhs_scale = *input.getScale<float>();
+
+  /// @note current impl assumes pre-established quantization parameters are set
+  /// @todo 1. verify result_scale is valid 2. calculate qparams if not given
+  NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
+                  std::fpclassify(rhs_scale) == FP_ZERO ||
+                  std::fpclassify(scale) == FP_ZERO,
+                std::invalid_argument)
+    << "scale factors not set, cannot multiply";
+
+  float multiplier = lhs_scale * rhs_scale / scale;
+
+  int8_t *lhs = (int8_t *)getData();
+  int8_t *rhs = input.getData<int8_t>();
+  int8_t *result = output.getData<int8_t>();
+
+  for (unsigned int i = 0; i < size(); ++i) {
+    int32_t accum_val =
+      static_cast<int32_t>(lhs[i]) * static_cast<int32_t>(rhs[i]);
+
+    result[i] =
+      std::max(-128, std::min((int)std::lround(multiplier * accum_val), 127));
+  }
+
+  *output.getScale<float>() = scale;
+
+  return output;
+}
+
 void CharTensor::copy(const Tensor &from) {
   reshape(from.getDim());
   copy(from.getData());
diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h
index 1a76017f0..3ac22fb77 100644
--- a/nntrainer/tensor/char_tensor.h
+++ b/nntrainer/tensor/char_tensor.h
@@ -195,6 +195,25 @@ class CharTensor : public TensorBase {
    */
   void initialize(Initializer init) override;
 
+  /**
+   * @copydoc Tensor::multiply_i(float const &value)
+   */
+  int multiply_i(float const &value) override;
+
+  /**
+   * @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const
+   * float scale = 0.0)
+   *
+   * @note multiply only works under the following conditions.
+   * 1. appropriate scale must be provided (feature to automatically determine
+   * the scale factor will be added in the future update.)
+   * 2. should have same data type QINT8.
+   * 3. should have same size (broadcasting is currently not supported)
+   * 4. only per-tensor quantization qscheme is supported
+   */
+  Tensor &multiply(Tensor const &m, Tensor &output,
+                   const float scale = 0.0) const override;
+
   /**
    * @copydoc Tensor::copy(const Tensor &from)
    */
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 4202d164a..f090a4ffa 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -1037,6 +1037,89 @@ TEST(nntrainer_Tensor, multiply_08_n) {
   EXPECT_THROW(input.multiply(test, output), std::invalid_argument);
 }
 
+/**
+ * @brief Test elementwise multiplication of qint8
+ * @note Compare quantized int 8 mutiplication result with float multiplication
+ */
+TEST(nntrainer_Quantizer, multiply_09_p) {
+  size_t batch = 1;
+  size_t channel = 1;
+  size_t height = 4;
+  size_t width = 4;
+
+  // float tensor A and B (original data)
+  float dataA[] = {-0.16924214, -0.10338581, 0.31561565,  -0.00533330,
+                   0.44809300,  -0.15348488, 0.14003623,  -0.07908171,
+                   -0.21415669, -0.35267806, 0.46354777,  -0.35009885,
+                   -0.07760239, -0.28348053, -0.37242615, 0.30941701};
+  nntrainer::Tensor A({batch, channel, height, width}, dataA);
+
+  float dataB[] = {-0.27615008, 0.43723762,  -0.34135219, -0.01534167,
+                   -0.32217509, 0.43340221,  0.11122712,  -0.46792096,
+                   -0.48326263, -0.26464382, 0.48709807,  -0.18793547,
+                   0.02684793,  -0.10355628, 0.06903752,  -0.07670835};
+  nntrainer::Tensor B({batch, channel, height, width}, dataB);
+
+  // quantized tensor qA and qB (quantized data - per tensor affine)
+  std::vector<int8_t> qdataA = {-47, -28, 87,  -1,  123, -42, 39,   -22,
+                                -59, -97, 127, -96, -21, -78, -102, 85};
+  float scaleA = 0.00363567f;
+  int8_t *arrayA = reinterpret_cast<int8_t *>(&scaleA);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdataA.push_back(arrayA[i]);
+  }
+  nntrainer::Tensor qA({batch, channel, height, width, nntrainer::Tformat::NCHW,
+                        nntrainer::Tdatatype::QINT8},
+                       qdataA.data());
+
+  std::vector<int8_t> qdataB = {-72,  114, -89, -4,  -84, 113, 29, -122,
+                                -126, -69, 127, -49, 7,   -27, 18, -20};
+  float scaleB = 0.0038354177f;
+  int8_t *arrayB = reinterpret_cast<int8_t *>(&scaleB);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdataB.push_back(arrayB[i]);
+  }
+  nntrainer::Tensor qB({batch, channel, height, width, nntrainer::Tformat::NCHW,
+                        nntrainer::Tdatatype::QINT8},
+                       qdataB.data());
+
+  // output tensors to store result
+  nntrainer::Tensor C(batch, channel, height, width);
+  nntrainer::Tensor qC(batch, channel, height, width, nntrainer::Tformat::NCHW,
+                       nntrainer::Tdatatype::QINT8);
+
+  // perform multiplication
+  EXPECT_NO_THROW(A.multiply(B, C));
+  EXPECT_NO_THROW(qA.multiply(qB, qC, 0.001927134f));
+
+  // compare multiplication result
+  /// @todo change line 1098 - 1104 to clone() after #2834
+  // nntrainer::Tensor dequantizedC = qC.clone(nntrainer::Tdatatype::FP32);
+  nntrainer::Tensor dequantizedC(batch, channel, height, width);
+  float *data = dequantizedC.getData<float>();
+  int8_t *qdata = qC.getData<int8_t>();
+
+  for (unsigned int i = 0; i < dequantizedC.size(); ++i) {
+    data[i] = qdata[i];
+  }
+
+  // dequantize
+  dequantizedC.multiply_i(0.001927134f);
+
+  const float eps = 1e-3;
+
+  for (unsigned int b = 0; b < batch; b++) {
+    for (unsigned c = 0; c < channel; c++) {
+      for (unsigned h = 0; h < height; h++) {
+        for (unsigned w = 0; w < width; w++) {
+          EXPECT_NEAR(C.getValue(b, c, h, w), dequantizedC.getValue(b, c, h, w),
+                      eps);
+        }
+      }
+    }
+  }
+}
+
 TEST(nntrainer_Tensor, multiply_float_01_p) {
   int batch = 3;
   int channel = 1;