[CharTensor] Enable QINT8 multiplication feature #2850

Open · wants to merge 1 commit into main
51 changes: 50 additions & 1 deletion nntrainer/tensor/char_tensor.cpp
@@ -64,7 +64,6 @@ CharTensor::CharTensor(
NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
<< "invalid scale factor size " << scales.size();

/// @note 4 * scale_size() assumes scale factors are in full-precision fp.
MemoryData *mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
@@ -268,6 +267,56 @@ void CharTensor::initialize(Initializer init) {
initialize();
}

int CharTensor::multiply_i(float const &value) {
// multiply the value into the scale factors
float *g_scale = (float *)getScale();

sscal(scale_size(), value, g_scale, 1);
return ML_ERROR_NONE;
}

Tensor &CharTensor::multiply(Tensor const &input, Tensor &output,
const float scale) const {
CREATE_IF_EMPTY_DIMS(output, dim, nullptr, q_scheme());

NNTR_THROW_IF(q_scheme() != input.q_scheme(), std::invalid_argument)
<< "[Tensor] Cannot multiply tensors with different quantization schemes.";

/// @note remove after vector scale multiply is implemented
NNTR_THROW_IF(q_scheme() != QScheme::PER_TENSOR_AFFINE, std::invalid_argument)
<< "Multiplication other than per tensor affine quantization scheme is "
"NYI.";

float lhs_scale = *(float *)getScale();
float rhs_scale = *input.getScale<float>();
Comment on lines +290 to +291 · Contributor:

Are they always scalars? Based on #2844, it seems the scale can be an array.
Is this related to condition 4 in the note, i.e., "only per-tensor quantization qscheme is supported"?
If so, we need to add a check to verify whether that case is supported.

Contributor Author:

Thank you for the detailed review! Yes, you're correct. Currently, there isn't a way to check the quantization scheme of the tensor. I'll create a new PR to add one and apply it here.

Comment on lines +290 to +291 · Member:

One more thing to check:
Could I confirm that there are no plans to incorporate a zero point as a qParam for QInt8 Tensors at this moment?
I could find some computational kernels that use zero points for both int8 and uint8, so I just want to make sure where we are heading.
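
As an aside on the zero-point question (an editorial illustration, not part of this PR): with an asymmetric scheme each tensor dequantizes as scale * (q - zero_point), so the zero points would have to be subtracted before the integer product and the output zero point added back after requantization. A minimal per-element sketch, with hypothetical names:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical per-element qint8 multiply with zero points (asymmetric quantization).
// real = scale * (q - zero_point), so zero points are removed before the integer
// product and the output zero point is added back after requantization.
// multiplier = lhs_scale * rhs_scale / out_scale, as in the symmetric case.
int8_t qmul_one_asym(int8_t a, int8_t b, int32_t a_zp, int32_t b_zp,
                     int32_t out_zp, float multiplier) {
  int32_t accum =
    (static_cast<int32_t>(a) - a_zp) * (static_cast<int32_t>(b) - b_zp);
  long v = std::lround(multiplier * accum) + out_zp;
  return static_cast<int8_t>(std::max(-128L, std::min(v, 127L)));
}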


/// @note current impl assumes pre-established quantization parameters are set
/// @todo 1. verify result_scale is valid 2. calculate qparams if not given
NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
std::fpclassify(rhs_scale) == FP_ZERO ||
std::fpclassify(scale) == FP_ZERO,
std::invalid_argument)
<< "scale factors not set, cannot multiply";

float multiplier = lhs_scale * rhs_scale / scale;

int8_t *lhs = (int8_t *)getData();
int8_t *rhs = input.getData<int8_t>();
int8_t *result = output.getData<int8_t>();

for (unsigned int i = 0; i < size(); ++i) {
int32_t accum_val =
static_cast<int32_t>(lhs[i]) * static_cast<int32_t>(rhs[i]);

result[i] =
std::max(-128, std::min((int)std::lround(multiplier * accum_val), 127));
}

*output.getScale<float>() = scale;
Comment on lines +301 to +315 · Member:

As you might already know, in order to add SIMD-accelerated code and maintain it alongside the vanilla code, this should reside at the blas_interface.cpp level. I can do that later on, but I would like to discuss one thing:

// scalar scale
void ele_mul(int8_t* lhs, int8_t* rhs, int8_t* res, float lhs_scale, float rhs_scale, float scale, unsigned int N);

// vector scale
void ele_mul(int8_t* lhs, int8_t* rhs, int8_t* res, float* lhs_scale, float* rhs_scale, float* scale, unsigned int N);

Would a function design like the above be valid for your intention?

Contributor Author:

Thank you for sharing your opinion! To answer your question: yes, the function design you suggested would be valid (although for the vector scale, the number of output channels should also be provided).
Maybe we could use a single kernel to support both scalar and vector scales:

void qmul_kernel(int8_t* lhs, int8_t* rhs, int8_t* res, unsigned int data_len,
                 float* lhs_scale, float* rhs_scale, float* res_scale, unsigned int scale_len)

Member:

Sounds even better. I will refer to that.
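
For concreteness, a minimal sketch of the unified kernel floated above (the name qmul_kernel comes from the thread; the contiguous per-channel grouping and the assumption that data_len is a multiple of scale_len are editorial, not part of this PR):

#include <algorithm>
#include <cmath>
#include <cstdint>

// scale_len == 1 selects per-tensor scales; otherwise each contiguous block of
// data_len / scale_len elements is assumed to share one scale (per-channel).
void qmul_kernel(const int8_t *lhs, const int8_t *rhs, int8_t *res,
                 unsigned int data_len, const float *lhs_scale,
                 const float *rhs_scale, const float *res_scale,
                 unsigned int scale_len) {
  const unsigned int group = data_len / scale_len;
  for (unsigned int i = 0; i < data_len; ++i) {
    const unsigned int s = (scale_len == 1) ? 0 : i / group;
    const float multiplier = lhs_scale[s] * rhs_scale[s] / res_scale[s];
    const int32_t accum =
      static_cast<int32_t>(lhs[i]) * static_cast<int32_t>(rhs[i]);
    res[i] = static_cast<int8_t>(
      std::max(-128L, std::min(std::lround(multiplier * accum), 127L)));
  }
}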


return output;
}

void CharTensor::copy(const Tensor &from) {
reshape(from.getDim());
copy(from.getData());
19 changes: 19 additions & 0 deletions nntrainer/tensor/char_tensor.h
@@ -195,6 +195,25 @@ class CharTensor : public TensorBase {
*/
void initialize(Initializer init) override;

/**
* @copydoc Tensor::multiply_i(float const &value)
*/
int multiply_i(float const &value) override;

/**
* @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const
* float scale = 0.0)
*
* @note multiply only works under the following conditions:
* 1. an appropriate scale must be provided (a feature to automatically
* determine the scale factor will be added in a future update)
* 2. both tensors must have the same data type, QINT8
* 3. both tensors must have the same size (broadcasting is currently not supported)
* 4. only per-tensor quantization qscheme is supported
*/
Tensor &multiply(Tensor const &m, Tensor &output,
const float scale = 0.0) const override;

/**
* @copydoc Tensor::copy(const Tensor &from)
*/
83 changes: 83 additions & 0 deletions test/unittest/unittest_nntrainer_tensor.cpp
@@ -1037,6 +1037,89 @@ TEST(nntrainer_Tensor, multiply_08_n) {
EXPECT_THROW(input.multiply(test, output), std::invalid_argument);
}

/**
* @brief Test elementwise multiplication of qint8
* @note Compare the quantized int8 multiplication result with float multiplication
*/
TEST(nntrainer_Quantizer, multiply_09_p) {
size_t batch = 1;
size_t channel = 1;
size_t height = 4;
size_t width = 4;

// float tensor A and B (original data)
float dataA[] = {-0.16924214, -0.10338581, 0.31561565, -0.00533330,
0.44809300, -0.15348488, 0.14003623, -0.07908171,
-0.21415669, -0.35267806, 0.46354777, -0.35009885,
-0.07760239, -0.28348053, -0.37242615, 0.30941701};
nntrainer::Tensor A({batch, channel, height, width}, dataA);

float dataB[] = {-0.27615008, 0.43723762, -0.34135219, -0.01534167,
-0.32217509, 0.43340221, 0.11122712, -0.46792096,
-0.48326263, -0.26464382, 0.48709807, -0.18793547,
0.02684793, -0.10355628, 0.06903752, -0.07670835};
nntrainer::Tensor B({batch, channel, height, width}, dataB);

// quantized tensor qA and qB (quantized data - per tensor affine)
std::vector<int8_t> qdataA = {-47, -28, 87, -1, 123, -42, 39, -22,
-59, -97, 127, -96, -21, -78, -102, 85};
float scaleA = 0.00363567f;
int8_t *arrayA = reinterpret_cast<int8_t *>(&scaleA);
for (unsigned int i = 0; i < 4; ++i) {
qdataA.push_back(arrayA[i]);
}
nntrainer::Tensor qA({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataA.data());

std::vector<int8_t> qdataB = {-72, 114, -89, -4, -84, 113, 29, -122,
-126, -69, 127, -49, 7, -27, 18, -20};
float scaleB = 0.0038354177f;
int8_t *arrayB = reinterpret_cast<int8_t *>(&scaleB);
for (unsigned int i = 0; i < 4; ++i) {
qdataB.push_back(arrayB[i]);
}
nntrainer::Tensor qB({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataB.data());

// output tensors to store result
nntrainer::Tensor C(batch, channel, height, width);
nntrainer::Tensor qC(batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8);

// perform multiplication
EXPECT_NO_THROW(A.multiply(B, C));
EXPECT_NO_THROW(qA.multiply(qB, qC, 0.001927134f));

// compare multiplication result
/// @todo change line 1098 - 1104 to clone() after #2834
// nntrainer::Tensor dequantizedC = qC.clone(nntrainer::Tdatatype::FP32);
nntrainer::Tensor dequantizedC(batch, channel, height, width);
float *data = dequantizedC.getData<float>();
int8_t *qdata = qC.getData<int8_t>();

for (unsigned int i = 0; i < dequantizedC.size(); ++i) {
data[i] = qdata[i];
}

// dequantize
dequantizedC.multiply_i(0.001927134f);

const float eps = 1e-3;

for (unsigned int b = 0; b < batch; b++) {
for (unsigned c = 0; c < channel; c++) {
for (unsigned h = 0; h < height; h++) {
for (unsigned w = 0; w < width; w++) {
EXPECT_NEAR(C.getValue(b, c, h, w), dequantizedC.getValue(b, c, h, w),
eps);
}
}
}
}
}
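
For intuition, here is a standalone check of the first element of the test above, tracing the requantization path in CharTensor::multiply (constants are copied from the test; comments show rounded intermediates):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  const float lhs_scale = 0.00363567f, rhs_scale = 0.0038354177f;
  const float out_scale = 0.001927134f;
  const int8_t qa = -47, qb = -72;                 // qA[0], qB[0]
  const float a = -0.16924214f, b = -0.27615008f;  // A[0], B[0]

  // Requantization path used by CharTensor::multiply
  float multiplier = lhs_scale * rhs_scale / out_scale;                 // ~0.00724
  int32_t accum = static_cast<int32_t>(qa) * static_cast<int32_t>(qb);  // 3384
  int8_t qc = static_cast<int8_t>(
    std::max(-128L, std::min(std::lround(multiplier * accum), 127L)));  // 24

  // Dequantize and compare with the float reference, as the test does
  float dequant = qc * out_scale;  // ~0.04625
  float ref = a * b;               // ~0.04674
  assert(std::fabs(dequant - ref) < 1e-3f);
  return 0;
}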

TEST(nntrainer_Tensor, multiply_float_01_p) {
int batch = 3;
int channel = 1;