Skip to content

Commit

Permalink
[CharTensor] Enable memory data to store scale factors based on quant…
Browse files Browse the repository at this point in the history
…ization schemes

This pull request aims to modify the existing codebase such that the memory data of CharTensor can now store scale factors based on different quantization schemes.
Additionally, this change allows the Tensor class to specify the desired quantization scheme while creating a new CharTensor instance.
The scale factors are either computed during the quantization process by a specific quantizer, or initialized manually when both the quantized data and the corresponding scale factors are provided as inputs.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
  • Loading branch information
djeong20 committed Dec 30, 2024
1 parent 24a868d commit a5ec4e6
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 74 deletions.
97 changes: 84 additions & 13 deletions nntrainer/tensor/char_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@

namespace nntrainer {

/**
 * @brief Basic constructor; creates an unallocated QINT8 tensor.
 *
 * @param name_    name of the tensor
 * @param fm       tensor format
 * @param qscheme_ quantization scheme of the tensor
 *
 * @note The member `qscheme` has no in-class initializer, so it must be set
 *       here; previously the parameter was accepted but never stored, leaving
 *       the scheme indeterminate for tensors built through this constructor.
 */
CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
  TensorBase(name_, fm, Tdatatype::QINT8), qscheme(qscheme_) {}

/**
 * @brief Construct a new CharTensor object with a given dimension.
 *
 * @param d         tensor dimension
 * @param alloc_now whether to allocate memory immediately
 * @param init      initializer for the tensor
 * @param name      name of the tensor
 * @param qscheme_  quantization scheme of the tensor
 */
CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
                       std::string name, QScheme qscheme_) :
  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
  // defer allocation to a later allocate() call unless requested now
  if (alloc_now)
    allocate();
}

CharTensor::CharTensor(const TensorDim &d, const void *buf) :
CharTensor(d, true) {
CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
CharTensor(d, true, Initializer::NONE, "", qscheme_) {
if (d.getDataLen() != 0) {
if (buf != nullptr)
copy(buf);
Expand All @@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :

CharTensor::CharTensor(
std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
Tformat fm) {
std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
throw std::out_of_range(
"[Tensor] trying to initialize CharTensor from empty vector");
Expand All @@ -59,9 +59,14 @@ CharTensor::CharTensor(
strides = dim.computeStrides();
contiguous = true;
initializer = Initializer::NONE;
qscheme = qscheme_;

MemoryData *mem_data =
new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
<< "invalid scale factor size " << scales.size();

/// @note 4 * scale_size() assumes scale factors are in full-precision fp.
MemoryData *mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
delete[] mem_data->getAddr<int8_t>();
});
Expand All @@ -84,16 +89,31 @@ CharTensor::CharTensor(
for (unsigned int l = 0; l < channel(); ++l)
this->setValue(i, l, j, k, d[i][j][k][l]);
}

// copy scale factors
scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
}

/**
 * @brief Equality comparison: two CharTensors are equal iff they share the
 *        same quantization scheme, identical quantized int8 data, and
 *        identical scale factors.
 */
bool CharTensor::operator==(const CharTensor &rhs) const {
  // a differing quantization scheme rules out equality immediately
  if (qscheme != rhs.qscheme)
    return false;

  // element-wise comparison of the quantized int8 payload
  const int8_t *lhs_data = (int8_t *)getData();
  const int8_t *rhs_data = (int8_t *)rhs.getData();
  size_t idx = 0;
  while (idx < size()) {
    if (lhs_data[idx] != rhs_data[idx])
      return false;
    ++idx;
  }

  // element-wise comparison of the scale factors (exact float equality,
  // as in the data comparison above)
  const float *lhs_scale = (float *)getScale();
  const float *rhs_scale = (float *)rhs.getScale();
  for (size_t s = scale_size(); s-- > 0;) {
    if (lhs_scale[s] != rhs_scale[s])
      return false;
  }

  return true;
}

Expand All @@ -109,7 +129,8 @@ void CharTensor::allocate() {
/// allocate new memory for the tensor data
MemoryData *mem_data;

mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + 4 * scale_size()]{}));
data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
delete[] mem_data->template getAddr<int8_t>();
delete mem_data;
Expand Down Expand Up @@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
return data->getAddr<int8_t>() + offset + idx;
}

/**
 * @brief Return a pointer to the first scale factor, or nullptr when the
 *        tensor has no backing memory.
 *
 * @note The scale factors live in the same MemoryData buffer, immediately
 *       after the size() quantized int8 values.
 */
void *CharTensor::getScale() const {
  if (data == nullptr)
    return nullptr;

  data->validate();
  // skip past the quantized values to reach the scale-factor region
  int8_t *base = static_cast<int8_t *>(getData());
  return base + size();
}

/**
 * @brief Return a pointer to the idx-th scale factor.
 *
 * @param idx index of the scale factor (valid range: [0, scale_size()))
 * @throws std::invalid_argument when idx is out of range
 *
 * @note The previous check used `idx > scale_size()`, which let
 *       idx == scale_size() through and produced a pointer one past the
 *       stored scale factors; `>=` rejects it.
 */
void *CharTensor::getScale(size_t idx) const {
  NNTR_THROW_IF(idx >= scale_size(), std::invalid_argument)
    << "Tensor::getScale() index is not valid";

  if (!data)
    return nullptr;

  data->validate();
  return ((float *)getScale()) + idx;
}

void *CharTensor::getAddress(unsigned int i) {
size_t index = getIndex(batch(), channel(), height(), width());
if (i > index) {
Expand Down Expand Up @@ -349,7 +389,35 @@ void CharTensor::print(std::ostream &out) const {
out.copyfmt(init);
}

/// @todo print quantization information
/// print quantization information
const float *q_scales = (float *)getScale();

if (scale_size() > 50) {
out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
<< q_scales[2] << " ... " << q_scales[len - 3] << ' '
<< q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
return;
}

out << "Scale factors: ";
for (unsigned i = 0; i < scale_size(); ++i) {
out << q_scales[i] << " ";
}
out << std::endl;
}

/**
 * @brief Number of scale factors stored for this tensor, as determined by
 *        its quantization scheme.
 *
 * @return 1 for per-tensor affine, width() for per-channel affine,
 *         0 for any unrecognized scheme.
 */
size_t CharTensor::scale_size() const {
  // NOTE: `break` after `return` was unreachable dead code; each case now
  // simply returns.
  switch (qscheme) {
  case QScheme::PER_TENSOR_AFFINE:
    // one scale factor shared by the entire tensor
    return 1;
  case QScheme::PER_CHANNEL_AFFINE:
    // one scale factor per slice along the width axis
    return width();
  default:
    // unknown scheme: no scale factors are stored
    return 0;
  }
}

void CharTensor::copy(const void *buf) {
Expand All @@ -360,19 +428,22 @@ void CharTensor::copy(const void *buf) {
return;
}

/// @todo need to optimize
/// @todo need to optimize after #2834
for (unsigned int i = 0; i < size(); ++i) {
((int8_t *)getData())[i] = ((int8_t *)buf)[i];
}

float *scales = (float *)(((int8_t *)buf) + size());
scopy(scale_size(), scales, 1, (float *)getScale(), 1);
}

/**
 * @brief Persist the quantization scheme to the given output stream.
 *
 * @param file output stream to write into
 *
 * @note Writes exactly one byte. NOTE(review): this assumes QScheme's
 *       underlying representation fits in a uint8_t — confirm the enum's
 *       declared underlying type matches.
 */
void CharTensor::save_quantization_info(std::ostream &file) {
  checkedWrite(file, (char *)&qscheme, sizeof(uint8_t),
               "[CharTensor::save] failed to write quantization information");
}

/**
 * @brief Restore the quantization scheme from the given input stream.
 *
 * @param file input file stream to read from
 *
 * @note Reads exactly one byte, mirroring save_quantization_info().
 *       NOTE(review): assumes QScheme is one byte wide — confirm the enum's
 *       declared underlying type matches.
 */
void CharTensor::read_quantization_info(std::ifstream &file) {
  checkedRead(file, (char *)&qscheme, sizeof(uint8_t),
              "[CharTensor::read] failed to read quantization information");
}

Expand Down
35 changes: 29 additions & 6 deletions nntrainer/tensor/char_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define __CHAR_TENSOR_H__
#ifdef __cplusplus

#include <quantizer.h>
#include <tensor_base.h>

namespace nntrainer {
Expand All @@ -25,7 +26,8 @@ class CharTensor : public TensorBase {
/**
* @brief Basic Constructor of Tensor
*/
CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
Expand All @@ -34,27 +36,33 @@ class CharTensor : public TensorBase {
* @param alloc_now Allocate memory to this tensor or not
* @param init Initializer for the tensor
* @param name Name of the tensor
* @param qscheme_ Quantization scheme of the tensor
*/
CharTensor(const TensorDim &d, bool alloc_now,
Initializer init = Initializer::NONE, std::string name = "");
Initializer init = Initializer::NONE, std::string name = "",
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
*
* @param d Tensor dim for this tensor
* @param buf buffer
* @param qscheme_ quantization scheme of the tensor
*/
CharTensor(const TensorDim &d, const void *buf = nullptr);
CharTensor(const TensorDim &d, const void *buf = nullptr,
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
*
* @param d data for the Tensor
* @param scales scale factors for the Tensor
* @param fm format for the Tensor
* @param qscheme_ quantization scheme of the tensor
*/
CharTensor(
std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
Tformat fm);
std::vector<float> const &scales, Tformat fm, QScheme qscheme_);

/**
* @brief Construct a new CharTensor object
Expand Down Expand Up @@ -101,6 +109,16 @@ class CharTensor : public TensorBase {
*/
void *getData(size_t idx) const override;

/**
* @copydoc Tensor::getScale()
*/
void *getScale() const override;

/**
* @copydoc Tensor::getScale(size_t idx)
*/
void *getScale(size_t idx) const override;

/**
* @brief i data index
* @retval address of ith data
Expand Down Expand Up @@ -227,11 +245,16 @@ class CharTensor : public TensorBase {
*/
void read_quantization_info(std::ifstream &file) override;

/**
* @copydoc Tensor::scale_size()
*/
size_t scale_size() const override;

private:
/**
* @brief quantization axis
* @brief quantization scheme
*/
uint8_t axis;
QScheme qscheme;

/**
* @brief copy a buffer to @a this, the caller has to ensure that @a this is
Expand Down
1 change: 1 addition & 0 deletions nntrainer/tensor/quantizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/

#include <quantizer.h>
#include <tensor.h>

namespace nntrainer {

Expand Down
Loading

0 comments on commit a5ec4e6

Please sign in to comment.