From a5ec4e6f8cae9d49c1fa8074b8a1abb8bcbb9d83 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong
Date: Fri, 27 Dec 2024 09:30:22 +0900
Subject: [PATCH] [CharTensor] Enable memory data to store scale factors based
 on quantization schemes

This pull request modifies CharTensor so that its memory data stores scale
factors according to the quantization scheme in use. It also allows the
Tensor class to specify the desired quantization scheme when creating a new
CharTensor instance. Scale factors are either computed during quantization
by the selected quantizer, or initialized manually when both the quantized
data and the corresponding scale factors are provided as inputs.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong
---
 nntrainer/tensor/char_tensor.cpp            | 97 ++++++++++++++++++---
 nntrainer/tensor/char_tensor.h              | 35 ++++++--
 nntrainer/tensor/quantizer.cpp              |  1 +
 nntrainer/tensor/quantizer.h                | 52 +++++++----
 nntrainer/tensor/tensor.cpp                 | 21 +++--
 nntrainer/tensor/tensor.h                   | 82 +++++++++++------
 test/unittest/unittest_nntrainer_tensor.cpp | 23 +++--
 7 files changed, 237 insertions(+), 74 deletions(-)

diff --git a/nntrainer/tensor/char_tensor.cpp b/nntrainer/tensor/char_tensor.cpp
index ede1802be0..921e8413ea 100644
--- a/nntrainer/tensor/char_tensor.cpp
+++ b/nntrainer/tensor/char_tensor.cpp
@@ -17,18 +17,18 @@
 
 namespace nntrainer {
 
-CharTensor::CharTensor(std::string name_, Tformat fm) :
+CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
   TensorBase(name_, fm, Tdatatype::QINT8) {}
 
 CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
-                       std::string name) :
-  TensorBase(d, alloc_now, init, name) {
+                       std::string name, QScheme qscheme_) :
+  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
   if (alloc_now)
     allocate();
 }
 
-CharTensor::CharTensor(const TensorDim &d, const void *buf) :
-  CharTensor(d, true) {
+CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
+  CharTensor(d, true, Initializer::NONE, "", qscheme_) {
   if (d.getDataLen() != 0) {
     if (buf != nullptr)
       copy(buf);
@@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :
 
 CharTensor::CharTensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  Tformat fm) {
+  std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
     throw std::out_of_range(
       "[Tensor] trying to initialize CharTensor from empty vector");
@@ -59,9 +59,14 @@ CharTensor::CharTensor(
   strides = dim.computeStrides();
   contiguous = true;
   initializer = Initializer::NONE;
+  qscheme = qscheme_;
 
-  MemoryData *mem_data =
-    new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
+  NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
+    << "invalid scale factor size " << scales.size();
+
+  /// @note sizeof(float) * scale_size() assumes the scale factors are stored
+  /// in full-precision floating point.
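+  /// The quantized data and its scale factors share a single allocation:
+  /// size() int8 elements come first, immediately followed by scale_size()
+  /// float values (one scale for PER_TENSOR_AFFINE, width() scales for
+  /// PER_CHANNEL_AFFINE); getScale() returns getData() + size().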
+  MemoryData *mem_data = new MemoryData(
+    (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
     delete[] mem_data->getAddr<int8_t>();
   });
@@ -84,9 +89,16 @@ CharTensor::CharTensor(
       for (unsigned int l = 0; l < channel(); ++l)
         this->setValue(i, l, j, k, d[i][j][k][l]);
   }
+
+  // copy scale factors
+  scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
 }
 
 bool CharTensor::operator==(const CharTensor &rhs) const {
+  if (qscheme != rhs.qscheme)
+    return false;
+
+  // compare quantized data
   const int8_t *_data = (int8_t *)getData();
   const int8_t *_rdata = (int8_t *)rhs.getData();
   for (size_t i = 0; i < size(); ++i) {
@@ -94,6 +106,14 @@ bool CharTensor::operator==(const CharTensor &rhs) const {
       return false;
   }
 
+  // compare scale factors
+  const float *_scales = (float *)getScale();
+  const float *_rscales = (float *)rhs.getScale();
+  for (size_t i = 0; i < scale_size(); ++i) {
+    if (_scales[i] != _rscales[i])
+      return false;
+  }
+
   return true;
 }
 
@@ -109,7 +129,8 @@ void CharTensor::allocate() {
 
   /// allocate new memory for the tensor data
   MemoryData *mem_data;
-  mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
+  mem_data = new MemoryData((void *)(
+    new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]{}));
   data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
     delete[] mem_data->template getAddr<int8_t>();
     delete mem_data;
   });
@@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
   return data->getAddr<int8_t>() + offset + idx;
 }
 
+void *CharTensor::getScale() const {
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((int8_t *)getData()) + size();
+}
+
+void *CharTensor::getScale(size_t idx) const {
+  NNTR_THROW_IF(idx >= scale_size(), std::invalid_argument)
+    << "Tensor::getScale() index is not valid";
+
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((float *)getScale()) + idx;
+}
+
 void *CharTensor::getAddress(unsigned int i) {
   size_t index = getIndex(batch(), channel(), height(), width());
   if (i > index) {
@@ -349,7 +389,35 @@ void CharTensor::print(std::ostream &out) const {
     out.copyfmt(init);
   }
 
-  /// @todo print quantization information
+  /// print quantization information
+  const float *q_scales = (float *)getScale();
+
+  if (scale_size() > 50) {
+    out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
+        << q_scales[2] << " ... " << q_scales[scale_size() - 3] << ' '
+        << q_scales[scale_size() - 2] << ' ' << q_scales[scale_size() - 1]
+        << ']' << std::endl;
+    return;
+  }
+
+  out << "Scale factors: ";
+  for (unsigned int i = 0; i < scale_size(); ++i) {
+    out << q_scales[i] << " ";
+  }
+  out << std::endl;
+}
+
+size_t CharTensor::scale_size() const {
+  switch (qscheme) {
+  case QScheme::PER_TENSOR_AFFINE:
+    return 1;
+  case QScheme::PER_CHANNEL_AFFINE:
+    return width();
+  default:
+    break;
+  }
+  return 0;
+}
 
 void CharTensor::copy(const void *buf) {
@@ -360,19 +428,22 @@ void CharTensor::copy(const void *buf) {
     return;
   }
 
-  /// @todo need to optimize
+  /// @todo need to optimize after #2834
   for (unsigned int i = 0; i < size(); ++i) {
     ((int8_t *)getData())[i] = ((int8_t *)buf)[i];
   }
+
+  float *scales = (float *)(((int8_t *)buf) + size());
+  scopy(scale_size(), scales, 1, (float *)getScale(), 1);
 }
 
 void CharTensor::save_quantization_info(std::ostream &file) {
-  checkedWrite(file, (char *)&axis, sizeof(uint8_t),
+  checkedWrite(file, (char *)&qscheme, sizeof(uint8_t),
                "[CharTensor::save] failed to write quantization information");
 }
 
 void CharTensor::read_quantization_info(std::ifstream &file) {
-  checkedRead(file, (char *)&axis, sizeof(uint8_t),
+  checkedRead(file, (char *)&qscheme, sizeof(uint8_t),
               "[CharTensor::read] failed to read quantization information");
 }
 
diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h
index f46bb22985..1a76017f00 100644
--- a/nntrainer/tensor/char_tensor.h
+++ b/nntrainer/tensor/char_tensor.h
@@ -12,6 +12,7 @@
 #define __CHAR_TENSOR_H__
 #ifdef __cplusplus
 
+#include <quantizer.h>
 #include <tensor_base.h>
 
 namespace nntrainer {
@@ -25,7 +26,8 @@ class CharTensor : public TensorBase {
   /**
    * @brief     Basic Constructor of Tensor
    */
-  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
+  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
@@ -34,27 +36,33 @@ class CharTensor : public TensorBase {
    * @param d Tensor dim for this tensor
    * @param alloc_now Allocate memory to this tensor or not
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme of the tensor
    */
   CharTensor(const TensorDim &d, bool alloc_now,
-             Initializer init = Initializer::NONE, std::string name = "");
+             Initializer init = Initializer::NONE, std::string name = "",
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d Tensor dim for this tensor
    * @param buf buffer
+   * @param qscheme_ quantization scheme of the tensor
    */
-  CharTensor(const TensorDim &d, const void *buf = nullptr);
+  CharTensor(const TensorDim &d, const void *buf = nullptr,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d data for the Tensor
+   * @param scales scale factors for the Tensor
    * @param fm format for the Tensor
+   * @param qscheme_ quantization scheme of the tensor
    */
   CharTensor(
     std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-    Tformat fm);
+    std::vector<float> const &scales, Tformat fm, QScheme qscheme_);
 
   /**
    * @brief Construct a new CharTensor object
@@ -101,6 +109,16 @@ class CharTensor : public TensorBase {
    */
   void *getData(size_t idx) const override;
 
+  /**
+   * @copydoc Tensor::getScale()
+   */
+  void *getScale() const override;
+
+  /**
+   * @copydoc Tensor::getScale(size_t idx)
+   */
+  void *getScale(size_t idx) const override;
+
   /**
    * @brief     i data index
    * @retval    address of ith data
@@ -227,11 +245,16 @@ class CharTensor : public TensorBase {
    */
   void read_quantization_info(std::ifstream &file) override;
 
+  /**
+   * @copydoc Tensor::scale_size()
+   */
+  size_t scale_size() const override;
+
 private:
   /**
-   * @brief quantization axis
+   * @brief quantization scheme
    */
-  uint8_t axis;
+  QScheme qscheme;
 
   /**
    * @brief  copy a buffer to @a this, the caller has to ensure that @a this is
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
index 22ef10e0de..3aa50cd2ce 100644
--- a/nntrainer/tensor/quantizer.cpp
+++ b/nntrainer/tensor/quantizer.cpp
@@ -9,6 +9,7 @@
  */
 
 #include <quantizer.h>
+#include <tensor.h>
 
 namespace nntrainer {
 
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
index dcd6a9baed..e073ed8eaa 100644
--- a/nntrainer/tensor/quantizer.h
+++ b/nntrainer/tensor/quantizer.h
@@ -12,11 +12,16 @@
 #define __QUANTIZER_H__
 #ifdef __cplusplus
 
-#include <tensor.h>
+#include <memory>
+#include <stdexcept>
 #include <tensor_dim.h>
+#include <vector>
 
 namespace nntrainer {
 
+class Tensor;
+
 /**
  * @brief defines the quantization scheme
  * @details NNTrainer provides basic quantization schemes (e.g., Per tensor
@@ -73,7 +78,8 @@ class Quantizer {
    * @param input Input tensor
    * @param qtype quantized data type
    */
-  virtual void calculateQParams(const Tensor &input, Tdatatype qtype) = 0;
+  virtual void calculateQParams(const Tensor &input,
+                                ml::train::TensorDim::DataType qtype) = 0;
 
 public:
   /**
@@ -112,14 +118,16 @@ class Quantizer {
    * @param[in] input Floating point tensor to quantize
    * @return Tensor quantized tensor
    */
-  virtual Tensor quantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor quantize(const Tensor &input,
+                          ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Dequantize a quantized tensor into a tensor.
    * @param[in] input Quantized tensor to dequantize
    * @return Tensor dequantized tensor
    */
-  virtual Tensor dequantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor dequantize(const Tensor &input,
+                            ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Get quantization Scheme type.
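+   * @note The reported scheme is what CharTensor uses to size its
+   * scale-factor storage (see CharTensor::scale_size()).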
@@ -172,12 +180,14 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -191,9 +201,11 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
@@ -220,12 +232,14 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -239,9 +253,11 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
@@ -265,12 +281,14 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -279,9 +297,11 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
 
 private:
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index b0cbae110d..bcbdc1497b 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -27,9 +27,11 @@ namespace nntrainer {
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<CharTensor>(new CharTensor(d, t_type.format),
-                                        std::default_delete<CharTensor>());
+  std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
+  QScheme qscheme_) {
+  itensor =
+    std::shared_ptr<CharTensor>(new CharTensor(d, scales, t_type.format,
+                                               qscheme_),
+                                std::default_delete<CharTensor>());
 }
 
 Tensor::Tensor(
@@ -102,7 +104,7 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
 }
 
 Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
-               std::string name) {
+               std::string name, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -130,9 +132,9 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
       std::shared_ptr<UInt32Tensor>(new UInt32Tensor(d, alloc_now, init, name),
                                     std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor =
-      std::shared_ptr<CharTensor>(new CharTensor(d, alloc_now, init, name),
-                                  std::default_delete<CharTensor>());
+    itensor = std::shared_ptr<CharTensor>(
+      new CharTensor(d, alloc_now, init, name, qscheme),
+      std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
     itensor =
@@ -150,7 +152,7 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
   }
 }
 
-Tensor::Tensor(const TensorDim &d, const void *buf) {
+Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -173,7 +175,7 @@ Tensor::Tensor(const TensorDim &d, const void *buf) {
     itensor = std::shared_ptr<UInt32Tensor>(
       new UInt32Tensor(d, buf), std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf),
+    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf, qscheme),
                                           std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
@@ -1038,6 +1040,7 @@ void Tensor::copy(const Tensor &from) {
   }
 
   if (from.size() != 0 && size() == from.size() &&
+      scale_size() == from.scale_size() &&
       getDataType() == from.getDataType()) {
     // if tensor size and data type match, copy data
     itensor->copy(from);
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 0e79ff10e6..c97989ec21 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -25,6 +25,7 @@
 
 #include <memory>
 #include <nntrainer_log.h>
+#include <quantizer.h>
 #include <tensor_base.h>
 
 #ifdef ENABLE_FP16
@@ -63,17 +64,21 @@
    * @param alloc_now If the memory of the tensor must be allocated
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(const TensorDim &d, bool alloc_now,
-         Initializer init = Initializer::NONE, std::string name = "");
+         Initializer init = Initializer::NONE, std::string name = "",
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief  Constructor of Tensor with dimension/buf
   * @param d Tensor dim for this tensor
   * @param buf buffer
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    * @note Memory for this tensor is instantaneously allocated
    */
-  Tensor(const TensorDim &d, const void *buf = nullptr);
+  Tensor(const TensorDim &d, const void *buf = nullptr,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Constructor of Tensor
@@ -83,10 +88,12 @@
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr, qscheme_){};
 
   /**
    * @brief Constructor of Tensor
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, d1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief Constructor of Tensor with batch size one and d1 size one
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief Constructor of Tensor with just Width or Channel
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
-                  Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, 1, d3, fm, d_type){};
+                  Tdatatype d_type = Tdatatype::FP32,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, 1, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief Constructor of Tensor
    * @param[in] d0 Batch
    * @param[in] d1 Channel
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr, qscheme_){};
 
   /**
    * @brief Constructor of Tensor
    * @param[in] d1 Channel
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(1, d1, d2, d3, t_type){};
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, t_type, qscheme_){};
 
   /**
    * @brief Constructor of Tensor with batch size one
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
+  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
            (t_type.format == Tformat::NCHW) ? d2 : 1,
-           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type, qscheme_){};
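 
+  /**
+   * @note Every convenience constructor above defaults to
+   * QScheme::PER_TENSOR_AFFINE; the scheme only takes effect when the
+   * tensor's data type is a quantized type (e.g., QINT8).
+   */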
 
   /**
    * @brief Constructor of Tensor with just Width or Channel
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
+  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
-           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type, qscheme_){};
 
   /**
    * @brief Constructor of Tensor
@@ -312,32 +333,43 @@
     Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
 
   /**
-   * @brief Constructor of Tensor
+   * @brief Constructor of CharTensor (QINT8)
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
-   * @brief Constructor of Tensor
+   * @brief Constructor of CharTensor (QINT8)
    * @note This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<int8_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
   /**
-   * @brief Constructor of Tensor
+   * @brief Constructor of CharTensor (QINT8)
    * @note This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor with batch size one
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<int8_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
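+
+  /**
+   * @note For the QINT8 constructors above, the number of scale factors
+   * must match the quantization scheme: one for PER_TENSOR_AFFINE and one
+   * per width-axis element for PER_CHANNEL_AFFINE (see
+   * CharTensor::scale_size()); a mismatch throws std::invalid_argument.
+   */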
 
   /**
    * @brief Constructor of Tensor by directly assigning TensorBase.
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 73cf6bd461..4202d164aa 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -200,12 +200,23 @@ TEST(nntrainer_Tensor, Tensor_04_p) {
     in.push_back(ttv);
   }
 
+  std::vector<float> scales = {1.349f, 3.135f, 6.196f, 2.105f, 6.125f,
+                               4.106f, 0.916f, 7.014f, 9.814f, 5.556f};
+
   nntrainer::Tensor tensor = nntrainer::Tensor(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+    in, scales, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    nntrainer::QScheme::PER_CHANNEL_AFFINE);
   ASSERT_NE(nullptr, tensor.getData<int8_t>(0));
 
   if (tensor.getValue<int8_t>(0, 0, 0, 1) != 1)
     status = ML_ERROR_INVALID_PARAMETER;
+
+  float *scale_data = tensor.getScale<float>();
+
+  for (unsigned int idx = 0; idx < scales.size(); ++idx) {
+    ASSERT_FLOAT_EQ(scale_data[idx], scales[idx]);
+  }
+
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
 
@@ -335,9 +346,11 @@ TEST(nntrainer_Tensor, Tensor_08_n) {
     in.push_back(ttv);
   }
 
-  EXPECT_THROW(nntrainer::Tensor(
-                 in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}),
-               std::out_of_range);
+  EXPECT_THROW(
+    nntrainer::Tensor(in, {3.561f},
+                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+                      nntrainer::QScheme::PER_TENSOR_AFFINE),
+    std::out_of_range);
 }
 
 TEST(nntrainer_Tensor, Tensor_09_n) {
@@ -3815,7 +3828,7 @@ TEST(nntrainer_Tensor, print_small_size_02) {
     << " 1 1 \n"
    << " 1 1 \n"
    << "\n"
-    << "-------\n";
+    << "-------\nScale factors: 0 \n";
 
   EXPECT_EQ(ss.str(), expected.str());
 }
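--
A minimal usage sketch of the constructors added in this patch
(illustrative only: the values and the helper name qint8_example are
hypothetical, and getScale<float>() is assumed to mirror the
getData<int8_t>() accessor used in the updated tests):

  #include <tensor.h>

  void qint8_example() {
    // small QINT8 tensor; PER_TENSOR_AFFINE stores exactly one scale factor
    std::vector<std::vector<std::vector<int8_t>>> data = {{{1, 2}, {3, 4}}};
    nntrainer::Tensor t(
      data, {0.5f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
      nntrainer::QScheme::PER_TENSOR_AFFINE);

    float *scale = t.getScale<float>(); // scale[0] == 0.5f
  }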