diff --git a/nntrainer/tensor/char_tensor.cpp b/nntrainer/tensor/char_tensor.cpp
index ede1802be0..921e8413ea 100644
--- a/nntrainer/tensor/char_tensor.cpp
+++ b/nntrainer/tensor/char_tensor.cpp
@@ -17,18 +17,18 @@
 namespace nntrainer {
 
-CharTensor::CharTensor(std::string name_, Tformat fm) :
+CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
   TensorBase(name_, fm, Tdatatype::QINT8) {}
 
 CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
-                       std::string name) :
-  TensorBase(d, alloc_now, init, name) {
+                       std::string name, QScheme qscheme_) :
+  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
   if (alloc_now)
     allocate();
 }
 
-CharTensor::CharTensor(const TensorDim &d, const void *buf) :
-  CharTensor(d, true) {
+CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
+  CharTensor(d, true, Initializer::NONE, "", qscheme_) {
   if (d.getDataLen() != 0) {
     if (buf != nullptr)
       copy(buf);
@@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :
 
 CharTensor::CharTensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  Tformat fm) {
+  std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
     throw std::out_of_range(
       "[Tensor] trying to initialize CharTensor from empty vector");
@@ -59,9 +59,14 @@ CharTensor::CharTensor(
   strides = dim.computeStrides();
   contiguous = true;
   initializer = Initializer::NONE;
+  qscheme = qscheme_;
 
-  MemoryData *mem_data =
-    new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
+  NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
+    << "invalid scale factor size " << scales.size();
+
+  /// @note 4 * scale_size() assumes scale factors are in full-precision fp.
+  MemoryData *mem_data = new MemoryData(
+    (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
     delete[] mem_data->getAddr<int8_t>();
   });
@@ -84,9 +89,16 @@ CharTensor::CharTensor(
         for (unsigned int l = 0; l < channel(); ++l)
           this->setValue(i, l, j, k, d[i][j][k][l]);
   }
+
+  // copy scale factors
+  scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
 }
 
 bool CharTensor::operator==(const CharTensor &rhs) const {
+  if (qscheme != rhs.qscheme)
+    return false;
+
+  // compare quantized data
   const int8_t *_data = (int8_t *)getData();
   const int8_t *_rdata = (int8_t *)rhs.getData();
   for (size_t i = 0; i < size(); ++i) {
@@ -94,6 +106,14 @@ bool CharTensor::operator==(const CharTensor &rhs) const {
       return false;
   }
 
+  // compare scale factors
+  const float *_scales = (float *)getScale();
+  const float *_rscales = (float *)rhs.getScale();
+  for (size_t i = 0; i < scale_size(); ++i) {
+    if (_scales[i] != _rscales[i])
+      return false;
+  }
+
   return true;
 }
 
@@ -109,7 +129,8 @@ void CharTensor::allocate() {
 
     /// allocate new memory for the tensor data
     MemoryData *mem_data;
-    mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
+    mem_data = new MemoryData(
+      (void *)(new int8_t[dim.getDataLen() + 4 * scale_size()]{}));
     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
       delete[] mem_data->template getAddr<int8_t>();
       delete mem_data;
     });
@@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
   return data->getAddr<int8_t>() + offset + idx;
 }
 
+void *CharTensor::getScale() const {
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((int8_t *)getData()) + size();
+}
+
+void *CharTensor::getScale(size_t idx) const {
+  NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
"Tensor::getScale() index is not valid"; + + if (!data) + return nullptr; + + data->validate(); + return ((float *)getScale()) + idx; +} + void *CharTensor::getAddress(unsigned int i) { size_t index = getIndex(batch(), channel(), height(), width()); if (i > index) { @@ -349,7 +389,35 @@ void CharTensor::print(std::ostream &out) const { out.copyfmt(init); } - /// @todo print quantization information + /// print quantization information + const float *q_scales = (float *)getScale(); + + if (scale_size() > 50) { + out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' ' + << q_scales[2] << " ... " << q_scales[len - 3] << ' ' + << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl; + return; + } + + out << "Scale factors: "; + for (unsigned i = 0; i < scale_size(); ++i) { + out << q_scales[i] << " "; + } + out << std::endl; +} + +size_t CharTensor::scale_size() const { + switch (qscheme) { + case QScheme::PER_TENSOR_AFFINE: + return 1; + break; + case QScheme::PER_CHANNEL_AFFINE: + return width(); + break; + default: + break; + } + return 0; } void CharTensor::copy(const void *buf) { @@ -360,19 +428,22 @@ void CharTensor::copy(const void *buf) { return; } - /// @todo need to optimize + /// @todo need to optimize after #2834 for (unsigned int i = 0; i < size(); ++i) { ((int8_t *)getData())[i] = ((int8_t *)buf)[i]; } + + float *scales = (float *)(((int8_t *)buf) + size()); + scopy(scale_size(), scales, 1, (float *)getScale(), 1); } void CharTensor::save_quantization_info(std::ostream &file) { - checkedWrite(file, (char *)&axis, sizeof(uint8_t), + checkedWrite(file, (char *)&qscheme, sizeof(uint8_t), "[CharTensor::save] failed to write quantization information"); } void CharTensor::read_quantization_info(std::ifstream &file) { - checkedRead(file, (char *)&axis, sizeof(uint8_t), + checkedRead(file, (char *)&qscheme, sizeof(uint8_t), "[CharTensor::read] failed to read quantization information"); } diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h index f46bb22985..1a76017f00 100644 --- a/nntrainer/tensor/char_tensor.h +++ b/nntrainer/tensor/char_tensor.h @@ -12,6 +12,7 @@ #define __CHAR_TENSOR_H__ #ifdef __cplusplus +#include #include namespace nntrainer { @@ -25,7 +26,8 @@ class CharTensor : public TensorBase { /** * @brief Basic Constructor of Tensor */ - CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW); + CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object @@ -34,27 +36,33 @@ class CharTensor : public TensorBase { * @param alloc_now Allocate memory to this tensor or not * @param init Initializer for the tensor * @param name Name of the tensor + * @param qscheme_ Quantization scheme of the tensor */ CharTensor(const TensorDim &d, bool alloc_now, - Initializer init = Initializer::NONE, std::string name = ""); + Initializer init = Initializer::NONE, std::string name = "", + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object * * @param d Tensor dim for this tensor * @param buf buffer + * @param qscheme_ quantization scheme of the tensor */ - CharTensor(const TensorDim &d, const void *buf = nullptr); + CharTensor(const TensorDim &d, const void *buf = nullptr, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object * * @param d data for the Tensor + * @param scales scale factors for the Tensor * @param fm format for the Tensor + * @param 
   */
  CharTensor(
    std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-   Tformat fm);
+   std::vector<float> const &scales, Tformat fm, QScheme qscheme_);
 
  /**
   * @brief Construct a new CharTensor object
@@ -101,6 +109,16 @@ class CharTensor : public TensorBase {
   */
  void *getData(size_t idx) const override;
 
+ /**
+  * @copydoc Tensor::getScale()
+  */
+ void *getScale() const override;
+
+ /**
+  * @copydoc Tensor::getScale(size_t idx)
+  */
+ void *getScale(size_t idx) const override;
+
  /**
   * @brief i data index
   * @retval address of ith data
@@ -227,11 +245,16 @@ class CharTensor : public TensorBase {
   */
  void read_quantization_info(std::ifstream &file) override;
 
+ /**
+  * @copydoc Tensor::scale_size()
+  */
+ size_t scale_size() const override;
+
private:
  /**
-  * @brief quantization axis
+  * @brief quantization scheme
   */
- uint8_t axis;
+ QScheme qscheme;
 
  /**
   * @brief copy a buffer to @a this, the caller has to ensure that @a this is
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
index 22ef10e0de..3aa50cd2ce 100644
--- a/nntrainer/tensor/quantizer.cpp
+++ b/nntrainer/tensor/quantizer.cpp
@@ -9,6 +9,7 @@
  */
 
 #include <quantizer.h>
+#include <tensor.h>
 
 namespace nntrainer {
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
index dcd6a9baed..e073ed8eaa 100644
--- a/nntrainer/tensor/quantizer.h
+++ b/nntrainer/tensor/quantizer.h
@@ -12,11 +12,16 @@
 #define __QUANTIZER_H__
 #ifdef __cplusplus
 
-#include <tensor.h>
+#include <memory>
+#include <stdexcept>
 #include <tensor_dim.h>
+
 namespace nntrainer {
 
+class Tensor;
+
 /**
  * @brief defines the quantization scheme
  * @details NNTrainer provides basic quantization schemes (e.g., Per tensor
@@ -73,7 +78,8 @@ class Quantizer {
   * @param input Input tensor
   * @param qtype quantized data type
   */
-  virtual void calculateQParams(const Tensor &input, Tdatatype qtype) = 0;
+  virtual void calculateQParams(const Tensor &input,
+                                ml::train::TensorDim::DataType qtype) = 0;
 
public:
  /**
@@ -112,14 +118,16 @@ class Quantizer {
   * @param[in] input Floating point tensor to quantize
   * @return Tensor quantized tensor
   */
-  virtual Tensor quantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor quantize(const Tensor &input,
+                          ml::train::TensorDim::DataType qtype) = 0;
 
  /**
   * @brief Dequantize a quantized tensor into a tensor.
   * @param[in] input Quantized tensor to dequantize
   * @return Tensor dequantized tensor
   */
-  virtual Tensor dequantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor dequantize(const Tensor &input,
+                            ml::train::TensorDim::DataType qtype) = 0;
 
  /**
   * @brief Get quantization Scheme type.
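A note on the quantizer.h hunks above and below: tensor.h now pulls in quantizer.h for QScheme, so quantizer.h can no longer include tensor.h without creating an include cycle. The diff therefore forward-declares `class Tensor;` and spells the type argument as `ml::train::TensorDim::DataType` rather than relying on the `Tdatatype` alias the tensor headers used to provide; only quantizer.cpp, which gains `#include <tensor.h>`, sees the complete type. A minimal sketch of the idiom under these assumptions (`QuantizerSketch` is a hypothetical stand-in, not an nntrainer class):

```cpp
// Forward declaration: declaring functions that take or return Tensor
// does not require the complete type; only using its members does.
class Tensor;

class QuantizerSketch {
public:
  virtual ~QuantizerSketch() = default;

  // Fine with an incomplete Tensor: this is only a declaration. The .cpp
  // that defines the override must include the real tensor header.
  virtual Tensor quantize(const Tensor &input) = 0;
};
```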
@@ -172,12 +180,14 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
  /**
   * @copydoc Quantizer::quantize(const Tensor &input)
   */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
  /**
   * @copydoc Quantizer::dequantize(const Tensor &input)
   */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
  /**
   * @copydoc Quantizer::qscheme()
@@ -191,9 +201,11 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
  long int quant_max;
 
  /**
-  * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+  * @copydoc Quantizer::calculateQParams(const Tensor &input,
+  * ml::train::TensorDim::DataType qtype)
   */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
};
 
/**
@@ -220,12 +232,14 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
  /**
   * @copydoc Quantizer::quantize(const Tensor &input)
   */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
  /**
   * @copydoc Quantizer::dequantize(const Tensor &input)
   */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
  /**
   * @copydoc Quantizer::qscheme()
@@ -239,9 +253,11 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
  long int quant_max;
 
  /**
-  * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+  * @copydoc Quantizer::calculateQParams(const Tensor &input,
+  * ml::train::TensorDim::DataType qtype)
   */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
};
 
/**
@@ -265,12 +281,14 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
  /**
   * @copydoc Quantizer::quantize(const Tensor &input)
   */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
  /**
   * @copydoc Quantizer::dequantize(const Tensor &input)
   */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
  /**
   * @copydoc Quantizer::qscheme()
@@ -279,9 +297,11 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
 
private:
  /**
-  * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+  * @copydoc Quantizer::calculateQParams(const Tensor &input,
+  * ml::train::TensorDim::DataType qtype)
   */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
};
 
/**
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index b0cbae110d..bcbdc1497b 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -27,9 +27,11 @@ namespace nntrainer {
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<CharTensor>(new CharTensor(d, t_type.format),
-                                        std::default_delete<CharTensor>());
+  std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
+  QScheme qscheme_) {
+  itensor = std::shared_ptr<CharTensor>(
+    new CharTensor(d, scales, t_type.format, qscheme_),
+    std::default_delete<CharTensor>());
 }
 
 Tensor::Tensor(
@@ -102,7 +104,7 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
 }
 
 Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
-               std::string name) {
+               std::string name, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -130,9 +132,9 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
       std::shared_ptr<UInt32Tensor>(new UInt32Tensor(d, alloc_now, init, name),
                                     std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor =
-      std::shared_ptr<CharTensor>(new CharTensor(d, alloc_now, init, name),
-                                  std::default_delete<CharTensor>());
+    itensor = std::shared_ptr<CharTensor>(
+      new CharTensor(d, alloc_now, init, name, qscheme),
+      std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
     itensor =
@@ -150,7 +152,7 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
   }
 }
 
-Tensor::Tensor(const TensorDim &d, const void *buf) {
+Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -173,7 +175,7 @@ Tensor::Tensor(const TensorDim &d, const void *buf) {
     itensor = std::shared_ptr<UInt32Tensor>(
       new UInt32Tensor(d, buf), std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf),
+    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf, qscheme),
                                           std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
@@ -1038,6 +1040,7 @@ void Tensor::copy(const Tensor &from) {
   }
 
   if (from.size() != 0 && size() == from.size() &&
+      scale_size() == from.scale_size() &&
       getDataType() == from.getDataType()) {
     // if tensor size and data type match, copy data
     itensor->copy(from);
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 0e79ff10e6..c97989ec21 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -25,6 +25,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <quantizer.h>
 #include <tensor_base.h>
 
 #ifdef ENABLE_FP16
@@ -63,17 +64,21 @@ class Tensor {
   * @param alloc_now If the memory of the tensor must be allocated
   * @param init Initializer for the tensor
   * @param name Name of the tensor
+  * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(const TensorDim &d, bool alloc_now,
-        Initializer init = Initializer::NONE, std::string name = "");
+        Initializer init = Initializer::NONE, std::string name = "",
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
  /**
   * @brief Constructor of Tensor with dimension/buf
   * @param d Tensor dim for this tensor
   * @param buf buffer
+  * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
   * @note Memory for this tensor is instantaneously allocated
   */
-  Tensor(const TensorDim &d, const void *buf = nullptr);
+  Tensor(const TensorDim &d, const void *buf = nullptr,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
  /**
   * @brief Constructor of Tensor
   * @param[in] d0 Batch of Tensor
   * @param[in] d1 Channel
   * @param[in] d2 Height
   * @param[in] d3 Width
   * @param[in] fm Tensor Format
   * @param[in] d_type Tensor Data Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-        Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
+        Tdatatype d_type = Tdatatype::FP32,
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr, qscheme_){};
 
  /**
   * @brief Constructor of Tensor
   * @param[in] d1 Channel
   * @param[in] d2 Height
   * @param[in] d3 Width
   * @param[in] fm Tensor Format
   * @param[in] d_type Tensor Data Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-        Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, d1, d2, d3, fm, d_type){};
+        Tdatatype d_type = Tdatatype::FP32,
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, fm, d_type, qscheme_){};
 
  /**
   * @brief Constructor of Tensor with batch size one and d1 size one
   * @param[in] d2 Height (NCHW) or Width (NHWC)
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
   * @param[in] fm Tensor Format
   * @param[in] d_type Tensor Data Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-        Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, d2, d3, fm, d_type){};
+        Tdatatype d_type = Tdatatype::FP32,
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, d2, d3, fm, d_type, qscheme_){};
 
  /**
   * @brief Constructor of Tensor with just Width or Channel
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
   * @param[in] fm Tensor Format
   * @param[in] d_type Tensor Data Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
-                 Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, 1, d3, fm, d_type){};
+                 Tdatatype d_type = Tdatatype::FP32,
+                 QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, 1, d3, fm, d_type, qscheme_){};
 
  /**
   * @brief Constructor of Tensor
   * @param[in] d0 Batch of Tensor
   * @param[in] d1 Channel (NCHW) or Height (NHWC)
   * @param[in] d2 Height (NCHW) or Width (NHWC)
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
-        ml::train::TensorDim::TensorType t_type) :
-    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
+        ml::train::TensorDim::TensorType t_type,
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr, qscheme_){};
 
  /**
   * @brief Constructor of Tensor
   * @param[in] d1 Channel
   * @param[in] d2 Height
   * @param[in] d3 Width
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(size_t d1, size_t d2, size_t d3,
-        ml::train::TensorDim::TensorType t_type) :
+        ml::train::TensorDim::TensorType t_type,
+        QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
    Tensor(1, d1, d2, d3, t_type){};
 
  /**
@@ -149,19 +166,23 @@ class Tensor {
   * @param[in] d2 Height (NCHW) or Width (NHWC)
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
-  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
+  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
    Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
           (t_type.format == Tformat::NCHW) ? d2 : 1,
-           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type, qscheme_){};
 
  /**
   * @brief Constructor of Tensor with just Width or Channel
   * @param[in] d3 Width (NCHW) or Channel (NHWC)
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
-  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
+  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
    Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
-           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type, qscheme_){};
 
  /**
   * @brief Constructor of Tensor
@@ -312,32 +333,43 @@ class Tensor {
    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
 
  /**
-  * @brief Constructor of Tensor
+  * @brief Constructor of CharTensor (QINT8)
   * @param[in] d data for the Tensor. It needs to set format properly.
+  * @param[in] scales scale factors for the Tensor.
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-        ml::train::TensorDim::TensorType t_type);
+        std::vector<float> const &scales,
+        ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
  /**
-  * @brief Constructor of Tensor
+  * @brief Constructor of CharTensor (QINT8)
   * @note This constructor copies vector again. needs refactoring
   * @param[in] d data for the Tensor. It needs to set format properly.
+  * @param[in] scales scale factors for the Tensor.
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(std::vector<std::vector<std::vector<int8_t>>> const &d,
-        ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+        std::vector<float> const &scales,
+        ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
  /**
-  * @brief Constructor of Tensor
+  * @brief Constructor of CharTensor (QINT8)
   * @note This constructor copies vector again. needs refactoring
   * @param[in] d data for the Tensor with batch size one
+  * @param[in] scales scale factors for the Tensor.
   * @param[in] t_type Tensor Type
+  * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
   */
  Tensor(std::vector<std::vector<int8_t>> const &d,
-        ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+        std::vector<float> const &scales,
+        ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
  /**
   * @brief Constructor of Tensor by directly assigning TensorBase.
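Net effect of the CharTensor changes above: a single allocation now stores the int8 payload followed by the float scale factors, so `getScale()` is simply `getData() + size()`, and `scale_size()` resolves to 1 for `PER_TENSOR_AFFINE` or `width()` for `PER_CHANNEL_AFFINE`. One review observation: the `Tensor(size_t d1, size_t d2, size_t d3, t_type, ...)` overload above still delegates via `Tensor(1, d1, d2, d3, t_type)` and drops `qscheme_`. Below is a standalone sketch of the buffer layout with illustrative sizes (not nntrainer code); it assumes, as the cast in `getScale()` implicitly does, that the payload length keeps the scale region float-aligned:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const std::size_t data_len = 3 * 4 * 5; // size() of a 1x3x4x5 QINT8 tensor
  const std::size_t scale_len = 5;        // PER_CHANNEL_AFFINE -> width()

  // one buffer holds both regions, mirroring CharTensor::allocate()
  std::int8_t *buf = new std::int8_t[data_len + sizeof(float) * scale_len]();

  std::int8_t *qdata = buf;                                  // getData()
  float *scales = reinterpret_cast<float *>(buf + data_len); // getScale()

  scales[0] = 0.5f; // scale factors live immediately past the int8 payload
  std::printf("payload: %zu bytes, scales: %zu bytes\n", data_len,
              sizeof(float) * scale_len);

  (void)qdata;
  delete[] buf;
  return 0;
}
```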
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 73cf6bd461..4202d164aa 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -200,12 +200,23 @@ TEST(nntrainer_Tensor, Tensor_04_p) {
     in.push_back(ttv);
   }
 
+  std::vector<float> scales = {1.349f, 3.135f, 6.196f, 2.105f, 6.125f,
+                               4.106f, 0.916f, 7.014f, 9.814f, 5.556f};
+
   nntrainer::Tensor tensor = nntrainer::Tensor(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+    in, scales, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    nntrainer::QScheme::PER_CHANNEL_AFFINE);
   ASSERT_NE(nullptr, tensor.getData<int8_t>(0));
 
   if (tensor.getValue<int8_t>(0, 0, 0, 1) != 1)
     status = ML_ERROR_INVALID_PARAMETER;
+
+  float *scale_data = tensor.getScale<float>();
+
+  for (unsigned int idx = 0; idx < scales.size(); ++idx) {
+    ASSERT_FLOAT_EQ(scale_data[idx], scales[idx]);
+  }
+
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
 
@@ -335,9 +346,11 @@ TEST(nntrainer_Tensor, Tensor_08_n) {
     in.push_back(ttv);
   }
 
-  EXPECT_THROW(nntrainer::Tensor(
-                 in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}),
-               std::out_of_range);
+  EXPECT_THROW(
+    nntrainer::Tensor(in, {3.561f},
+                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+                      nntrainer::QScheme::PER_TENSOR_AFFINE),
+    std::out_of_range);
 }
 
 TEST(nntrainer_Tensor, Tensor_09_n) {
@@ -3815,7 +3828,7 @@ TEST(nntrainer_Tensor, print_small_size_02) {
                 << " 1 1 \n"
                 << " 1 1 \n"
                 << "\n"
-                << "-------\n";
+                << "-------\nScale factors: 0 \n";
 
   EXPECT_EQ(ss.str(), expected.str());
 }
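For completeness, a minimal usage sketch of the extended QINT8 constructor that the updated tests exercise (assumes the nntrainer headers and include paths; the values are illustrative):

```cpp
#include <tensor.h>

#include <vector>

int main() {
  // 1x1x2x2 QINT8 payload; PER_TENSOR_AFFINE implies scale_size() == 1,
  // so exactly one scale factor must be supplied.
  std::vector<std::vector<std::vector<std::vector<int8_t>>>> in = {
    {{{1, 2}, {3, 4}}}};

  nntrainer::Tensor t(
    in, {0.25f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
    nntrainer::QScheme::PER_TENSOR_AFFINE);

  // the scale is stored right after the int8 payload and read back here
  float *scale = t.getScale<float>();
  return scale[0] == 0.25f ? 0 : 1;
}
```

Supplying a scale vector whose length differs from `scale_size()` now fails the `NNTR_THROW_IF` check in the CharTensor constructor with `std::invalid_argument`.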