diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install index 314f509db..423d3b806 100644 --- a/debian/nntrainer-dev.install +++ b/debian/nntrainer-dev.install @@ -10,6 +10,7 @@ /usr/include/nntrainer/memory_data.h /usr/include/nntrainer/tensor.h /usr/include/nntrainer/tensor_base.h +/usr/include/nntrainer/int4_tensor.h /usr/include/nntrainer/char_tensor.h /usr/include/nntrainer/short_tensor.h /usr/include/nntrainer/uint_tensor.h diff --git a/nntrainer/tensor/int4_tensor.cpp b/nntrainer/tensor/int4_tensor.cpp new file mode 100644 index 000000000..4066833f4 --- /dev/null +++ b/nntrainer/tensor/int4_tensor.cpp @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * @file int4_tensor.cpp + * @date 23 January 2025 + * @brief This is Int4QTensor class for quantized 4-bit integer calculation + * @see https://github.com/nnstreamer/nntrainer + * @author Donghyeon Jeong + * @bug No known bugs except for NYI items + */ + +#include +#include + +#include +#include +#include + +namespace nntrainer { + +Int4QTensor::Int4QTensor(std::string name_, Tformat fm, QScheme qscheme_) : + TensorBase(name_, fm, Tdatatype::QINT4) {} + +Int4QTensor::Int4QTensor(const TensorDim &d, bool alloc_now, Initializer init, + std::string name, QScheme qscheme_) : + TensorBase(d, alloc_now, init, name), qscheme(qscheme_) { + if (alloc_now) + allocate(); +} + +Int4QTensor::Int4QTensor(const TensorDim &d, const void *buf, + QScheme qscheme_) : + Int4QTensor(d, true, Initializer::NONE, "", qscheme_) { + if (d.getDataLen() != 0) { + if (buf != nullptr) + copy(buf); + } +} + +Int4QTensor::Int4QTensor( + std::vector>>> const &d, + std::vector const &scales, Tformat fm, QScheme qscheme_) : + qscheme(qscheme_) { + if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { + throw std::out_of_range( + "[Tensor] trying to initialize Int4QTensor from empty vector"); + } + + NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument) + << "invalid scale factor size " << scales.size(); + + dim.setTensorDim(0, d.size()); + if (fm == Tformat::NCHW) { + dim.setTensorDim(1, d[0].size()); + dim.setTensorDim(2, d[0][0].size()); + dim.setTensorDim(3, d[0][0][0].size()); + } else { + dim.setTensorDim(2, d[0].size()); + dim.setTensorDim(3, d[0][0].size()); + dim.setTensorDim(1, d[0][0][0].size()); + } + + dim.setTensorType({fm, Tdatatype::QINT4}); + + strides = dim.computeStrides(); + contiguous = true; + initializer = Initializer::NONE; + qscheme = qscheme_; + + /// @note sizeof(float) * scale_size() assumes scale factors are in + /// full-precision fp. 
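+  /// e.g., for an illustrative 1x1x2x3 QINT4 tensor with a single scale
+  /// factor (PER_TENSOR_AFFINE), dim.getDataLen() == 6, so the packed int4
+  /// payload occupies (6 + 1) / 2 == 3 bytes and is followed by
+  /// sizeof(float) * 1 == 4 bytes of scale data, 7 bytes in total.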
+ MemoryData *mem_data = + new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2 + + sizeof(float) * scale_size()]())); + data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { + delete[] mem_data->getAddr(); + }); + + offset = 0; + + if (fm == Tformat::NCHW) { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < channel(); ++j) + for (unsigned int k = 0; k < height(); ++k) + for (unsigned int l = 0; l < width(); ++l) + this->setValue(i, j, k, l, d[i][j][k][l]); + } else { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < height(); ++j) + for (unsigned int k = 0; k < width(); ++k) + for (unsigned int l = 0; l < channel(); ++l) + this->setValue(i, l, j, k, d[i][j][k][l]); + } + + // copy scale factors + scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1); +} + +bool Int4QTensor::operator==(const Int4QTensor &rhs) const { + if (qscheme != rhs.qscheme) + return false; + + // compare quantized data + const int8_t *_data = (int8_t *)getData(); + const int8_t *_rdata = (int8_t *)rhs.getData(); + for (size_t i = 0; i < (size() + 1) / 2; ++i) { + if (_data[i] != _rdata[i]) + return false; + } + + // compare scale factors + const float *_scales = (float *)getScale(); + const float *_rscales = (float *)rhs.getScale(); + for (size_t i = 0; i < scale_size(); ++i) { + if (std::fabs(_scales[i] - _rscales[i]) > 1e-5) + return false; + } + + return true; +} + +void Int4QTensor::allocate() { + if (empty() || data) + return; + + if (src_tensor) { + /// allocate data based on the source tensor + allocateSrcTensor(); + /** as this memory is shared, do NOT initialize */ + } else { + /// allocate new memory for the tensor data + MemoryData *mem_data; + + /// quantized 4-bit is stored as a 8-bit signed integer (int4x2) + mem_data = + new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2 + + sizeof(float) * scale_size()]{})); + data = std::shared_ptr(mem_data, [](auto *mem_data) { + delete[] mem_data->template getAddr(); + delete mem_data; + }); + + offset = 0; + initialize(); + } +} + +void Int4QTensor::deallocate() { + data = nullptr; + offset = 0; +} + +void *Int4QTensor::getData() const { + if (!data) + return nullptr; + + data->validate(); + return data->getAddr() + offset; +} + +void *Int4QTensor::getData(size_t idx) const { + if (!data) + return nullptr; + + data->validate(); + return data->getAddr() + offset + (idx / 2); +} + +void *Int4QTensor::getScale() const { + if (!data) + return nullptr; + + data->validate(); + return ((int8_t *)getData()) + (size() + 1) / 2; +} + +void *Int4QTensor::getScale(size_t idx) const { + NNTR_THROW_IF(idx > scale_size(), std::invalid_argument) + << "Tensor::getScale() index is not valid"; + + if (!data) + return nullptr; + + data->validate(); + return ((float *)getScale()) + idx; +} + +void *Int4QTensor::getAddress(unsigned int i) { + size_t index = getIndex(batch(), channel(), height(), width()); + if (i > index) { + return nullptr; + } + return &((int8_t *)getData())[i / 2]; +} + +const void *Int4QTensor::getAddress(unsigned int i) const { + size_t index = getIndex(batch(), channel(), height(), width()); + if (i > index) { + return nullptr; + } + return &((int8_t *)getData())[i / 2]; +} + +const int8_t Int4QTensor::getValue(unsigned int i) const { + int8_t value = ((int8_t *)getData())[i / 2]; + return (i % 2 == 0) ? value >> 4 : ((int8_t)(value << 4) >> 4); +} + +int8_t Int4QTensor::getValue(unsigned int i) { + int8_t value = ((int8_t *)getData())[i / 2]; + return (i % 2 == 0) ? 
value >> 4 : ((int8_t)(value << 4) >> 4);
+}
+
+const int8_t Int4QTensor::getValue(unsigned int b, unsigned int c,
+                                   unsigned int h, unsigned int w) const {
+  return getValue(getIndex(b, c, h, w));
+}
+
+int8_t Int4QTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                             unsigned int w) {
+  return getValue(getIndex(b, c, h, w));
+}
+
+/// @todo this func should be template function
+void Int4QTensor::setValue(float value) {
+  NNTR_THROW_IF(value < -8 || value > 7, std::out_of_range)
+    << "Value must be in range [-8, 7]. Input value: " << value;
+
+  int8_t val = value;
+  int8_t *data = (int8_t *)getData();
+  std::fill(data, data + (size() + 1) / 2, (val << 4) | (val & 0x0f));
+}
+
+/// @todo this func should be template function
+void Int4QTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
+                           unsigned int w, float value, float beta) {
+  auto const &idx = getIndex(b, c, h, w);
+  float output = getValue(idx);
+  output *= beta;
+  output += value;
+
+  // if result value is out of range, clamp to max/min value
+  int8_t val = std::trunc(std::clamp((int)output, -8, 7));
+
+  // encode result value into the packed int8 data, replacing only the
+  // target nibble and preserving the other half of the byte
+  ((int8_t *)getData())[idx / 2] =
+    (idx % 2 == 0) ? (val << 4) | (((int8_t *)getData())[idx / 2] & 0x0f)
+                   : (((int8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
+}
+
+/// @todo this func should be template function
+void Int4QTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
+                           unsigned int w, float value) {
+  NNTR_THROW_IF(value < -8 || value > 7, std::out_of_range)
+    << "Value must be in range [-8, 7]. Input value: " << value;
+
+  auto const &idx = getIndex(b, c, h, w);
+  int8_t val = value;
+
+  // overwrite only the target nibble; the other half of the byte is kept
+  ((int8_t *)getData())[idx / 2] =
+    (idx % 2 == 0) ? (val << 4) | (((int8_t *)getData())[idx / 2] & 0x0f)
+                   : (((int8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
+}
+
+void Int4QTensor::setZero() {
+  /// @todo accelerate with SIMD
+  setValue(0);
+}
+
+void Int4QTensor::initialize() {
+  if (empty() || !isAllocated())
+    return;
+
+  /// @note Sampling from the normal/uniform distribution is invalid
+  switch (initializer) {
+  case Initializer::ZEROS:
+    setZero();
+    break;
+  case Initializer::ONES:
+    setValue(1.0f);
+    break;
+  case Initializer::NONE:
+    break;
+  default:
+    throw std::invalid_argument(
+      "Initializer other than zero and one is not valid for " +
+      getStringDataType());
+    break;
+  }
+
+  putData();
+}
+
+void Int4QTensor::initialize(Initializer init) {
+  initializer = init;
+  initialize();
+}
+
+void Int4QTensor::copy(const Tensor &from) {
+  reshape(from.getDim());
+  copy(from.getData());
+}
+
+void Int4QTensor::copyData(const Tensor &from) {
+  NNTR_THROW_IF(!contiguous, std::invalid_argument)
+    << getName() << " is not contiguous, cannot copy.";
+
+  NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
+    << "Size of the tensor to copy must match.";
+
+  /// @todo support copy from float32 & float16 to int8 data
+  switch (from.getDataType()) {
+  case ml::train::TensorDim::DataType::QINT4:
+    copy(from.getData());
+    break;
+  default:
+    throw std::invalid_argument("Error: Unsupported data type");
+    break;
+  }
+}
+
+void Int4QTensor::copy_with_stride(const Tensor &input, Tensor &output) {
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(b, c, h, w, input.getValue(b, c, h, w));
+        }
+      }
+    }
+  }
+}
+
+std::vector<unsigned int> Int4QTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const int8_t 
*data = (int8_t *)getData(); + size_t batch_size = batch(); + size_t feature_len = dim.getFeatureLen(); + result.resize(batch_size); + + for (unsigned int b = 0; b < batch_size; ++b) { + int8_t curr_val, max_val = -8; + unsigned int max_element_idx; + for (unsigned int idx = 0; idx < feature_len; ++idx) { + curr_val = getValue(idx + b * feature_len); + + if (curr_val > max_val) { + max_val = curr_val; + max_element_idx = idx; + } + } + result[b] = max_element_idx; + } + return result; +} + +float Int4QTensor::max_abs() const { + int8_t abs_max_val = 0; + int8_t curr_val; + for (unsigned int idx = 0; idx < size(); ++idx) { + curr_val = std::abs(getValue(idx)); + abs_max_val = (curr_val > abs_max_val) ? curr_val : abs_max_val; + + // Terminate search when abs_max_val is an Int4 absolute max value 8 + if (abs_max_val == 8) + return abs_max_val; + } + + return abs_max_val; +} + +float Int4QTensor::maxValue() const { + int8_t max_val = -8; + int8_t curr_val; + for (unsigned int idx = 0; idx < size(); ++idx) { + curr_val = getValue(idx); + max_val = (curr_val > max_val) ? curr_val : max_val; + + // Terminate search when max_val is an Int4 max value 7 + if (max_val == 7) + return max_val; + } + + return max_val; +} + +float Int4QTensor::minValue() const { + int8_t min_val = 7; + int8_t curr_val; + for (unsigned int idx = 0; idx < size(); ++idx) { + curr_val = getValue(idx); + min_val = (curr_val < min_val) ? curr_val : min_val; + + // Terminate search when min_val is an Int4 min value -8 + if (min_val == -8) + return min_val; + } + + return min_val; +} + +void Int4QTensor::print(std::ostream &out) const { + const int8_t *data = (int8_t *)getData(); + unsigned int len = size(); + out << "data addr: " << reinterpret_cast(data) << '\n'; + out << dim; + + if (len > 100) { + out << '[' << (int)getValue(0) << ' ' << (int)getValue(1) << ' ' + << (int)getValue(2) << " ... " << (int)getValue(len - 3) << ' ' + << (int)getValue(len - 2) << ' ' << (int)getValue(len - 1) << ']' + << std::endl; + return; + } + + std::ios init(NULL); + init.copyfmt(out); + if (getFormat() == Tformat::NCHW) { + for (unsigned int k = 0; k < batch(); k++) { + for (unsigned int l = 0; l < channel(); l++) { + for (unsigned int i = 0; i < height(); i++) { + for (unsigned int j = 0; j < width(); j++) { + out << std::setw(10) << (int)this->getValue(k, l, i, j) << " "; + } + out << std::endl; + } + out << std::endl; + } + out << "-------" << std::endl; + } + } else { + for (unsigned int k = 0; k < batch(); k++) { + for (unsigned int i = 0; i < height(); i++) { + for (unsigned int j = 0; j < width(); j++) { + for (unsigned int l = 0; l < channel(); l++) { + out << std::setw(10) << (int)this->getValue(k, l, i, j) << " "; + } + out << std::endl; + } + out << std::endl; + } + out << "-------" << std::endl; + } + out.copyfmt(init); + } + + /// print quantization information + const float *q_scales = (float *)getScale(); + + if (scale_size() > 50) { + out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' ' + << q_scales[2] << " ... 
" << q_scales[len - 3] << ' ' + << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl; + return; + } + + out << "Scale factors: "; + for (unsigned i = 0; i < scale_size(); ++i) { + out << q_scales[i] << " "; + } + out << std::endl; +} + +size_t Int4QTensor::scale_size() const { + switch (qscheme) { + case QScheme::PER_TENSOR_AFFINE: + return 1; + break; + case QScheme::PER_CHANNEL_AFFINE: + return height(); + break; + default: + break; + } + return 0; +} + +QScheme Int4QTensor::q_scheme() const { return qscheme; } + +void Int4QTensor::copy(const void *buf) { + NNTR_THROW_IF(!contiguous, std::invalid_argument) + << getName() << " is not contiguous, cannot copy."; + + if (buf == getData()) { + return; + } + // copy tensor data + scopy((size() + 1) / 2, (int8_t *)buf, 1, (int8_t *)getData(), 1); + + // copy scale factor data + float *scales = (float *)(((int8_t *)buf) + (size() + 1) / 2); + scopy(scale_size(), scales, 1, (float *)getScale(), 1); +} + +void Int4QTensor::save_quantization_info(std::ostream &file) { + checkedWrite(file, (char *)&qscheme, sizeof(uint8_t), + "[Int4QTensor::save] failed to write quantization information"); +} + +void Int4QTensor::read_quantization_info(std::ifstream &file) { + checkedRead(file, (char *)&qscheme, sizeof(uint8_t), + "[Int4QTensor::read] failed to read quantization information"); +} + +} // namespace nntrainer diff --git a/nntrainer/tensor/int4_tensor.h b/nntrainer/tensor/int4_tensor.h new file mode 100644 index 000000000..d55f097e2 --- /dev/null +++ b/nntrainer/tensor/int4_tensor.h @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * @file int4_tensor.h + * @date 23 January 2025 + * @brief This is Int4QTensor class for quantized 4-bit integer calculation + * @see https://github.com/nnstreamer/nntrainer + * @author Donghyeon Jeong + * @bug No known bugs except for NYI items + */ + +#ifndef __INT4_TENSOR_H__ +#define __INT4_TENSOR_H__ +#ifdef __cplusplus + +#include +#include + +namespace nntrainer { + +/** + * @class Int4QTensor class + * @brief Int4QTensor class for quantized 4-bit integer calculation + * + * @note Int4QTensor store int4 data within the int8 memory space. + * Specifically, each int8 value contains two int4 values packed together. + * The first four bits represent the first int4 value, while the last four bits + * represent the second int4 value. 
+ * E.g., 01011001 (89) represents 0101 (+5) and 1001 (-1) + */ +class Int4QTensor : public TensorBase { +public: + /** + * @brief Basic Constructor of Tensor + */ + Int4QTensor(std::string name_ = "", Tformat fm = Tformat::NCHW, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); + + /** + * @brief Construct a new Int4QTensor object + * + * @param d Tensor dim for this qint4 tensor + * @param alloc_now Allocate memory to this tensor or not + * @param init Initializer for the tensor + * @param name Name of the tensor + * @param qscheme_ Quantization scheme of the tensor + */ + Int4QTensor(const TensorDim &d, bool alloc_now, + Initializer init = Initializer::NONE, std::string name = "", + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); + + /** + * @brief Construct a new Int4QTensor object + * + * @param d Tensor dim for this tensor + * @param buf buffer + * @param qscheme_ quantization scheme of the tensor + */ + Int4QTensor(const TensorDim &d, const void *buf = nullptr, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); + + /** + * @brief Construct a new Int4QTensor object + * + * @param d data for the Tensor + * @param scales scale factors for the Tensor + * @param fm format for the Tensor + * @param qscheme_ quantization scheme of the tensor + */ + Int4QTensor( + std::vector>>> const &d, + std::vector const &scales, Tformat fm, QScheme qscheme_); + + /** + * @brief Construct a new Int4QTensor object + * @param rhs TensorBase object to copy + */ + Int4QTensor(TensorBase &rhs) : TensorBase(rhs) {} + + /** + * @brief Basic Destructor + */ + ~Int4QTensor() {} + + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + */ + bool operator==(const Int4QTensor &rhs) const; + + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + */ + bool operator!=(const Int4QTensor &rhs) const { return !(*this == rhs); } + + /** + * @copydoc Tensor::allocate() + */ + void allocate() override; + + /** + * @copydoc Tensor::deallocate() + */ + void deallocate() override; + + /** + * @copydoc Tensor::getData() + */ + void *getData() const override; + + /** + * @copydoc Tensor::getData(size_t idx) + */ + void *getData(size_t idx) const override; + + /** + * @copydoc Tensor::getScale() + */ + void *getScale() const override; + + /** + * @copydoc Tensor::getScale(size_t idx) + */ + void *getScale(size_t idx) const override; + + /** + * @brief i data index + * @retval address of ith data + */ + void *getAddress(unsigned int i) override; + + /** + * @brief i data index + * @retval address of ith data + */ + const void *getAddress(unsigned int i) const override; + + /** + * @brief return value at specific location + * @param[in] i index + */ + const int8_t getValue(unsigned int i) const; + + /** + * @brief return value at specific location + * @param[in] i index + */ + int8_t getValue(unsigned int i); + + /** + * @brief return value at specific location + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + */ + const int8_t getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) const; + + /** + * @brief return value at specific location + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + */ + int8_t getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w); + + /** + * @copydoc Tensor::setValue(float value) + */ + void setValue(float value) override; + + 
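+  /**
+   * @note Illustrative usage (example dimensions and values are assumed):
+   * @code
+   *   Int4QTensor t({1, 1, 2, 2, {Tformat::NCHW, Tdatatype::QINT4}}, true);
+   *   t.setValue(1.0f);          // every packed byte becomes 0x11
+   *   int8_t v = t.getValue(0);  // 1, decoded from the high nibble
+   * @endcode
+   */
+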
/** + * @copydoc Tensor::setValue(b, c, h, w, value) + */ + void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value) override; + + /** + * @copydoc Tensor::addValue(b, c, h, w, value, beta) + */ + void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value, float beta) override; + + /** + * @copydoc Tensor::setZero() + */ + void setZero() override; + + /** + * @copydoc Tensor::initialize() + */ + void initialize() override; + + /** + * @copydoc Tensor::initialize(Initializer init) + */ + void initialize(Initializer init) override; + + /** + * @copydoc Tensor::copy(const Tensor &from) + */ + void copy(const Tensor &from) override; + + /** + * @copydoc Tensor::copyData(const Tensor &from) + */ + void copyData(const Tensor &from) override; + + /** + * @copydoc Tensor::copy_with_stride() + */ + void copy_with_stride(const Tensor &input, Tensor &output) override; + + /** + * @copydoc Tensor::argmax() + */ + std::vector argmax() const override; + + /** + * @copydoc Tensor::max_abs() + */ + float max_abs() const override; + + /** + * @copydoc Tensor::maxValue() + */ + float maxValue() const override; + + /** + * @copydoc Tensor::minValue() + */ + float minValue() const override; + + /** + * @copydoc Tensor::print(std::ostream &out) + */ + void print(std::ostream &out) const override; + + /** + * @copydoc TensorBase::save_quantization_info() + */ + void save_quantization_info(std::ostream &file) override; + + /** + * @copydoc TensorBase::read_quantization_info() + */ + void read_quantization_info(std::ifstream &file) override; + + /** + * @copydoc Tensor::scale_size() + */ + size_t scale_size() const override; + + /** + * @copydoc Tensor::q_scheme() + */ + QScheme q_scheme() const; + +private: + /** + * @brief quantization scheme + */ + QScheme qscheme; + + /** + * @brief copy a buffer to @a this, the caller has to ensure that @a this is + * initialized otherwise undefined behavior + * + * @param buf buffer to copy from + */ + void copy(const void *buf); + + /** + * @brief Get the Data Type String object + * @return std::string of tensor data type (QINT4) + */ + std::string getStringDataType() const override { return "QINT4"; } + + /** + * @copydoc Tensor::isValid() + */ + bool isValid() const override { return true; }; +}; + +} // namespace nntrainer + +#endif /* __cplusplus */ +#endif /* __INT4_TENSOR_H__ */ diff --git a/nntrainer/tensor/meson.build b/nntrainer/tensor/meson.build index 721e58157..a358f43fb 100644 --- a/nntrainer/tensor/meson.build +++ b/nntrainer/tensor/meson.build @@ -8,6 +8,7 @@ tensor_sources = [ 'tensor.cpp', 'tensor_base.cpp', 'float_tensor.cpp', + 'int4_tensor.cpp', 'char_tensor.cpp', 'short_tensor.cpp', 'tensor_dim.cpp', @@ -29,6 +30,7 @@ tensor_headers = [ 'tensor.h', 'tensor_base.h', 'float_tensor.h', + 'int4_tensor.h', 'char_tensor.h', 'short_tensor.h', 'uint_tensor.h', diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp index 913d0172d..bdf96f4ea 100644 --- a/nntrainer/tensor/tensor.cpp +++ b/nntrainer/tensor/tensor.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -49,9 +50,19 @@ Tensor::Tensor( std::vector>>> const &d, std::vector const &scales, ml::train::TensorDim::TensorType t_type, QScheme qscheme_) { - itensor = std::shared_ptr( - new CharTensor(d, scales, t_type.format, qscheme_), - std::default_delete()); + if (t_type.data_type == Tdatatype::QINT4) { + itensor = std::shared_ptr( + new Int4QTensor(d, scales, t_type.format, qscheme_), + 
std::default_delete()); + } else if (t_type.data_type == Tdatatype::QINT8) { + itensor = std::shared_ptr( + new CharTensor(d, scales, t_type.format, qscheme_), + std::default_delete()); + } else { + throw std::invalid_argument( + "Error: Tensor cannot be constructed because the given data type is " + "incorrect. The supported d_types are: QINT4, QINT8"); + } } Tensor::Tensor( @@ -110,6 +121,9 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) { } else if (d_type == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(name_, fm), std::default_delete()); + } else if (d_type == Tdatatype::QINT4) { + itensor = std::shared_ptr(new Int4QTensor(name_, fm), + std::default_delete()); } else if (d_type == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM itensor = std::shared_ptr(new BCQTensor(name_, fm), @@ -162,6 +176,10 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init, itensor = std::shared_ptr( new CharTensor(d, alloc_now, init, name, qscheme), std::default_delete()); + } else if (d.getDataType() == Tdatatype::QINT4) { + itensor = std::shared_ptr( + new Int4QTensor(d, alloc_now, init, name, qscheme), + std::default_delete()); } else if (d.getDataType() == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM itensor = @@ -207,6 +225,9 @@ Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) { } else if (d.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(d, buf, qscheme), std::default_delete()); + } else if (d.getDataType() == Tdatatype::QINT4) { + itensor = std::shared_ptr(new Int4QTensor(d, buf), + std::default_delete()); } else if (d.getDataType() == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM itensor = std::shared_ptr(new BCQTensor(d, buf), @@ -249,6 +270,9 @@ Tensor::Tensor(const Tensor &rhs) { } else if (rhs.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(*rhs.itensor), std::default_delete()); + } else if (rhs.getDataType() == Tdatatype::QINT4) { + itensor = std::shared_ptr(new Int4QTensor(*rhs.itensor), + std::default_delete()); } else if (rhs.getDataType() == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM itensor = std::shared_ptr(new BCQTensor(*rhs.itensor), @@ -293,6 +317,9 @@ Tensor &Tensor::operator=(const Tensor &rhs) { } else if (rhs.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(*rhs.itensor), std::default_delete()); + } else if (rhs.getDataType() == Tdatatype::QINT4) { + itensor = std::shared_ptr(new Int4QTensor(*rhs.itensor), + std::default_delete()); } else if (rhs.getDataType() == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM itensor = std::shared_ptr(new BCQTensor(*rhs.itensor), @@ -336,6 +363,9 @@ bool Tensor::operator==(const Tensor &rhs) const { } else if (getDataType() == Tdatatype::QINT8) { return *std::dynamic_pointer_cast(itensor) == *std::dynamic_pointer_cast(rhs.itensor); + } else if (getDataType() == Tdatatype::QINT4) { + return *std::dynamic_pointer_cast(itensor) == + *std::dynamic_pointer_cast(rhs.itensor); } else if (getDataType() == Tdatatype::BCQ) { #ifdef ENABLE_BIQGEMM return *std::dynamic_pointer_cast(itensor) == diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec index bd4ad0f3f..b9b09fb94 100644 --- a/packaging/nntrainer.spec +++ b/packaging/nntrainer.spec @@ -541,6 +541,7 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/ %{_includedir}/nntrainer/memory_data.h %{_includedir}/nntrainer/tensor.h %{_includedir}/nntrainer/tensor_base.h +%{_includedir}/nntrainer/int4_tensor.h %{_includedir}/nntrainer/char_tensor.h 
%{_includedir}/nntrainer/short_tensor.h %{_includedir}/nntrainer/uint_tensor.h diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp index 765b1a3cb..047f4832f 100644 --- a/test/unittest/unittest_nntrainer_tensor.cpp +++ b/test/unittest/unittest_nntrainer_tensor.cpp @@ -289,26 +289,6 @@ TEST(nntrainer_Tensor, Tensor_07_p) { } } -// TEST(nntrainer_Tensor, Tensor_06_p) { -// int status = ML_ERROR_NONE; -// nntrainer::Tensor tensor = nntrainer::Tensor( -// 1, 4, 2, 2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); -// ASSERT_NE(nullptr, tensor.getData()); - -// tensor.setValue(2); - -// for (size_t b = 0; b < tensor.batch(); ++b) { -// for (size_t c = 0; c < tensor.channel(); ++c) { -// for (size_t h = 0; h < tensor.height(); ++h) { -// for (size_t w = 0; w < tensor.width(); ++w) { -// size_t idx = tensor.getIndex(b, c, h, w); -// ASSERT_EQ(2, tensor.getValueQint4(idx)); -// } -// } -// } -// } -// } - TEST(nntrainer_Tensor, Tensor_07_n) { int status = ML_ERROR_NONE; int batch = 3; @@ -419,6 +399,88 @@ TEST(nntrainer_Tensor, QTensor_01_p) { EXPECT_EQ(status, ML_ERROR_NONE); } +/** + * @brief Int4QTensor creation with initializer + */ +TEST(nntrainer_Tensor, QTensor_02_p) { + int status = ML_ERROR_NONE; + nntrainer::Tensor tensor = nntrainer::Tensor( + 1, 4, 2, 2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); + ASSERT_NE(nullptr, tensor.getData()); + + // Initialize tensor with one + tensor.initialize(nntrainer::Initializer::ONES); + + for (size_t b = 0; b < tensor.batch(); ++b) { + for (size_t c = 0; c < tensor.channel(); ++c) { + for (size_t h = 0; h < tensor.height(); ++h) { + for (size_t w = 0; w < tensor.width(); ++w) { + size_t idx = tensor.getIndex(b, c, h, w); + // get encoded int8 data and decode to a single int 4 value + int8_t value = tensor.getValue(idx / 2); + if (idx % 2 == 1) { + value <<= 4; + } + value >>= 4; + + // check if the value of data is one + ASSERT_EQ(1, value); + } + } + } + } +} + +/** + * @brief Int4QTensor creation with the vector data + */ +TEST(nntrainer_Tensor, QTensor_03_p) { + std::vector>> in = {{{-8, 0}, {-4, 4}}, + {{-7, 1}, {-3, 5}}, + {{-6, 2}, {-2, 6}}, + {{-5, 3}, {-1, 7}}}; + + nntrainer::Tensor tensor( + in, {3.561f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_TENSOR_AFFINE); + + // compare tensor data with vector data + for (size_t b = 0; b < tensor.batch(); ++b) { + for (size_t c = 0; c < tensor.channel(); ++c) { + for (size_t h = 0; h < tensor.height(); ++h) { + for (size_t w = 0; w < tensor.width(); ++w) { + size_t idx = tensor.getIndex(b, c, h, w); + // get encoded int8 data and decode to a single int 4 value + int8_t value = tensor.getValue(idx / 2); + if (idx % 2 == 1) { + value <<= 4; + } + value >>= 4; + ASSERT_EQ(in[c][h][w], value); + } + } + } + } + + ASSERT_FLOAT_EQ(*tensor.getScale(), 3.561f); +} + +/** + * @brief Int4QTensor creation with incorrect size of scale factors + */ +TEST(nntrainer_Tensor, QTensor_04_n) { + std::vector>> in = {{{-8, 0}, {-4, 4}}, + {{-7, 1}, {-3, 5}}, + {{-6, 2}, {-2, 6}}, + {{-5, 3}, {-1, 7}}}; + + EXPECT_THROW( + nntrainer::Tensor(in, {3.561f}, + {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_CHANNEL_AFFINE), + std::invalid_argument); +} + TEST(nntrainer_Tensor, copy_01_n) { int batch = 3; int channel = 1; @@ -667,7 +729,6 @@ TEST(nntrainer_Tensor, copy_11_p) { } TEST(nntrainer_Tensor, copy_12_p) { - int status = ML_ERROR_NONE; int batch = 3; int channel = 1; 
int height = 3; @@ -687,6 +748,25 @@ TEST(nntrainer_Tensor, copy_12_p) { ASSERT_EQ(input, output); } +TEST(nntrainer_Tensor, copy_13_p) { + std::vector> in = {{0, -5, -6, 1, 4}, + {5, -7, 3, -1, 5}, + {-6, 3, 0, 3, 6}, + {-1, 1, 3, 5, 7}, + {4, -5, 6, -7, -8}}; + + nntrainer::Tensor input( + in, {0.051626f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_TENSOR_AFFINE); + + nntrainer::Tensor output( + 1, 1, 5, 5, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); + + output.copy(input); + + ASSERT_EQ(input, output); +} + TEST(nntrainer_Tensor, multiply_i_01_p) { int status = ML_ERROR_NONE; int batch = 3; @@ -3617,7 +3697,7 @@ TEST(nntrainer_Tensor, save_read_01_n) { ASSERT_EQ(status, 0); } -TEST(nntrainer_Tensor, max_element_01_p) { +TEST(nntrainer_Tensor, argmax_01_p) { int batch = 3; int channel = 1; int height = 5; @@ -3630,7 +3710,7 @@ TEST(nntrainer_Tensor, max_element_01_p) { EXPECT_EQ(target.argmax(), std::vector({24, 0, 0})); } -TEST(nntrainer_Tensor, max_element_02_p) { +TEST(nntrainer_Tensor, argmax_02_p) { int batch = 3; int channel = 1; int height = 5; @@ -3643,7 +3723,20 @@ TEST(nntrainer_Tensor, max_element_02_p) { EXPECT_EQ(target.argmax(), std::vector({24, 0, 0})); } -TEST(nntrainer_Tensor, max_element_03_p) { +TEST(nntrainer_Tensor, argmax_03_p) { + std::vector>>> in = { + {{{0, 1, 2}, {-1, 0, 1}, {-2, -1, 0}}}, + {{{-7, -6, -5}, {-8, -7, -6}, {7, -8, -7}}}, + {{{2, 3, 4}, {1, 2, 3}, {0, 1, 2}}}}; + + nntrainer::Tensor target( + in, {0.0719785f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_TENSOR_AFFINE); + + EXPECT_EQ(target.argmax(), std::vector({2, 6, 2})); +} + +TEST(nntrainer_Tensor, max_element_01_p) { int batch = 3; int channel = 1; int height = 5; @@ -3656,7 +3749,7 @@ TEST(nntrainer_Tensor, max_element_03_p) { EXPECT_EQ(target.max_abs(), 31); } -TEST(nntrainer_Tensor, max_element_04_p) { +TEST(nntrainer_Tensor, max_element_02_p) { int batch = 3; int channel = 1; int height = 5; @@ -3669,6 +3762,30 @@ TEST(nntrainer_Tensor, max_element_04_p) { EXPECT_EQ(target.max_abs(), 31); } +TEST(nntrainer_Tensor, max_element_03_p) { + std::vector>> in = {{{1, 1, 1, 1, 1, 1}, + {2, 1, 0, -1, -2, -3}, + {3, 1, -1, -3, -5, -7}, + {4, 1, -2, -5, -8, 5}, + {5, 1, -3, -7, 5, 1}}, + {{0, 0, 0, 0, 0, 0}, + {1, 0, -1, -2, -3, -4}, + {2, 0, -2, -4, -6, -8}, + {3, 0, -3, -6, 7, 4}, + {4, 0, -4, -8, 4, 0}}, + {{-1, -1, -1, -1, -1, -1}, + {0, -1, -2, -3, -4, -5}, + {1, -1, -3, -5, -7, 7}, + {2, -1, -4, -7, 6, 3}, + {3, -1, -5, 7, 3, -1}}}; + + nntrainer::Tensor target( + in, {3.561f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_TENSOR_AFFINE); + + EXPECT_EQ(target.max_abs(), 8); +} + TEST(nntrainer_Tensor, min_element_01_p) { int batch = 3; int channel = 1; @@ -3707,6 +3824,43 @@ TEST(nntrainer_Tensor, min_element_02_p) { EXPECT_EQ(target.minValue(), 16); } +/** + * @brief Int4QTensor minimum value test + */ +TEST(nntrainer_Tensor, min_element_03_p) { + std::vector> in = {{0, 5, -6, -1, 4}, + {5, -7, -3, 1, 5}, + {-6, -3, 0, 3, 6}, + {-1, 1, 3, 5, 7}, + {4, 5, 6, 7, -8}}; + + nntrainer::Tensor target( + in, {0.05126f}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}, + nntrainer::QScheme::PER_TENSOR_AFFINE); + + EXPECT_EQ(target.minValue(), -8); + + // Add 2 to mininum element. 
next minimum value is -7 + // [ 0 5 -6 -1 4] [ 0 5 -6 -1 4] + // [ 5 -7 -3 1 5] [ 5 -7 -3 1 5] + // [-6 -3 0 3 6] -> [-6 -3 0 3 6] + // [-1 1 3 5 7] [-1 1 3 5 7] + // [ 4 5 6 7 -8] [ 4 5 6 7 -6] + target.addValue(0, 0, 4, 4, 2, 1); + + EXPECT_EQ(target.minValue(), -7); + + // Add 2 to mininum element. next minimum value is -6 + // [ 0 5 -6 -1 4] [ 0 5 -6 -1 4] + // [ 5 -7 -3 1 5] [ 5 -5 -3 1 5] + // [-6 -3 0 3 6] -> [-6 -3 0 3 6] + // [-1 1 3 5 7] [-1 1 3 5 7] + // [ 4 5 6 7 -6] [ 4 5 6 7 -6] + target.addValue(0, 0, 1, 1, 2, 1); + + EXPECT_EQ(target.minValue(), -6); +} + TEST(nntrainer_Tensor, copy_and_shares_variable_01_p) { nntrainer::Tensor A = constant(1.0f, 3, 4, 5, 6); nntrainer::Tensor B = A.clone(); @@ -4340,6 +4494,45 @@ TEST(nntrainer_Tensor, initialize_15_n) { std::invalid_argument); } +/** + * @brief initializer one / zero test + */ +TEST(nntrainer_Tensor, initialize_16_p) { + nntrainer::Tensor result( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true, + nntrainer::Initializer::ONES); + nntrainer::Tensor tensor( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true, + nntrainer::Initializer::ZEROS); + EXPECT_NE(tensor, result); + tensor.initialize(nntrainer::Initializer::ONES); + EXPECT_EQ(tensor, result); +} + +/** + * @brief invalid initializer + */ +TEST(nntrainer_Tensor, initialize_17_n) { + nntrainer::Tensor tensor( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, + true); + + /// @note Int4QTensor does not support HE_NORMAL initialization + EXPECT_THROW(tensor.initialize(nntrainer::Initializer::HE_NORMAL), + std::invalid_argument); +} + +/** + * @brief set out of range value. must be in range [-8, 7] + */ +TEST(nntrainer_Tensor, initialize_18_n) { + nntrainer::Tensor tensor( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, + true); + + EXPECT_THROW(tensor.setValue(127), std::out_of_range); +} + TEST(nntrainer_Tensor, split_01_p) { { nntrainer::TensorDim ref_dim(3, 2, 4, 5); @@ -5237,112 +5430,6 @@ TEST(nntrainer_Tensor, multiply_strided_06_p) { EXPECT_EQ(status, ML_ERROR_NONE); } -// /** -// * @brief dequantize FP32 tensor -// */ -// TEST(nntrainer_Tensor, dequantize_01_n) { -// int batch = 1; -// int channel = 3; -// int height = 4; -// int width = 5; - -// nntrainer::Tensor input(batch, channel, height, width); -// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); -// input.setScaleFactors({1.5, 1.0, 0.5}); -// input.setZeroPoints({1, 4, 7}); - -// nntrainer::Tensor output(batch, channel, height, width); - -// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -// } - -// /** -// * @brief dequantize tensor with different dimension -// */ -// TEST(nntrainer_Tensor, dequantize_02_n) { -// int batch = 1; -// int channel = 3; -// int height = 4; -// int width = 5; - -// nntrainer::Tensor input( -// batch + 1, channel, height + 1, width + 1, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); -// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); -// input.setScaleFactors({1.5, 1.0, 0.5}); -// input.setZeroPoints({1, 4, 7}); - -// nntrainer::Tensor output(batch, channel, height, width); - -// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -// } - -// /** -// * @brief dequantize tensor with no scale factors -// */ -// TEST(nntrainer_Tensor, dequantize_03_n) { -// int batch = 1; -// int channel = 3; -// int height = 4; -// int width = 5; - -// nntrainer::Tensor input( -// batch, channel, height, 
width, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); -// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - -// nntrainer::Tensor output(batch, channel, height, width); - -// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -// } - -// /** -// * @brief dequantize tensor with incorrect number of scale factors -// */ -// TEST(nntrainer_Tensor, dequantize_04_n) { -// int batch = 1; -// int channel = 3; -// int height = 4; -// int width = 5; - -// nntrainer::Tensor input( -// batch, channel, height, width, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); -// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - -// nntrainer::Tensor output( -// batch, channel, height, width, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - -// input.setScaleFactors({2.0, 1.5, 1.0, 0.5}); -// input.setZeroPoints({2, 3, 4, 5}); -// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -// EXPECT_NO_THROW({ input.dequantize(output, 2); }); -// } - -// /** -// * @brief dequantize tensor to QINT8 -// */ -// TEST(nntrainer_Tensor, dequantize_05_n) { -// int batch = 1; -// int channel = 3; -// int height = 4; -// int width = 5; - -// nntrainer::Tensor input( -// batch, channel, height, width, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); -// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); -// input.setScaleFactors({1.5, 1.0, 0.5}); -// input.setZeroPoints({1, 4, 7}); - -// nntrainer::Tensor output( -// batch, channel, height, width, -// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - -// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -// } - TEST(nntrainer_Tensor, sin_contiguous_p) { int batch = 1; int channel = 1;