Skip to content

Commit

Permalink
[CharTensor] Enable memory data to store scale factors based on quant…
Browse files Browse the repository at this point in the history
…ization schemes

This pull request aims to modify the existing codebase such that the memory data of CharTensor can now store scale factors based on different quantization schemes.
Additionally, this change allows the Tensor class to specify the desired quantization scheme while creating a new CharTensor instance.
The scale factors are either computed during the quantization process by a specific quantizer, or initialized manually when both the quantized data and the corresponding scale factors are provided as inputs.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
  • Loading branch information
djeong20 committed Dec 30, 2024
1 parent 24a868d commit a5ec4e6
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 74 deletions.
97 changes: 84 additions & 13 deletions nntrainer/tensor/char_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@

namespace nntrainer {

/**
 * @brief Basic constructor; creates an unallocated QINT8 tensor.
 *
 * @param name_    name of the tensor
 * @param fm       tensor format
 * @param qscheme_ quantization scheme of the tensor
 *
 * @note The member `qscheme` has no in-class initializer, so it must be set
 *       here; previously the parameter was accepted but never stored, leaving
 *       the scheme indeterminate for tensors built through this constructor.
 */
CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
  TensorBase(name_, fm, Tdatatype::QINT8), qscheme(qscheme_) {}

/**
 * @brief Construct a new CharTensor object with a given dimension.
 *
 * @param d         tensor dimension
 * @param alloc_now whether to allocate memory immediately
 * @param init      initializer for the tensor
 * @param name      name of the tensor
 * @param qscheme_  quantization scheme of the tensor
 */
CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
                       std::string name, QScheme qscheme_) :
  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
  // defer allocation to a later allocate() call unless requested now
  if (alloc_now)
    allocate();
}

CharTensor::CharTensor(const TensorDim &d, const void *buf) :
CharTensor(d, true) {
CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
CharTensor(d, true, Initializer::NONE, "", qscheme_) {
if (d.getDataLen() != 0) {
if (buf != nullptr)
copy(buf);
Expand All @@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :

CharTensor::CharTensor(
std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
Tformat fm) {
std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
throw std::out_of_range(
"[Tensor] trying to initialize CharTensor from empty vector");
Expand All @@ -59,9 +59,14 @@ CharTensor::CharTensor(
strides = dim.computeStrides();
contiguous = true;
initializer = Initializer::NONE;
qscheme = qscheme_;

MemoryData *mem_data =
new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
<< "invalid scale factor size " << scales.size();

/// @note 4 * scale_size() assumes scale factors are in full-precision fp.
MemoryData *mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
delete[] mem_data->getAddr<int8_t>();
});
Expand All @@ -84,16 +89,31 @@ CharTensor::CharTensor(
for (unsigned int l = 0; l < channel(); ++l)
this->setValue(i, l, j, k, d[i][j][k][l]);
}

// copy scale factors
scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
}

/**
 * @brief Equality comparison: two CharTensors are equal iff they share the
 *        same quantization scheme, identical quantized int8 data, and
 *        identical scale factors.
 */
bool CharTensor::operator==(const CharTensor &rhs) const {
  // a differing quantization scheme rules out equality immediately
  if (qscheme != rhs.qscheme)
    return false;

  // element-wise comparison of the quantized int8 payload
  const int8_t *lhs_data = (int8_t *)getData();
  const int8_t *rhs_data = (int8_t *)rhs.getData();
  size_t idx = 0;
  while (idx < size()) {
    if (lhs_data[idx] != rhs_data[idx])
      return false;
    ++idx;
  }

  // element-wise comparison of the scale factors (exact float equality,
  // as in the data comparison above)
  const float *lhs_scale = (float *)getScale();
  const float *rhs_scale = (float *)rhs.getScale();
  for (size_t s = scale_size(); s-- > 0;) {
    if (lhs_scale[s] != rhs_scale[s])
      return false;
  }

  return true;
}

Expand All @@ -109,7 +129,8 @@ void CharTensor::allocate() {
/// allocate new memory for the tensor data
MemoryData *mem_data;

mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + 4 * scale_size()]{}));
data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
delete[] mem_data->template getAddr<int8_t>();
delete mem_data;
Expand Down Expand Up @@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
return data->getAddr<int8_t>() + offset + idx;
}

/**
 * @brief Return a pointer to the first scale factor, or nullptr when the
 *        tensor has no backing memory.
 *
 * @note The scale factors live in the same MemoryData buffer, immediately
 *       after the size() quantized int8 values.
 */
void *CharTensor::getScale() const {
  if (data == nullptr)
    return nullptr;

  data->validate();
  // skip past the quantized values to reach the scale-factor region
  int8_t *base = static_cast<int8_t *>(getData());
  return base + size();
}

/**
 * @brief Return a pointer to the idx-th scale factor.
 *
 * @param idx index of the scale factor (valid range: [0, scale_size()))
 * @throws std::invalid_argument when idx is out of range
 *
 * @note The previous check used `idx > scale_size()`, which let
 *       idx == scale_size() through and produced a pointer one past the
 *       stored scale factors; `>=` rejects it.
 */
void *CharTensor::getScale(size_t idx) const {
  NNTR_THROW_IF(idx >= scale_size(), std::invalid_argument)
    << "Tensor::getScale() index is not valid";

  if (!data)
    return nullptr;

  data->validate();
  return ((float *)getScale()) + idx;
}

void *CharTensor::getAddress(unsigned int i) {
size_t index = getIndex(batch(), channel(), height(), width());
if (i > index) {
Expand Down Expand Up @@ -349,7 +389,35 @@ void CharTensor::print(std::ostream &out) const {
out.copyfmt(init);
}

/// @todo print quantization information
/// print quantization information
const float *q_scales = (float *)getScale();

if (scale_size() > 50) {
out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
<< q_scales[2] << " ... " << q_scales[len - 3] << ' '
<< q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
return;
}

out << "Scale factors: ";
for (unsigned i = 0; i < scale_size(); ++i) {
out << q_scales[i] << " ";
}
out << std::endl;
}

/**
 * @brief Number of scale factors stored for this tensor, as determined by
 *        its quantization scheme.
 *
 * @return 1 for per-tensor affine, width() for per-channel affine,
 *         0 for any unrecognized scheme.
 */
size_t CharTensor::scale_size() const {
  // NOTE: `break` after `return` was unreachable dead code; each case now
  // simply returns.
  switch (qscheme) {
  case QScheme::PER_TENSOR_AFFINE:
    // one scale factor shared by the entire tensor
    return 1;
  case QScheme::PER_CHANNEL_AFFINE:
    // one scale factor per slice along the width axis
    return width();
  default:
    // unknown scheme: no scale factors are stored
    return 0;
  }
}

void CharTensor::copy(const void *buf) {
Expand All @@ -360,19 +428,22 @@ void CharTensor::copy(const void *buf) {
return;
}

/// @todo need to optimize
/// @todo need to optimize after #2834
for (unsigned int i = 0; i < size(); ++i) {
((int8_t *)getData())[i] = ((int8_t *)buf)[i];
}

float *scales = (float *)(((int8_t *)buf) + size());
scopy(scale_size(), scales, 1, (float *)getScale(), 1);
}

/**
 * @brief Persist the quantization scheme to the given output stream.
 *
 * @param file output stream to write into
 *
 * @note Writes exactly one byte. NOTE(review): this assumes QScheme's
 *       underlying representation fits in a uint8_t — confirm the enum's
 *       declared underlying type matches.
 */
void CharTensor::save_quantization_info(std::ostream &file) {
  checkedWrite(file, (char *)&qscheme, sizeof(uint8_t),
               "[CharTensor::save] failed to write quantization information");
}

/**
 * @brief Restore the quantization scheme from the given input stream.
 *
 * @param file input file stream to read from
 *
 * @note Reads exactly one byte, mirroring save_quantization_info().
 *       NOTE(review): assumes QScheme is one byte wide — confirm the enum's
 *       declared underlying type matches.
 */
void CharTensor::read_quantization_info(std::ifstream &file) {
  checkedRead(file, (char *)&qscheme, sizeof(uint8_t),
              "[CharTensor::read] failed to read quantization information");
}

Expand Down
35 changes: 29 additions & 6 deletions nntrainer/tensor/char_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define __CHAR_TENSOR_H__
#ifdef __cplusplus

#include <quantizer.h>
#include <tensor_base.h>

namespace nntrainer {
Expand All @@ -25,7 +26,8 @@ class CharTensor : public TensorBase {
/**
* @brief Basic Constructor of Tensor
*/
CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
Expand All @@ -34,27 +36,33 @@ class CharTensor : public TensorBase {
* @param alloc_now Allocate memory to this tensor or not
* @param init Initializer for the tensor
* @param name Name of the tensor
* @param qscheme_ Quantization scheme of the tensor
*/
CharTensor(const TensorDim &d, bool alloc_now,
Initializer init = Initializer::NONE, std::string name = "");
Initializer init = Initializer::NONE, std::string name = "",
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
*
* @param d Tensor dim for this tensor
* @param buf buffer
* @param qscheme_ quantization scheme of the tensor
*/
CharTensor(const TensorDim &d, const void *buf = nullptr);
CharTensor(const TensorDim &d, const void *buf = nullptr,
QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);

/**
* @brief Construct a new CharTensor object
*
* @param d data for the Tensor
* @param scales scale factors for the Tensor
* @param fm format for the Tensor
* @param qscheme_ quantization scheme of the tensor
*/
CharTensor(
std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
Tformat fm);
std::vector<float> const &scales, Tformat fm, QScheme qscheme_);

/**
* @brief Construct a new CharTensor object
Expand Down Expand Up @@ -101,6 +109,16 @@ class CharTensor : public TensorBase {
*/
void *getData(size_t idx) const override;

/**
* @copydoc Tensor::getScale()
*/
void *getScale() const override;

/**
* @copydoc Tensor::getScale(size_t idx)
*/
void *getScale(size_t idx) const override;

/**
* @brief i data index
* @retval address of ith data
Expand Down Expand Up @@ -227,11 +245,16 @@ class CharTensor : public TensorBase {
*/
void read_quantization_info(std::ifstream &file) override;

/**
* @copydoc Tensor::scale_size()
*/
size_t scale_size() const override;

private:
/**
* @brief quantization axis
* @brief quantization scheme
*/
uint8_t axis;
QScheme qscheme;

/**
* @brief copy a buffer to @a this, the caller has to ensure that @a this is
Expand Down
1 change: 1 addition & 0 deletions nntrainer/tensor/quantizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/

#include <quantizer.h>
#include <tensor.h>

namespace nntrainer {

Expand Down
Loading

0 comments on commit a5ec4e6

Please sign in to comment.