[CharTensor] QINT8 addition feature
This pull request enables the QINT8 element-wise addition feature in CharTensor.
It adds two tensors of the same dimensions and returns a tensor containing the sums of the corresponding elements.
Please note that the functionality for automatically determining the new scale factor will be implemented in a future update.
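
For context, per-tensor affine QINT8 addition dequantizes each operand with its own scale factor, adds in floating point, and requantizes the sum with the caller-supplied output scale. A minimal standalone sketch of the per-element arithmetic (illustrative only; qint8_add is not part of the nntrainer API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// q_a, q_b: quantized inputs; s_a, s_b: their scale factors;
// s_out: pre-established output scale (automatic derivation is future work).
int8_t qint8_add(int8_t q_a, float s_a, int8_t q_b, float s_b, float s_out) {
  float real = q_a * s_a + q_b * s_b;  // dequantize both operands and add
  long q = std::lround(real / s_out);  // requantize with the output scale
  return static_cast<int8_t>(std::max(-128L, std::min(q, 127L)));  // clamp to int8 range
}

Until that update, callers must pass a valid output scale themselves; one conservative choice, purely for illustration, is s_out = s_a + s_b, which keeps the requantized sum of any two int8 values within [-128, 127].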

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 committed Jan 10, 2025
1 parent d6d02c8 commit d8a746e
Showing 3 changed files with 132 additions and 0 deletions.
48 changes: 48 additions & 0 deletions nntrainer/tensor/char_tensor.cpp
@@ -268,6 +268,54 @@ void CharTensor::initialize(Initializer init) {
initialize();
}

Tensor &CharTensor::add(Tensor const &input, Tensor &output,
float const scale) const {
CREATE_IF_EMPTY_DIMS(output, dim, nullptr, qscheme);

NNTR_THROW_IF(q_scheme() != input.q_scheme(), std::invalid_argument)
<< "[Tensor] Cannot add tensors with different quantization schemes.";

/// @note remove after vector scale multiply is implemented
NNTR_THROW_IF(q_scheme() != QScheme::PER_TENSOR_AFFINE, std::invalid_argument)
<< "Tensor addition other than per tensor affine quantization scheme is "
"NYI.";

float lhs_scale = *(float *)getScale();
float rhs_scale = *input.getScale<float>();

/// @note current impl assumes pre-established quantization parameters are set
/// @todo 1. verify result_scale is valid 2. calculate qparams if not given
/// 3. check qscheme is per tensor affine
NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
std::fpclassify(rhs_scale) == FP_ZERO ||
std::fpclassify(scale) == FP_ZERO,
std::invalid_argument)
<< "scale factors not set, cannot multiply";

/// @todo check whether the following method has faster execution speed.
/// 1. clone input A and B to A_fp32 and B_fp32
/// 2. dequantize A_fp32 and B_fp32
/// 3. perform addition: A_fp32.add(B_fp32, output_fp32)
/// 4. quantize output_fp32
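// The loop below performs the equivalent computation element by element:
// dequantize both operands (value * scale), add in float, then requantize
// with the given output scale and clamp the result to the int8 range.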
for (unsigned int b = 0; b < batch(); ++b) {
for (unsigned int c = 0; c < channel(); ++c) {
for (unsigned int h = 0; h < height(); ++h) {
for (unsigned int w = 0; w < width(); ++w) {
float val = getValue(b, c, h, w) * lhs_scale +
input.getValue<int8_t>(b, c, h, w) * rhs_scale;

output.setValue(
b, c, h, w,
std::max(-128, std::min((int)std::lround(val / scale), 127)));
}
}
}
}
*output.getScale<float>() = scale;

return output;
}

void CharTensor::copy(const Tensor &from) {
reshape(from.getDim());
copy(from.getData());
7 changes: 7 additions & 0 deletions nntrainer/tensor/char_tensor.h
@@ -195,6 +195,13 @@ class CharTensor : public TensorBase {
*/
void initialize(Initializer init) override;

/**
* @copydoc Tensor::add(Tensor const &m, Tensor &output, float const
* alpha)
*/
Tensor &add(Tensor const &m, Tensor &output,
float const scale) const override;

/**
* @copydoc Tensor::copy(const Tensor &from)
*/
77 changes: 77 additions & 0 deletions test/unittest/unittest_nntrainer_tensor.cpp
@@ -2013,6 +2013,83 @@ TEST(nntrainer_Tensor, add_08_n) {
EXPECT_THROW(input.add(test, output), std::invalid_argument);
}

/**
* @brief Test elementwise addition of qint8
* @note Compare quantized int8 addition result with float addition
*/
TEST(nntrainer_Quantizer, add_09_p) {
size_t batch = 1;
size_t channel = 1;
size_t height = 4;
size_t width = 4;

// float tensor A and B (original data)
float dataA[] = {0.29764187, 0.03480661, 0.23380315, -0.12472117,
-0.31381518, 0.17460883, 0.22656035, 0.40918356,
-0.18949383, 0.13317966, -0.18087250, -0.28150725,
-0.37915850, 0.45573741, -0.31624895, -0.36885685};
nntrainer::Tensor A({batch, channel, height, width}, dataA);

float dataB[] = {0.35672212, -0.03879440, -0.29017872, -0.29774767,
-0.03309470, -0.42983186, 0.05469221, -0.08551443,
0.29058170, -0.13359779, -0.06470931, -0.44647706,
0.20454758, 0.47189242, 0.26254445, 0.10401177};
nntrainer::Tensor B({batch, channel, height, width}, dataB);

// quantized tensor qA and qB (quantized data - per tensor affine)
std::vector<int8_t> qdataA = {83, 10, 65, -35, -88, 49, 63, 114,
-53, 37, -51, -79, -106, 127, -88, -103};
float scaleA = 0.00357441115193f;
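// Per-tensor affine QINT8 tensors store the scale factor immediately after
// the int8 data, so append the raw bytes of scaleA (same for scaleB below).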
int8_t *arrayA = reinterpret_cast<int8_t *>(&scaleA);
for (unsigned int i = 0; i < 4; ++i) {
qdataA.push_back(arrayA[i]);
}
nntrainer::Tensor qA({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataA.data());

std::vector<int8_t> qdataB = {96, -10, -78, -80, -9, -116, 15, -23,
79, -36, -17, -121, 55, 127, 71, 28};
float scaleB = 0.0037011168897152f;
int8_t *arrayB = reinterpret_cast<int8_t *>(&scaleB);
for (unsigned int i = 0; i < 4; ++i) {
qdataB.push_back(arrayB[i]);
}
nntrainer::Tensor qB({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataB.data());

// output tensors to store result
nntrainer::Tensor C(batch, channel, height, width);
nntrainer::Tensor qC(batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8);

float output_scale = 0.00828241f;

// perform addition
EXPECT_NO_THROW(A.add(B, C));
EXPECT_NO_THROW(qA.add(qB, qC, output_scale));

// compare addition result
nntrainer::Tensor dequantizedC = qC.clone(nntrainer::Tdatatype::FP32);

// dequantize
dequantizedC.multiply_i(output_scale);

const float eps = 1e-2;
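
// Worked check for element (0, 0, 0, 0): 83 * 0.00357441 ≈ 0.29668 plus
// 96 * 0.00370112 ≈ 0.35531 gives 0.65198; requantizing yields
// lround(0.65198 / 0.00828241) = 79, and 79 * 0.00828241 ≈ 0.65431,
// within eps of the float sum 0.29764187 + 0.35672212 = 0.65436.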

for (unsigned int b = 0; b < batch; b++) {
for (unsigned int c = 0; c < channel; c++) {
for (unsigned int h = 0; h < height; h++) {
for (unsigned int w = 0; w < width; w++) {
EXPECT_NEAR(C.getValue(b, c, h, w), dequantizedC.getValue(b, c, h, w),
eps);
}
}
}
}
}

TEST(nntrainer_Tensor, pow_01_p) {
nntrainer::Tensor input = constant(4.0, 3, 2, 4, 5);

