[CharTensor] Enable QINT8 multiplication feature
This pull request enables the QINT8 element-wise multiplication feature in CharTensor.
The operation takes two tensors of the same dimensions and returns a tensor whose elements are the products of the corresponding input elements.
Note that automatic determination of the output scale factor will be added in a future update.
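
For reference, the per-element arithmetic reduces to the usual dequantize-multiply-requantize step, with the two input scales and the output scale folded into a single multiplier. A minimal standalone sketch of that step (illustrative names, not the actual nntrainer API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// One element of a QINT8 x QINT8 -> QINT8 multiply: fold the three scale
// factors into one multiplier, accumulate in int32, round, then clamp.
int8_t qmul(int8_t lhs, int8_t rhs, float lhs_scale, float rhs_scale,
            float out_scale) {
  float multiplier = lhs_scale * rhs_scale / out_scale;
  int32_t accum = static_cast<int32_t>(lhs) * static_cast<int32_t>(rhs);
  long rounded = std::lround(multiplier * accum);
  return static_cast<int8_t>(std::max(-128L, std::min(rounded, 127L)));
}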

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 committed Dec 31, 2024
1 parent a5ec4e6 commit 5df2b81
Showing 3 changed files with 152 additions and 1 deletion.
51 changes: 50 additions & 1 deletion nntrainer/tensor/char_tensor.cpp
@@ -64,7 +64,6 @@ CharTensor::CharTensor(
NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
<< "invalid scale factor size " << scales.size();

/// @note 4 * scale_size() assumes scale factors are in full-precision fp.
MemoryData *mem_data = new MemoryData(
(void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
@@ -268,6 +267,56 @@ void CharTensor::initialize(Initializer init) {
initialize();
}

int CharTensor::multiply_i(float const &value) {
// multiply the scale factors by the given value
float *g_scale = (float *)getScale();

sscal(scale_size(), value, g_scale, 1);
return ML_ERROR_NONE;
}

Tensor &CharTensor::multiply(Tensor const &input, Tensor &output,
const float scale) const {
NNTR_THROW_IF(input.getFormat() != this->getFormat(), std::invalid_argument)
<< "Tensor Format of " << getName() << ":"
<< ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " does not match. ("
<< ((bool)(input.getFormat()) ? "NHWC" : "NCHW") << ")";

NNTR_THROW_IF(!contiguous || !input.getContiguous() ||
!output.getContiguous(),
std::invalid_argument)
<< getName() << " is not contiguous, cannot multiply";

float lhs_scale = *(float *)getScale();
float rhs_scale = *input.getScale<float>();

/// @note current impl assumes pre-established quantization parameters are set
/// @todo 1. verify result_scale is valid 2. calculate qparams if not given
NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
std::fpclassify(rhs_scale) == FP_ZERO ||
std::fpclassify(scale) == FP_ZERO,
std::invalid_argument)
<< "scale factors not set, cannot multiply";

float multiplier = lhs_scale * rhs_scale / scale;

int8_t *lhs = (int8_t *)getData();
int8_t *rhs = input.getData<int8_t>();
int8_t *result = output.getData<int8_t>();

for (unsigned int i = 0; i < size(); ++i) {
int32_t accum_val =
static_cast<int32_t>(lhs[i]) * static_cast<int32_t>(rhs[i]);

result[i] =
std::max(-128, std::min((int)std::lround(multiplier * accum_val), 127));
}

*output.getScale<float>() = scale;

return output;
}

void CharTensor::copy(const Tensor &from) {
reshape(from.getDim());
copy(from.getData());
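One detail worth noting in multiply_i above: it never touches the int8 payload. Since a quantized element dequantizes to q * scale, scaling every scale factor by the operand rescales the entire tensor. A hand-rolled equivalent of that sscal call, for illustration only:

#include <cstddef>

// Illustrative stand-in for sscal(n, value, scale, 1): rescaling the scale
// factors rescales the whole tensor without rewriting its int8 data.
void scale_in_place(float *scale, std::size_t n, float value) {
  for (std::size_t i = 0; i < n; ++i)
    scale[i] *= value; // element i now dequantizes to q * (scale[i] * value)
}
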
19 changes: 19 additions & 0 deletions nntrainer/tensor/char_tensor.h
@@ -195,6 +195,25 @@ class CharTensor : public TensorBase {
*/
void initialize(Initializer init) override;

/**
* @copydoc Tensor::multiply_i(float const &value)
*/
int multiply_i(float const &value) override;

/**
* @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const
* float scale = 0.0)
*
* @note multiply only works under the following conditions:
* 1. an appropriate output scale must be provided (automatic determination
* of the scale factor will be added in a future update)
* 2. both tensors must have the same data type, QINT8
* 3. both tensors must have the same size (broadcasting is not yet supported)
* 4. only the per-tensor quantization scheme is supported
*/
Tensor &multiply(Tensor const &m, Tensor &output,
const float scale = 0.0) const override;

/**
* @copydoc Tensor::copy(const Tensor &from)
*/
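Given the conditions above, a minimal call site looks roughly like this (a sketch modeled on the unit test below; the 0.002f output scale is an arbitrary caller-chosen value, since automatic scale selection is not implemented yet):

// Both operands and the output are per-tensor QINT8 tensors of equal shape.
nntrainer::Tensor qC(1, 1, 4, 4, nntrainer::Tformat::NCHW,
                     nntrainer::Tdatatype::QINT8);
qA.multiply(qB, qC, 0.002f); // throws std::invalid_argument if any scale is zero
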
83 changes: 83 additions & 0 deletions test/unittest/unittest_nntrainer_tensor.cpp
@@ -1037,6 +1037,89 @@ TEST(nntrainer_Tensor, multiply_08_n) {
EXPECT_THROW(input.multiply(test, output), std::invalid_argument);
}

/**
* @brief Test elementwise multiplication of qint8
* @note Compare the quantized int 8 multiplication result with the float multiplication result
*/
TEST(nntrainer_Quantizer, multiply_09_p) {
size_t batch = 1;
size_t channel = 1;
size_t height = 4;
size_t width = 4;

// float tensor A and B (original data)
float dataA[] = {-0.16924214, -0.10338581, 0.31561565, -0.00533330,
0.44809300, -0.15348488, 0.14003623, -0.07908171,
-0.21415669, -0.35267806, 0.46354777, -0.35009885,
-0.07760239, -0.28348053, -0.37242615, 0.30941701};
nntrainer::Tensor A({batch, channel, height, width}, dataA);

float dataB[] = {-0.27615008, 0.43723762, -0.34135219, -0.01534167,
-0.32217509, 0.43340221, 0.11122712, -0.46792096,
-0.48326263, -0.26464382, 0.48709807, -0.18793547,
0.02684793, -0.10355628, 0.06903752, -0.07670835};
nntrainer::Tensor B({batch, channel, height, width}, dataB);

// quantized tensor qA and qB (quantized data - per tensor affine)
std::vector<int8_t> qdataA = {-47, -28, 87, -1, 123, -42, 39, -22,
-59, -97, 127, -96, -21, -78, -102, 85};
float scaleA = 0.00363567f;
// append scaleA's four raw bytes after the quantized values
int8_t *arrayA = reinterpret_cast<int8_t *>(&scaleA);
for (unsigned int i = 0; i < 4; ++i) {
qdataA.push_back(arrayA[i]);
}
nntrainer::Tensor qA({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataA.data());

std::vector<int8_t> qdataB = {-72, 114, -89, -4, -84, 113, 29, -122,
-126, -69, 127, -49, 7, -27, 18, -20};
float scaleB = 0.0038354177f;
// append scaleB's four raw bytes after the quantized values
int8_t *arrayB = reinterpret_cast<int8_t *>(&scaleB);
for (unsigned int i = 0; i < 4; ++i) {
qdataB.push_back(arrayB[i]);
}
nntrainer::Tensor qB({batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8},
qdataB.data());

// output tensors to store result
nntrainer::Tensor C(batch, channel, height, width);
nntrainer::Tensor qC(batch, channel, height, width, nntrainer::Tformat::NCHW,
nntrainer::Tdatatype::QINT8);

// perform multiplication
EXPECT_NO_THROW(A.multiply(B, C));
EXPECT_NO_THROW(qA.multiply(qB, qC, 0.001927134f));

// compare multiplication result
/// @todo change line 1098 - 1104 to clone() after #2834
// nntrainer::Tensor dequantizedC = qC.clone(nntrainer::Tdatatype::FP32);
nntrainer::Tensor dequantizedC(batch, channel, height, width);
float *data = dequantizedC.getData<float>();
int8_t *qdata = qC.getData<int8_t>();

for (unsigned int i = 0; i < dequantizedC.size(); ++i) {
data[i] = qdata[i];
}

// dequantize
dequantizedC.multiply_i(0.001927134f);

const float eps = 1e-3;

for (unsigned int b = 0; b < batch; b++) {
for (unsigned c = 0; c < channel; c++) {
for (unsigned h = 0; h < height; h++) {
for (unsigned w = 0; w < width; w++) {
EXPECT_NEAR(C.getValue(b, c, h, w), dequantizedC.getValue(b, c, h, w),
eps);
}
}
}
}
}
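
Hand-checking the first element of this test shows why eps = 1e-3 holds comfortably (values rounded for readability):

qA[0] * qB[0]   = (-47) * (-72) = 3384
multiplier      = 0.00363567 * 0.0038354177 / 0.001927134 ≈ 0.0072358
requantized     = round(0.0072358 * 3384) = round(24.49) = 24
dequantized     = 24 * 0.001927134 ≈ 0.046251
float reference = (-0.16924214) * (-0.27615008) ≈ 0.046736, |diff| ≈ 4.9e-4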

TEST(nntrainer_Tensor, multiply_float_01_p) {
int batch = 3;
int channel = 1;