diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 467c2e36f9..a952ea8a54 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -162,12 +162,19 @@ const void *FloatTensor::getAddress(unsigned int i) const {
   return &((float *)getData())[i];
 }
 
-const float FloatTensor::getValue(unsigned int i) const {
+const float &FloatTensor::getValue(unsigned int i) const {
   return ((float *)getData())[i];
 }
 
-const float FloatTensor::getValue(unsigned int b, unsigned int c,
-                                  unsigned int h, unsigned int w) const {
+float &FloatTensor::getValue(unsigned int i) { return ((float *)getData())[i]; }
+
+const float &FloatTensor::getValue(unsigned int b, unsigned int c,
+                                   unsigned int h, unsigned int w) const {
+  return getValue(getIndex(b, c, h, w));
+}
+
+float &FloatTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                             unsigned int w) {
   return getValue(getIndex(b, c, h, w));
 }
 
@@ -896,6 +903,191 @@ void FloatTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
   }
 }
 
+std::vector<TensorV2> FloatTensor::split(std::vector<size_t> sizes, int axis) {
+  size_t num_size = sizes.size();
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
+  NNTR_THROW_IF(dim.getTensorDim(axis) != total_size, std::invalid_argument)
+    << "sum of the given sizes does not match the tensor dim, tensor dim: "
+    << dim.getTensorDim(axis) << " total size: " << total_size;
+
+  std::vector<TensorDim> ret_dims;
+  ret_dims.resize(num_size);
+  for (unsigned int i = 0; i < num_size; ++i) {
+    ret_dims[i] = dim;
+    ret_dims[i].setTensorDim(axis, sizes[i]);
+  }
+
+  bool is_format_nchw = (dim.getFormat() == Tformat::NCHW);
+  std::vector<TensorV2> ret;
+
+  auto iter_value = [this, is_format_nchw](
+                      std::array<size_t, 4> &loc,
+                      const std::array<size_t, 4> &end_loc,
+                      const std::array<size_t, 4> &reset_dim_arr) -> float & {
+    auto &value = (is_format_nchw) ? getValue(loc[0], loc[1], loc[2], loc[3])
+                                   : getValue(loc[0], loc[3], loc[1], loc[2]);
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] == end_loc[i]) {
+        loc[i] -= reset_dim_arr[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  ret.reserve(num_size);
+
+  unsigned int accumulated_size = 0;
+  for (unsigned int i = 0; i < num_size; ++i) {
+    std::array<size_t, 4> loc = {0, 0, 0, 0};
+
+    if (is_format_nchw) {
+      loc[axis] += accumulated_size;
+    } else {
+      if (axis == 0) {
+        loc[0] += accumulated_size;
+      } else if (axis == 1) {
+        loc[3] += accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += accumulated_size;
+      }
+    }
+
+    ret.emplace_back(ret_dims[i]);
+    auto &ret_t = ret.back();
+
+    std::array<size_t, 4> end_loc;
+
+    if (is_format_nchw) {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].channel(),
+                 ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].height(),
+                 ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    accumulated_size += sizes[i];
+
+    if (is_format_nchw) {
+      end_loc[axis] = accumulated_size;
+    } else {
+      if (axis == 0) {
+        end_loc[0] = accumulated_size;
+      } else if (axis == 1) {
+        end_loc[3] = accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        end_loc[axis - 1] = accumulated_size;
+      }
+    }
+
+    std::array<size_t, 4> reset_dim_arr;
+    if (is_format_nchw) {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(),
+                       ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(),
+                       ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    ret_t.apply_i<float>(
+      [&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
+        return iter_value(loc, end_loc, reset_dim_arr);
+      });
+  }
+
+  return ret;
+}
+
+TensorV2 FloatTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  TensorV2 ret;
+  auto ref_dim = tensors.front().getDim();
+  bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
+  ref_dim.setTensorDim(axis, 1);
+  NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
+                             [&ref_dim, axis](const TensorV2 &t) {
+                               auto cur_dim = t.getDim();
+                               cur_dim.setTensorDim(axis, 1);
+                               return ref_dim == cur_dim;
+                             }),
+                std::invalid_argument)
+    << "all tensors must have the same dimension except along the given axis, "
+    << "ref_dim: " << ref_dim << " axis: " << axis;
+
+  auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
+                                  [axis](unsigned cur, const TensorV2 &t) {
+                                    return cur + t.getDim().getTensorDim(axis);
+                                  });
+  auto iter_value =
+    [is_format_nchw](std::array<size_t, 4> &loc,
+                     const std::array<size_t, 4> &start_loc, TensorV2 &t,
+                     const std::array<size_t, 4> &ref_dim_arr) -> float & {
+    auto &value = is_format_nchw
+                    ? t.getValue<float>(loc[0], loc[1], loc[2], loc[3])
+                    : t.getValue<float>(loc[0], loc[3], loc[1], loc[2]);
+
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] - start_loc[i] == ref_dim_arr[i]) {
+        loc[i] = start_loc[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  auto ret_dim = ref_dim;
+  ret_dim.setTensorDim(axis, axis_dim);
+
+  ret = TensorV2(ret_dim);
+
+  std::array<size_t, 4> loc = {0, 0, 0, 0};
+  for (auto &t : tensors) {
+    std::array<size_t, 4> start_loc = loc;
+    std::array<size_t, 4> tensor_dim_arr;
+    if (is_format_nchw) {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(1);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(3);
+    } else {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(3);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(1);
+    }
+
+    for (size_t i = 0u, sz = t.size(); i < sz; ++i) {
+      iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<float>(i);
+    }
+
+    if (is_format_nchw) {
+      loc[axis] += t.getDim().getTensorDim(axis);
+    } else {
+      if (axis == 0) {
+        loc[0] += t.getDim().getTensorDim(axis);
+      } else if (axis == 1) {
+        loc[3] += t.getDim().getTensorDim(axis);
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += t.getDim().getTensorDim(axis);
+      }
+    }
+  }
+
+  return ret;
+}
+
 void FloatTensor::print(std::ostream &out) const {
   printInstance(out, this);
   const float *data = (float *)getData();
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index c8f239ef26..7f27788e07 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -119,7 +119,13 @@ class FloatTensor : public TensorBase {
    * @brief return value at specific location
    * @param[in] i index
    */
-  const float getValue(unsigned int i) const;
+  const float &getValue(unsigned int i) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] i index
+   */
+  float &getValue(unsigned int i);
 
   /**
    * @brief return value at specific location
@@ -128,8 +134,18 @@ class FloatTensor : public TensorBase {
    * @param[in] h height location
    * @param[in] w width location
    */
-  const float getValue(unsigned int b, unsigned int c, unsigned int h,
-                       unsigned int w) const;
+  const float &getValue(unsigned int b, unsigned int c, unsigned int h,
+                        unsigned int w) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   */
+  float &getValue(unsigned int b, unsigned int c, unsigned int h,
+                  unsigned int w);
 
   /**
    * @copydoc TensorV2::setValue(float value)
@@ -302,6 +318,16 @@ class FloatTensor : public TensorBase {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout) override;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) override;
+
+  /**
+   * @copydoc TensorV2::cat(const std::vector<TensorV2> &tensors, int axis)
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis);
+
   /**
    * @copydoc TensorV2::copy(const TensorV2 &from)
    */
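Note: the iter_value lambdas above visit a 4-D region in row-major order with an
odometer-style carry: the innermost axis advances first, and when an axis hits
its exclusive end bound it wraps back by that axis's extent and the next slower
axis advances. A minimal, self-contained sketch of that pattern (illustrative
only, not nntrainer code; the bounds below are made up):

#include <array>
#include <cstdio>

int main() {
  std::array<size_t, 4> loc = {0, 0, 0, 0};            // current position
  const std::array<size_t, 4> end_loc = {1, 2, 2, 3};  // exclusive end bounds
  const std::array<size_t, 4> reset = {1, 2, 2, 3};    // extent of each axis

  for (size_t step = 0; step < 1 * 2 * 2 * 3; ++step) {
    std::printf("(%zu, %zu, %zu, %zu)\n", loc[0], loc[1], loc[2], loc[3]);
    // Odometer increment: bump the last axis; on reaching the end bound,
    // wrap it back by its extent and carry into the next slower axis.
    for (int i = 3; i >= 0; --i) {
      loc[i]++;
      if (loc[i] == end_loc[i]) {
        loc[i] -= reset[i];
        continue;
      }
      break;
    }
  }
  return 0;
}

In split() the region's start is offset by accumulated_size along the split
axis, so end_loc and the reset extents differ per piece; the sketch uses a
region starting at the origin for simplicity.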
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 5902c8b6ac..14b752b7f1 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -162,12 +162,19 @@ const void *HalfTensor::getAddress(unsigned int i) const {
   return &((_FP16 *)getData())[i];
 }
 
-const _FP16 HalfTensor::getValue(unsigned int i) const {
+const _FP16 &HalfTensor::getValue(unsigned int i) const {
   return ((_FP16 *)getData())[i];
 }
 
-const _FP16 HalfTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
-                                 unsigned int w) const {
+_FP16 &HalfTensor::getValue(unsigned int i) { return ((_FP16 *)getData())[i]; }
+
+const _FP16 &HalfTensor::getValue(unsigned int b, unsigned int c,
+                                  unsigned int h, unsigned int w) const {
+  return getValue(getIndex(b, c, h, w));
+}
+
+_FP16 &HalfTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                            unsigned int w) {
   return getValue(getIndex(b, c, h, w));
 }
 
@@ -755,6 +762,189 @@ void HalfTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
   }
 }
 
+std::vector<TensorV2> HalfTensor::split(std::vector<size_t> sizes, int axis) {
+  size_t num_size = sizes.size();
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
+  NNTR_THROW_IF(dim.getTensorDim(axis) != total_size, std::invalid_argument)
+    << "sum of the given sizes does not match the tensor dim, tensor dim: "
+    << dim.getTensorDim(axis) << " total size: " << total_size;
+
+  std::vector<TensorDim> ret_dims;
+  ret_dims.resize(num_size);
+  for (unsigned int i = 0; i < num_size; ++i) {
+    ret_dims[i] = dim;
+    ret_dims[i].setTensorDim(axis, sizes[i]);
+  }
+
+  bool is_format_nchw = (dim.getFormat() == Tformat::NCHW);
+  std::vector<TensorV2> ret;
+
+  auto iter_value = [this, is_format_nchw](
+                      std::array<size_t, 4> &loc,
+                      const std::array<size_t, 4> &end_loc,
+                      const std::array<size_t, 4> &reset_dim_arr) -> _FP16 & {
+    auto &value = (is_format_nchw) ? getValue(loc[0], loc[1], loc[2], loc[3])
+                                   : getValue(loc[0], loc[3], loc[1], loc[2]);
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] == end_loc[i]) {
+        loc[i] -= reset_dim_arr[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  ret.reserve(num_size);
+
+  unsigned int accumulated_size = 0;
+  for (unsigned int i = 0; i < num_size; ++i) {
+    std::array<size_t, 4> loc = {0, 0, 0, 0};
+
+    if (is_format_nchw) {
+      loc[axis] += accumulated_size;
+    } else {
+      if (axis == 0) {
+        loc[0] += accumulated_size;
+      } else if (axis == 1) {
+        loc[3] += accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += accumulated_size;
+      }
+    }
+
+    ret.emplace_back(ret_dims[i]);
+    auto &ret_t = ret.back();
+
+    std::array<size_t, 4> end_loc;
+
+    if (is_format_nchw) {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].channel(),
+                 ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      end_loc = {ret_dims[i].batch(), ret_dims[i].height(),
+                 ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    accumulated_size += sizes[i];
+
+    if (is_format_nchw) {
+      end_loc[axis] = accumulated_size;
+    } else {
+      if (axis == 0) {
+        end_loc[0] = accumulated_size;
+      } else if (axis == 1) {
+        end_loc[3] = accumulated_size;
+      } else if (axis == 2 || axis == 3) {
+        end_loc[axis - 1] = accumulated_size;
+      }
+    }
+
+    std::array<size_t, 4> reset_dim_arr;
+    if (is_format_nchw) {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(),
+                       ret_dims[i].height(), ret_dims[i].width()};
+    } else {
+      reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(),
+                       ret_dims[i].width(), ret_dims[i].channel()};
+    }
+
+    ret_t.apply_i<_FP16>(
+      [&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) {
+        return iter_value(loc, end_loc, reset_dim_arr);
+      });
+  }
+
+  return ret;
+}
+
+TensorV2 HalfTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
+  if (axis == -1) {
+    axis = 3;
+  }
+  TensorV2 ret;
+  auto ref_dim = tensors.front().getDim();
+  bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
+  ref_dim.setTensorDim(axis, 1);
+  NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
+                             [&ref_dim, axis](const TensorV2 &t) {
+                               auto cur_dim = t.getDim();
+                               cur_dim.setTensorDim(axis, 1);
+                               return ref_dim == cur_dim;
+                             }),
+                std::invalid_argument)
+    << "all tensors must have the same dimension except along the given axis, "
+    << "ref_dim: " << ref_dim << " axis: " << axis;
+
+  auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
+                                  [axis](unsigned cur, const TensorV2 &t) {
+                                    return cur + t.getDim().getTensorDim(axis);
+                                  });
+  auto iter_value =
+    [is_format_nchw](std::array<size_t, 4> &loc,
+                     const std::array<size_t, 4> &start_loc, TensorV2 &t,
+                     const std::array<size_t, 4> &ref_dim_arr) -> _FP16 & {
+    auto &value = is_format_nchw
+                    ? t.getValue<_FP16>(loc[0], loc[1], loc[2], loc[3])
+                    : t.getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]);
+
+    for (int i = 3; i >= 0; --i) {
+      loc[i]++;
+      if (loc[i] - start_loc[i] == ref_dim_arr[i]) {
+        loc[i] = start_loc[i];
+        continue;
+      }
+      break;
+    }
+    return value;
+  };
+
+  auto ret_dim = ref_dim;
+  ret_dim.setTensorDim(axis, axis_dim);
+
+  ret = TensorV2(ret_dim);
+
+  std::array<size_t, 4> loc = {0, 0, 0, 0};
+  for (auto &t : tensors) {
+    std::array<size_t, 4> start_loc = loc;
+    std::array<size_t, 4> tensor_dim_arr;
+    if (is_format_nchw) {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(1);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(3);
+    } else {
+      tensor_dim_arr[0] = t.getDim().getTensorDim(0);
+      tensor_dim_arr[1] = t.getDim().getTensorDim(2);
+      tensor_dim_arr[2] = t.getDim().getTensorDim(3);
+      tensor_dim_arr[3] = t.getDim().getTensorDim(1);
+    }
+
+    for (size_t i = 0u, sz = t.size(); i < sz; ++i) {
+      iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<_FP16>(i);
+    }
+
+    if (is_format_nchw) {
+      loc[axis] += t.getDim().getTensorDim(axis);
+    } else {
+      if (axis == 0) {
+        loc[0] += t.getDim().getTensorDim(axis);
+      } else if (axis == 1) {
+        loc[3] += t.getDim().getTensorDim(axis);
+      } else if (axis == 2 || axis == 3) {
+        loc[axis - 1] += t.getDim().getTensorDim(axis);
+      }
+    }
+  }
+  return ret;
+}
+
 void HalfTensor::print(std::ostream &out) const {
   printInstance(out, this);
   const _FP16 *data = (_FP16 *)getData();
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index a905ba4bb8..3dc3a081b9 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -118,7 +118,13 @@ class HalfTensor : public TensorBase {
    * @brief return value at specific location
    * @param[in] idx location
    */
-  const _FP16 getValue(unsigned int i) const;
+  const _FP16 &getValue(unsigned int i) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] i index
+   */
+  _FP16 &getValue(unsigned int i);
 
   /**
    * @brief return value at specific location
@@ -127,8 +133,18 @@ class HalfTensor : public TensorBase {
    * @param[in] h height location
    * @param[in] w width location
    */
-  const _FP16 getValue(unsigned int b, unsigned int c, unsigned int h,
-                       unsigned int w) const;
+  const _FP16 &getValue(unsigned int b, unsigned int c, unsigned int h,
+                        unsigned int w) const;
+
+  /**
+   * @brief return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   */
+  _FP16 &getValue(unsigned int b, unsigned int c, unsigned int h,
+                  unsigned int w);
 
   /**
    * @copydoc TensorV2::setValue(float value)
@@ -301,6 +317,16 @@ class HalfTensor : public TensorBase {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout) override;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) override;
+
+  /**
+   * @copydoc TensorV2::cat(const std::vector<TensorV2> &tensors, int axis)
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis);
+
   /**
    * @copydoc TensorV2::copy(const TensorV2 &from)
    */
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 28484ed9e9..a8a3fff017 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -327,6 +327,11 @@ class TensorBase {
    */
   virtual void zoneout_mask(TensorV2 &opposite, float zoneout) = 0;
 
+  /**
+   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   */
+  virtual std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) = 0;
+
   /**
    * @copydoc TensorV2::print(std::ostream &out)
    */
diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp
index d37c6b2371..98fd095e6b 100644
--- a/nntrainer/tensor/tensor_v2.cpp
+++ b/nntrainer/tensor/tensor_v2.cpp
@@ -669,6 +669,69 @@ void TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout) {
   itensor->zoneout_mask(opposite, zoneout);
 }
 
+std::vector<TensorV2> TensorV2::split(unsigned num_size, int axis) {
+  NNTR_THROW_IF(num_size == 0, std::invalid_argument)
+    << "num_size cannot be zero";
+
+  if (axis == -1) {
+    axis = 3;
+  }
+
+  NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument)
+    << "cannot split along axis: " << axis;
+
+  NNTR_THROW_IF(getDim().getTensorDim(axis) % num_size != 0,
+                std::invalid_argument)
+    << "dimension along the axis is not divisible by num_size, axis: " << axis
+    << " num size: " << num_size;
+
+  std::vector<size_t> sizes;
+  sizes.resize(num_size);
+
+  unsigned int sz = getDim().getTensorDim(axis) / num_size;
+  std::fill(sizes.begin(), sizes.end(), sz);
+
+  return split(sizes, axis);
+}
+
+std::vector<TensorV2> TensorV2::split(std::vector<size_t> sizes, int axis) {
+  NNTR_THROW_IF(sizes.size() == 0, std::invalid_argument)
+    << "the given sizes vector cannot be empty";
+
+  NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
+    << "cannot split along axis: " << axis;
+
+  NNTR_THROW_IF(
+    std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }),
+    std::invalid_argument)
+    << "among the given sizes at least one size is 0";
+
+  return itensor->split(sizes, axis);
+}
+
+TensorV2 TensorV2::cat(const std::vector<TensorV2> &tensors, int axis) {
+  NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
+    << "cannot concatenate along axis: " << axis;
+
+  NNTR_THROW_IF(tensors.empty(), std::invalid_argument)
+    << "given tensor vector is empty";
+
+  TensorV2 output;
+  Tdatatype dtype = tensors.front().getDim().getDataType();
+
+  if (dtype == Tdatatype::FP32) {
+    output = FloatTensor::cat(tensors, axis);
+  } else if (dtype == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    output = HalfTensor::cat(tensors, axis);
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+
+  return output;
+}
+
 void TensorV2::print(std::ostream &out) const { itensor->print(out); }
 
 void TensorV2::putData() const { itensor->putData(); }
diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h
index ba0bf36a4d..9c1171adb9 100644
--- a/nntrainer/tensor/tensor_v2.h
+++ b/nntrainer/tensor/tensor_v2.h
@@ -1010,6 +1010,35 @@ class TensorV2 {
    */
   void zoneout_mask(TensorV2 &opposite, float zoneout);
 
+  /**
+   * @brief split tensor along axis.
+   *
+   * @param num_size number of evenly sized pieces to split into
+   * @param axis axis to split along
+   * @return std::vector<TensorV2> the split tensors
+   */
+  std::vector<TensorV2> split(unsigned num_size, int axis = 0);
+
+  /**
+   * @brief split tensor along axis.
+   *
+   * @param sizes sizes of the pieces along the given axis
+   * @param axis axis to split along
+   * @return std::vector<TensorV2> the split tensors
+   * @note the sum of the given sizes must equal the tensor's dimension
+   * along the given axis
+   */
+  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis = 0);
+
+  /**
+   * @brief concatenate tensors along axis
+   *
+   * @param tensors tensors to be concatenated to the first tensor
+   * @param axis axis to concatenate along
+   * @return TensorV2 concatenated tensor
+   */
+  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis = 0);
+
   /**
    * @brief Print element
    * @param[in] out out stream
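Note: a usage sketch of the API added in this patch. The TensorDim(b, c, h, w)
constructor and the include path are assumptions for illustration, and the
shapes are made up; this is not taken from the test suite.

#include <tensor_v2.h>

#include <vector>

void split_cat_roundtrip() {
  using nntrainer::TensorV2;

  // A 1x1x2x6 FP32 tensor; split the width axis (3) into widths 2 and 4.
  TensorV2 t(nntrainer::TensorDim(1, 1, 2, 6));
  std::vector<TensorV2> parts = t.split({2, 4}, 3);
  // parts[0] is 1x1x2x2 and parts[1] is 1x1x2x4; the sizes must sum to the
  // dimension along the axis, or split() throws std::invalid_argument.

  // cat() is the inverse along the same axis: joined has t's original
  // dimensions, with parts[0]'s columns first, then parts[1]'s.
  TensorV2 joined = TensorV2::cat(parts, 3);
}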