[Layers] Modify layers for data type
It is assumed that activations and weights are fully compatible in data
type, so no conversion between them is needed. The input and loss layers
are handled differently, because input data and label data are assumed to
always be of float32 type for now.

Signed-off-by: Jiho Chu <[email protected]>
jihochu authored and DonghakPark committed May 27, 2024
1 parent 27c9f0f commit 600f996
Showing 16 changed files with 560 additions and 215 deletions.
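In short: layer-internal tensors now simply adopt the activation data type instead of being converted, while input and label data stay float32. Below is a minimal illustrative sketch of that assumption, using hypothetical stand-in types rather than nntrainer's actual TensorDim/Tdatatype; it is not part of this commit.

#include <cassert>

// Hypothetical stand-in types used only to illustrate the commit's
// assumption; nntrainer's real TensorDim/Tdatatype are richer than this.
enum class DataType { FP32, FP16 };

struct Dim {
  DataType dtype = DataType::FP32;
};

// Layer-internal buffers simply adopt the activation data type, so no
// activation<->weight conversion step is needed anywhere in the layer.
Dim working_dim_for(const Dim &activation) {
  Dim d;
  d.dtype = activation.dtype;
  return d;
}

// Input and loss layers are the exception: input and label data are assumed
// to always be float32 for now.
Dim input_dim() { return Dim{DataType::FP32}; }

int main() {
  Dim fp16_activation{DataType::FP16};
  assert(working_dim_for(fp16_activation).dtype == DataType::FP16);
  assert(input_dim().dtype == DataType::FP32);
  return 0;
}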
12 changes: 9 additions & 3 deletions nntrainer/layers/bn_layer.cpp
@@ -111,6 +111,12 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
context.requestWeight(dim, bnparams_beta, WeightRegularizer::NONE, 1.0f,
bias_decay, "beta", true);

/**
* @note declare weight dimension with activation data type
*/
TensorDim w_dim = dim;
w_dim.setDataType(in_dim.getDataType());

/**
* caches the deviation -> input - avg(input)
* @todo check if avoiding this storage and adding dependency on input (no
@@ -121,7 +127,7 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
TensorLifespan::ITERATION_LIFESPAN);
/** caches the inverse standard deviation */
wt_idx[BNParams::invstd] =
context.requestTensor(dim, "invstd", Tensor::Initializer::NONE, false,
context.requestTensor(w_dim, "invstd", Tensor::Initializer::NONE, false,
TensorLifespan::ITERATION_LIFESPAN);
/**
* Temporary tensor to store the full sized tensors in order to allow batch
@@ -136,13 +142,13 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
* caches variance + epsilon as well.
*/
wt_idx[BNParams::cvar] =
context.requestTensor(dim, "cvar", Tensor::Initializer::NONE, false,
context.requestTensor(w_dim, "cvar", Tensor::Initializer::NONE, false,
TensorLifespan::ITERATION_LIFESPAN);
/**
* Temporary tensor to store the reduced tensors along the axes_to_reduce.
*/
wt_idx[BNParams::t_reduced] =
context.requestTensor(dim, "tensor_reduced", Tensor::Initializer::NONE,
context.requestTensor(w_dim, "tensor_reduced", Tensor::Initializer::NONE,
false, TensorLifespan::FORWARD_DERIV_LIFESPAN);
}

160 changes: 101 additions & 59 deletions nntrainer/layers/conv2d_layer.cpp
@@ -38,7 +38,8 @@ namespace {
static TensorDim calcCol2ImOutputDim(const TensorDim &out,
const TensorDim &kdim) {

return TensorDim({kdim.getFeatureLen(), out.width() * out.height()});
return TensorDim({kdim.getFeatureLen(), out.width() * out.height()},
out.getTensorType());
}

/**
@@ -56,7 +57,10 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
const std::array<props::Stride, CONV2D_DIM> &mstride,
const std::array<props::Dilation, CONV2D_DIM> &dilation,
Tensor &image) {
auto [pt, pb, pl, pr] = padding;
auto pt = padding[0];
auto pb = padding[1];
auto pl = padding[2];
auto pr = padding[3];

unsigned k_height = kdim.height();
unsigned k_width = kdim.width();
@@ -84,32 +88,48 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
int h_stride_end = im_eff_height - eff_k_height - pt;
int w_stride_end = im_eff_width - eff_k_width - pl;

unsigned col_w = 0;
for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
unsigned col_h = 0;
int patch_height_end = hs + eff_k_height;
int patch_width_end = ws + eff_k_width;
for (unsigned c = 0; c < im_channel; c++) {
for (int h = hs; h < patch_height_end; h += hdilation) {
if (h < 0 || im_height <= h) {
col_h += k_width;
continue;
}
for (int w = ws; w < patch_width_end; w += wdilation) {
if (w < 0 || im_width <= w) {
col_h++;
auto apply_data = [&]<typename T>(T *val) {
unsigned col_w = 0;
for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
unsigned col_h = 0;
int patch_height_end = hs + eff_k_height;
int patch_width_end = ws + eff_k_width;
for (unsigned c = 0; c < im_channel; c++) {
for (int h = hs; h < patch_height_end; h += hdilation) {
if (h < 0 || im_height <= h) {
col_h += k_width;
continue;
}

float *val = image.getAddress<float>(0, c, h, w);
*val += col_matrix.getValue<float>(0, 0, col_h, col_w);
col_h++;
for (int w = ws; w < patch_width_end; w += wdilation) {
if (w < 0 || im_width <= w) {
col_h++;
continue;
}

val = image.getAddress<T>(0, c, h, w);
*val += col_matrix.getValue<T>(0, 0, col_h, col_w);
col_h++;
}
}
}
col_w++;
}
col_w++;
}
};

if (image.getDataType() == nntrainer::Tdatatype::FP32) {
float val;
apply_data(&val);
}
#ifdef ENABLE_FP16
else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
_FP16 val;
apply_data(&val);
}
#endif
else {
throw std::runtime_error("Not supported datatype");
}
}
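The rewritten col2im above moves the accumulation loop into a C++20 templated lambda (apply_data) and instantiates it per element type, chosen by an if/else ladder on the tensor's runtime data type: FP32 always, FP16 only behind ENABLE_FP16, anything else throws. The standalone sketch below shows the same dispatch shape with made-up names; it does not use nntrainer's Tensor API and is not part of this commit.

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

// Illustrative stand-in for the runtime data-type tag (the real tag is
// nntrainer::Tdatatype); only the dispatch shape is shown here.
enum class DType { FP32, FP16 };

// Sum a raw buffer. The loop body lives in one C++20 templated lambda and an
// if/else ladder on the runtime tag instantiates it with the right element
// type, just as col2im/im2col do with their accumulation loops.
double sum_buffer(const void *data, std::size_t n, DType dtype) {
  double acc = 0.0;
  auto apply_data = [&]<typename T>(const T *ptr) {
    for (std::size_t i = 0; i < n; ++i)
      acc += static_cast<double>(ptr[i]);
  };

  if (dtype == DType::FP32) {
    apply_data(static_cast<const float *>(data));
  }
  // The real code adds an `else if (... FP16)` branch guarded by ENABLE_FP16
  // that instantiates the same lambda with _FP16.
  else {
    throw std::runtime_error("Not supported datatype");
  }
  return acc;
}

int main() {
  std::vector<float> v{1.0f, 2.0f, 3.0f};
  std::cout << sum_buffer(v.data(), v.size(), DType::FP32) << '\n'; // 6
  return 0;
}

The same shape is reused by im2col below, which passes a typed pointer obtained from out.getData<T>() into its lambda instead of a dummy value.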

@@ -179,7 +199,10 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
// }
*/

auto [pt, pb, pl, pr] = padding;
auto pt = padding[0];
auto pb = padding[1];
auto pl = padding[2];
auto pr = padding[3];

unsigned int channel = in.channel();
int in_height = in.height();
@@ -198,46 +221,62 @@ unsigned int out_width = (width - eff_k_width) / mstride[1] + 1;
unsigned int out_width = (width - eff_k_width) / mstride[1] + 1;

out.reshape(
TensorDim({out_height * out_width, in.channel() * k_height * k_width}));
float *out_data = out.getData();

int h_stride_end = height - eff_k_height - pt;
int w_stride_end = width - eff_k_width - pl;

/// get a patch, size of kernel
/// hs is height_strided, ws is width_strided
unsigned int owidth = out.width();
unsigned int base_im_w = 0;
for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
unsigned int base_im_h = 0;
int patch_height_end = eff_k_height + hs;
/// map the patch to a single line looping through channel
for (unsigned int c = 0; c < channel; ++c) {
for (int h = hs; h < patch_height_end; h += dilation[0]) {
if (h < 0 || in_height <= h) {
base_im_h += k_width;
continue;
}

unsigned int im_w = base_im_w;
for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
unsigned int im_h = base_im_h;
int patch_width_end = eff_k_width + ws;
TensorDim({out_height * out_width, in.channel() * k_height * k_width},
in.getTensorType()));

auto apply_data = [&]<typename T>(T *out_data) {
int h_stride_end = height - eff_k_height - pt;
int w_stride_end = width - eff_k_width - pl;

/// get a patch, size of kernel
/// hs is height_strided, ws is width_strided
unsigned int owidth = out.width();
unsigned int base_im_w = 0;
for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
unsigned int base_im_h = 0;
int patch_height_end = eff_k_height + hs;
/// map the patch to a single line looping through channel
for (unsigned int c = 0; c < channel; ++c) {
for (int h = hs; h < patch_height_end; h += dilation[0]) {
if (h < 0 || in_height <= h) {
base_im_h += k_width;
continue;
}

for (int w = ws; w < patch_width_end; w += dilation[1]) {
if (w < 0 || in_width <= w) {
unsigned int im_w = base_im_w;
for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
unsigned int im_h = base_im_h;
int patch_width_end = eff_k_width + ws;

for (int w = ws; w < patch_width_end; w += dilation[1]) {
if (w < 0 || in_width <= w) {
im_h++;
continue;
}
out_data[im_w * owidth + im_h] = in.getValue<T>(0, c, h, w);
im_h++;
continue;
}
out_data[im_w * owidth + im_h] = in.getValue<float>(0, c, h, w);
im_h++;
im_w++;
}
im_w++;
base_im_h += k_width;
}
base_im_h += k_width;
}
base_im_w += out_width;
}
base_im_w += out_width;
};

if (out.getDataType() == nntrainer::Tdatatype::FP32) {
float *out_data = out.getData<float>();
apply_data(out_data);
}
#ifdef ENABLE_FP16
else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
_FP16 *out_data = out.getData<_FP16>();
apply_data(out_data);
}
#endif
else {
throw std::runtime_error("Not supported datatype");
}
}
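For orientation, the reshape in im2col lays the result out as one row per output spatial position and one column per kernel element (channel * k_height * k_width). A standalone check of that shape arithmetic under assumed example sizes (dilation 1, no padding); not part of this commit:

#include <cassert>

// One output position per stride step along an axis (dilation 1, no padding
// in this assumed example).
unsigned out_len(unsigned in_len, unsigned k, unsigned stride) {
  return (in_len - k) / stride + 1;
}

int main() {
  // Assumed example: 3x8x8 input, 3x3 kernel, stride 1.
  unsigned channel = 3, in_h = 8, in_w = 8, k_h = 3, k_w = 3;
  unsigned out_h = out_len(in_h, k_h, 1); // 6
  unsigned out_w = out_len(in_w, k_w, 1); // 6
  // im2col matrix: one row per output position, one column per kernel element.
  unsigned rows = out_h * out_w;       // 36
  unsigned cols = channel * k_h * k_w; // 27
  assert(rows == 36 && cols == 27);
  return 0;
}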

@@ -279,9 +318,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
auto &dilation =
std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);

TensorDim kernel_dim =
TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
auto in_t_type = in_dim.getTensorType();
in_t_type.data_type = context.getWeightDataType();
TensorDim kernel_dim = TensorDim(filter_size, in_dim.channel(),
kernel_size[0], kernel_size[1], in_t_type);
TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_t_type);

padding = std::get<props::Padding2D>(conv_props)
.compute(in_dim, kernel_dim, {stride[0], stride[1]},
@@ -309,6 +350,7 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
out_dim.channel(filter_size);
out_dim.height((eff_in_height - eff_k_height) / stride[0] + 1);
out_dim.width((eff_in_width - eff_k_width) / stride[1] + 1);
out_dim.setTensorType(in_dim.getTensorType());
context.setOutputDimensions({out_dim});

NNTR_THROW_IF(eff_in_height < kernel_size[0] || eff_in_width < kernel_size[1],
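The updated finalize keeps the usual strided-convolution output arithmetic: padding is folded into the input extent, dilation into the kernel extent, and the NNTR_THROW_IF guard rejects inputs whose padded extent is smaller than the kernel. A standalone check of that arithmetic with assumed example values; not part of this commit:

#include <cassert>

// Effective kernel extent once dilation is applied, as used by Conv2DLayer.
unsigned eff_kernel(unsigned k, unsigned dilation) {
  return (k - 1) * dilation + 1;
}

// Output length along one spatial axis: pad the input, subtract the
// effective kernel, then stride.
unsigned conv_out(unsigned in_len, unsigned pad_a, unsigned pad_b, unsigned k,
                  unsigned dilation, unsigned stride) {
  unsigned eff_in = in_len + pad_a + pad_b;
  unsigned eff_k = eff_kernel(k, dilation);
  assert(eff_in >= k); // mirrors the NNTR_THROW_IF validity check
  return (eff_in - eff_k) / stride + 1;
}

int main() {
  // Assumed example: 32-wide input, 3x3 kernel, padding 1, stride 1.
  assert(conv_out(32, 1, 1, 3, 1, 1) == 32); // "same" padding keeps the size
  // Dilation 2 makes the effective kernel 5 wide: (32 + 2 - 5) / 1 + 1 = 30.
  assert(conv_out(32, 1, 1, 3, 2, 1) == 30);
  return 0;
}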
51 changes: 51 additions & 0 deletions nntrainer/layers/layer_context.cpp
@@ -157,6 +157,16 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
return weights[idx]->getVariableRef();
}

/**
* @brief Get the master Weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor* Pointer to the master weight tensor
*/
Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
return weights[idx]->getVariableMasterRef();
}

/**
* @brief Get the Weight Gradient tensor object
*
@@ -195,6 +205,18 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
return weights[idx]->getOptimizerVariableRef(jdx);
}

/**
* @brief Get the Weight Optimizer Master Variable tensor object
*
* @param idx Identifier of the weight
* @param jdx Identifier of the optimizer variables
* @return Tensor& Reference to the weight optimizer master variable tensor
*/
Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
unsigned int jdx) const {
return weights[idx]->getOptimizerMasterVariableRef(jdx);
}

/**
* @brief Get the Number of Weight Optimizer Variable tensor object
*
@@ -205,6 +227,16 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
return weights[idx]->getNumOptVariable();
}

/**
* @brief Get the Number of Weight Optimizer Master Variable tensor objects
*
* @param idx Identifier of the weight
* @return int Number of the weight optimizer master variables
*/
unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
return weights[idx]->getNumOptMasterVariable();
}
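The new master accessors (getWeightMaster, getWeightOptMasterVar, getNumWeightOptMasterVar) suggest a mixed-precision arrangement in which the working weight and its optimizer variables may live in reduced precision while a full-precision master copy backs the update. The sketch below illustrates that relationship with hypothetical stand-in types; it is an assumption about intent, not nntrainer's actual Weight class, and is not part of this commit.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a mixed-precision weight: a low-precision working
// copy used in forward/backward, plus an optional FP32 master copy used for
// the optimizer update. nntrainer's Weight/Var_Grad classes are richer.
struct MixedWeight {
  std::vector<float> working; // would be FP16 in a real mixed-precision run
  std::vector<float> master;  // authoritative FP32 values (may be empty)

  bool hasMaster() const { return !master.empty(); }

  // Apply an SGD step to the master copy, then refresh the working copy.
  void applyGradient(const std::vector<float> &grad, float lr) {
    std::vector<float> &target = hasMaster() ? master : working;
    for (std::size_t i = 0; i < target.size(); ++i)
      target[i] -= lr * grad[i];
    if (hasMaster())
      working = master; // a real implementation would cast FP32 -> FP16 here
  }
};

int main() {
  MixedWeight w{{1.0f, 1.0f}, {1.0f, 1.0f}};
  w.applyGradient({0.5f, 0.5f}, 0.5f);
  assert(w.master[0] == 0.75f && w.working[0] == 0.75f);
  return 0;
}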

/**
* @brief Get regularization loss for the weight
*
@@ -344,6 +376,25 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
return getInputGrad(idx);
}

bool RunLayerContext::validateDerivatives() {
auto num_in = getNumInputs();
auto num_out = getNumOutputs();

for (unsigned int i = 0; i < num_in; ++i) {
auto deriv = getIncomingDerivative(i);
if (deriv.checkDataValidation(false) == false)
return false;
}

for (unsigned int i = 0; i < num_out; ++i) {
auto deriv = getOutgoingDerivative(i);
if (deriv.checkDataValidation(false) == false)
return false;
}

return true;
}
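validateDerivatives gives callers one check that every incoming and outgoing derivative holds valid data, which matters when a reduced-precision backward pass can produce non-finite values. The standalone sketch below mirrors that all-or-nothing shape; the assumption that Tensor::checkDataValidation flags non-finite values is ours, not something this diff states, and the code is not part of the commit.

#include <cassert>
#include <cmath>
#include <limits>
#include <vector>

// Standalone sketch of the kind of per-tensor check validateDerivatives
// relies on; assumed here to mean "no NaN or Inf in the buffer".
bool buffer_is_valid(const std::vector<float> &buf) {
  for (float v : buf) {
    if (!std::isfinite(v))
      return false;
  }
  return true;
}

// Mirrors validateDerivatives' shape: every derivative buffer must pass,
// otherwise the whole step is reported as invalid.
bool all_valid(const std::vector<std::vector<float>> &derivs) {
  for (const auto &d : derivs) {
    if (!buffer_is_valid(d))
      return false;
  }
  return true;
}

int main() {
  assert(all_valid({{0.1f, -2.0f}, {3.0f}}));
  assert(!all_valid({{0.1f, std::numeric_limits<float>::quiet_NaN()}}));
  return 0;
}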

/**
* @brief Get the Tensor object
*