[Layer] Remove Tensor setDataType() usage #2498

Merged · 1 commit · Mar 15, 2024
5 changes: 2 additions & 3 deletions nntrainer/layers/layer_normalization_layer.cpp
@@ -227,7 +227,6 @@ void LayerNormalizationLayer::incremental_forwarding(RunLayerContext &context,
   deviation.multiply(inv_std_dev, output);
   output.multiply_i(gamma);
   output.add_i(beta);
-
 }

 void LayerNormalizationLayer::calcDerivative(RunLayerContext &context) {
@@ -236,8 +235,8 @@ void LayerNormalizationLayer::calcDerivative(RunLayerContext &context) {
   TensorDim::TensorType weight_tensor_type =
     context.getWeight(wt_idx[LNParams::gamma]).getTensorType();

-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_tensor_type.format, weight_tensor_type.data_type);
@jijoongmoon (Collaborator) commented on Mar 14, 2024:

We can set the format and data_type together from weight_tensor_type; they do not need to be passed separately. Also, we do not always need to set the tensor name. When a tensor is created the way it was here, with Tensor empty;, no buffer is allocated, so I think it is fine to set only the tensor type at that point. The reason the old code was written this way is that we do not want to allocate a buffer at this time.
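A minimal sketch of the deferred-allocation behavior described above, assuming the Tensor API used elsewhere in this diff (setTensorType() plus lazy buffer allocation):

// Sketch only: this mirrors the old pattern under discussion. Assuming
// nntrainer's Tensor allocates lazily, neither line below creates a
// buffer; the default constructor and setTensorType() record metadata
// (format and data type) and nothing else.
nntrainer::Tensor empty;
empty.setTensorType(weight_tensor_type);
// A buffer would appear only on a later allocate() call (assumption:
// allocation is explicit, as the comments in this thread suggest).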

@djeong20 (Contributor, Author) replied on Mar 14, 2024:

As we discussed offline, Tensor empty; would create a FloatTensor by default:

#include <iostream>
#include <tensor.h>

nntrainer::Tensor t;
if (t.getDataType() == nntrainer::Tdatatype::FP32) {
  // This branch executes: a default-constructed tensor is FP32
  std::cout << "FP32" << std::endl;
} else {
  std::cout << "FP16" << std::endl;
}

I will add a Tensor constructor that takes only the data type and does not allocate a buffer (e.g., Tensor empty = Tensor(weight_tensor_type.data_type);).
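For illustration, a self-contained sketch of what such a constructor might look like. Everything below (class, enums, members) is a hypothetical stand-in for exposition, not nntrainer's actual Tensor:

#include <cstddef>
#include <memory>

// Hypothetical stand-ins for nntrainer's TensorDim::Format / DataType.
enum class Format { NCHW, NHWC };
enum class DataType { FP16, FP32 };

// A tensor that records its type up front but defers buffer allocation,
// mirroring the constructor proposed in the comment above.
class Tensor {
public:
  Tensor() = default; // defaults to FP32, matching the snippet above

  // Proposed shape of the new constructor: type info only, no buffer.
  explicit Tensor(DataType d_type, Format fmt = Format::NCHW) :
    data_type(d_type), format(fmt) {}

  bool isAllocated() const { return buffer != nullptr; }

  // Allocation happens only when explicitly requested.
  void allocate(std::size_t num_elems) {
    const std::size_t elem_size = (data_type == DataType::FP32) ? 4 : 2;
    buffer = std::make_unique<unsigned char[]>(num_elems * elem_size);
  }

private:
  DataType data_type = DataType::FP32;
  Format format = Format::NCHW;
  std::unique_ptr<unsigned char[]> buffer; // stays empty until allocate()
};

// Usage: Tensor t(DataType::FP16);  // type recorded, no allocation
//        t.allocate(64);            // buffer created only here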


   Tensor &outgoing_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
   const Tensor &incoming_derivative =
nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -68,9 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   auto dataType = y.getDataType();
-
-  Tensor ret;
-  ret.setDataType(dataType);
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
25 changes: 13 additions & 12 deletions nntrainer/layers/lstm.cpp
@@ -69,8 +69,9 @@ void LSTMLayer::forwardingBatchFirstLSTM(
   for (unsigned int t = 0; t < max_timestep; ++t) {
     Tensor input = input_sample.getSharedDataTensor(
       input_tensor_dim, (reverse ? max_timestep - 1 - t : t) * feature_size);
-    Tensor prev_hidden_state;
-    prev_hidden_state.setTensorType(tensor_type);
+
+    Tensor prev_hidden_state = Tensor(
+      "prev_hidden_state", weight_ih.getFormat(), weight_ih.getDataType());

     if (!t) {
       prev_hidden_state = Tensor(unit, tensor_type);
@@ -285,8 +286,9 @@ void LSTMLayer::calcGradientBatchFirstLSTM(

   // Temporary variable for d_prev_hidden_state. d_prev_hidden_state
   // already have precalculated values from incomming derivatives
-  Tensor d_prev_hidden_state_temp;
-  d_prev_hidden_state_temp.setTensorType(tensor_type);
+  Tensor d_prev_hidden_state_temp =
+    Tensor("d_prev_hidden_state_temp", tensor_type.format,
+           tensor_type.data_type);

   calcGradientLSTM(
     1, unit, disable_bias, integrate_bias, acti_func,
@@ -383,8 +385,9 @@ void LSTMLayer::calcGradientBatchFirstLSTM(

   // Temporary variable for d_prev_hidden_state. d_prev_hidden_state
   // already have precalculated values from incomming derivatives
-  Tensor d_prev_hidden_state_temp;
-  d_prev_cell_state.setTensorType(tensor_type);
+  Tensor d_prev_hidden_state_temp =
+    Tensor("d_prev_hidden_state_temp", tensor_type.format,
+           tensor_type.data_type);

   calcGradientLSTM(1, unit, disable_bias, integrate_bias, acti_func,
                    recurrent_acti_func, input, prev_hidden_state,
@@ -652,9 +655,8 @@ void LSTMLayer::forwarding(RunLayerContext &context, bool training) {
   const Tensor &weight_ih = context.getWeight(wt_idx[LSTMParams::weight_ih]);
   const Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);

-  TensorDim::TensorType weight_tensor_type = weight_ih.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_ih.getFormat(), weight_ih.getDataType());

   const Tensor &bias_h = !disable_bias && integrate_bias
                            ? context.getWeight(wt_idx[LSTMParams::bias_h])
@@ -836,9 +838,8 @@ void LSTMLayer::calcGradient(RunLayerContext &context) {
   const Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);
   Tensor &d_weight_hh = context.getWeightGrad(wt_idx[LSTMParams::weight_hh]);

-  TensorDim::TensorType weight_tensor_type = weight_hh.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_hh.getFormat(), weight_hh.getDataType());

   Tensor &d_bias_h = !disable_bias && integrate_bias
                        ? context.getWeightGrad(wt_idx[LSTMParams::bias_h])
14 changes: 6 additions & 8 deletions nntrainer/layers/lstmcell.cpp
@@ -213,9 +213,8 @@ void LSTMCellLayer::forwarding(RunLayerContext &context, bool training) {
   const Tensor &weight_hh =
     context.getWeight(wt_idx[LSTMCellParams::weight_hh]);

-  TensorDim::TensorType weight_tensor_type = weight_ih.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_ih.getFormat(), weight_ih.getDataType());

   const Tensor &bias_h = !disable_bias && integrate_bias
                            ? context.getWeight(wt_idx[LSTMCellParams::bias_h])
@@ -286,9 +285,8 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
   Tensor &d_weight_hh =
     context.getWeightGrad(wt_idx[LSTMCellParams::weight_hh]);

-  TensorDim::TensorType weight_tensor_type = weight_hh.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_hh.getFormat(), weight_hh.getDataType());

   Tensor &d_bias_h = !disable_bias && integrate_bias
                        ? context.getWeightGrad(wt_idx[LSTMCellParams::bias_h])
@@ -324,8 +322,8 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
     }
   }

-  Tensor d_hidden_state_masked;
-  d_hidden_state_masked.setTensorType(weight_tensor_type);
+  Tensor d_hidden_state_masked = Tensor(
+    "d_hidden_state_masked", weight_hh.getFormat(), weight_hh.getDataType());

   if (dropout_rate > epsilon) {
     Tensor &dropout_mask =
4 changes: 2 additions & 2 deletions nntrainer/layers/lstmcell_core.cpp
@@ -114,8 +114,8 @@ void LSTMCore::calcGradientLSTM(
   Tensor d_output_gate = d_ifgo.getSharedDataTensor(
     {batch_size, 1, 1, unit, tensor_type}, unit * 3, false);

-  Tensor activated_cell_state;
-  activated_cell_state.setTensorType(cell_state.getTensorType());
+  Tensor activated_cell_state = Tensor(
+    "activated_cell_state", cell_state.getFormat(), cell_state.getDataType());

   acti_func.run_fn(cell_state, activated_cell_state);
   d_hidden_state.multiply_strided(activated_cell_state, d_output_gate);
14 changes: 6 additions & 8 deletions nntrainer/layers/multi_head_attention_layer.cpp
@@ -500,8 +500,8 @@ void MultiHeadAttentionLayer::forwarding(RunLayerContext &context,

   for (unsigned int i = 0; i < mask_dim_height; ++i) {
     for (unsigned int j = i + 1; j < mask_dim_width; ++j) {
-      causal_mask.setValue(
-        0, 0, i, j, _MASK_NUM(attention_weight.getDataType()));
+      causal_mask.setValue(0, 0, i, j,
+                           _MASK_NUM(attention_weight.getDataType()));
     }
   }

@@ -629,9 +629,8 @@ void MultiHeadAttentionLayer::initial_incremental_forwarding(
   Tensor &key = context.getInput(INOUT_INDEX::KEY);
   Tensor &value = context.getInput(INOUT_INDEX::VALUE);

-  Tensor empty_tensor;
-
-  empty_tensor.setTensorType(value.getTensorType());
+  Tensor empty_tensor =
+    Tensor("empty_tensor", value.getFormat(), value.getDataType());

   Tensor &mask =
     provide_attention_mask ? context.getInput(INOUT_INDEX::MASK) : empty_tensor;
@@ -909,9 +908,8 @@ void MultiHeadAttentionLayer::incremental_forwarding(RunLayerContext &context,
   Tensor &key = context.getInput(INOUT_INDEX::KEY);
   Tensor &value = context.getInput(INOUT_INDEX::VALUE);

-  Tensor empty_tensor;
-
-  empty_tensor.setTensorType(value.getTensorType());
+  Tensor empty_tensor =
+    Tensor("empty_tensor", value.getFormat(), value.getDataType());

   Tensor &mask =
     provide_attention_mask ? context.getInput(INOUT_INDEX::MASK) : empty_tensor;