[ Layer ] Move the Weight Read Function to Layer object #2856

Open · wants to merge 1 commit into main
45 changes: 45 additions & 0 deletions nntrainer/layers/bn_layer.cpp
@@ -376,4 +376,49 @@ void BatchNormalizationLayer::setBatch(RunLayerContext &context,
}
}

void BatchNormalizationLayer::read(std::ifstream &file,
RunLayerContext &run_context, bool opt_var,
ml::train::ExecutionMode mode,
bool trainable,
TensorDim::DataType definedWeightDataType) {
if (opt_var) {
for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
if (run_context.isGradientLastAccess(i) && trainable) {
/// @note read optimizer variables
for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i); ++j) {
run_context.getWeightOptVar(i, j).read(file);
}
}
}
} else {
for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
/// @note shared weights are only read at the first access
// if (run_context->isGradientLastAccess(i)) {
Contributor:

Suggested change:
- // if (run_context->isGradientLastAccess(i)) {

let's remove this line

if (run_context.isGradientFirstAccess(i)) {
if ((mode == ml::train::ExecutionMode::TRAIN) &&
(definedWeightDataType != TensorDim::DataType::FP32)) {

/** @note the batch normalization layer needs full precision
* for training, but the weight can be saved in another type;
* for training, the BN weight type is fixed to full precision */

TensorDim dim = run_context.getWeight(i).getDim();
dim.setDataType(definedWeightDataType);
Tensor T_read(dim, true);
Comment on lines +406 to +408
Contributor:

Suggested change:
- TensorDim dim = run_context.getWeight(i).getDim();
- dim.setDataType(definedWeightDataType);
- Tensor T_read(dim, true);
+ Tensor T_read = run_context.getWeight(i).clone(definedWeightDataType);

we can utilize Tensor::clone here :)

T_read.read(file);
run_context.getWeight(i).copyData(T_read);
} else {
run_context.getWeight(i).read(file);
}

if (run_context.isMixedPrecision(i) && trainable &&
!run_context.getWeightFP32(i).empty()) {
run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
}
}
}
}
}

} /* namespace nntrainer */
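A minimal, self-contained sketch of the conversion pattern the BN read path above implements. Plain float and double stand in for the saved and full-precision tensor types, and read_weight_full_precision is a hypothetical helper; the real nntrainer Tensor/TensorDim API is not reproduced here. The reviewer's Tensor::clone suggestion collapses the temporary-construction lines into a single call.

#include <cstddef>
#include <fstream>
#include <iostream>
#include <vector>

// Mirrors the BN path: Tensor T_read(dim, true); T_read.read(file);
// run_context.getWeight(i).copyData(T_read);
void read_weight_full_precision(std::ifstream &file,
                                std::vector<double> &weight) {
  std::vector<float> t_read(weight.size()); // temporary in the saved type
  file.read(reinterpret_cast<char *>(t_read.data()),
            static_cast<std::streamsize>(t_read.size() * sizeof(float)));
  for (std::size_t i = 0; i < weight.size(); ++i)
    weight[i] = static_cast<double>(t_read[i]); // element-wise up-conversion
}

int main() {
  // Write a fake weight file in the saved (float) type.
  const std::vector<float> saved = {1.5f, -2.25f, 0.125f};
  {
    std::ofstream out("bn_weight.bin", std::ios::binary);
    out.write(reinterpret_cast<const char *>(saved.data()),
              static_cast<std::streamsize>(saved.size() * sizeof(float)));
  }

  // Read it back into a full-precision weight, as BN training requires.
  std::vector<double> weight(saved.size());
  std::ifstream in("bn_weight.bin", std::ios::binary);
  read_weight_full_precision(in, weight);

  for (double v : weight)
    std::cout << v << '\n'; // prints 1.5, -2.25, 0.125
  return 0;
}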
9 changes: 9 additions & 0 deletions nntrainer/layers/bn_layer.h
@@ -124,6 +124,15 @@ class BatchNormalizationLayer : public Layer {

inline static const std::string type = "batch_normalization";

/**
* @copydoc Layer::read(std::ifstream &file, RunLayerContext &context, bool
* opt_var, ml::train::ExecutionMode mode, bool trainable, TensorDim::DataType
* definedWeightDataType)
*/
void read(std::ifstream &file, RunLayerContext &context, bool opt_var,
ml::train::ExecutionMode mode, bool trainable,
TensorDim::DataType definedWeightDataType) override;

private:
float divider; /**< size of the reduced axes */

40 changes: 39 additions & 1 deletion nntrainer/layers/layer_devel.h
@@ -29,6 +29,7 @@

#include <base_properties.h>
#include <common.h>
+ #include <layer_context.h>
#include <tensor_dim.h>

namespace ml::train {
@@ -38,7 +39,6 @@ class Layer;
namespace nntrainer {

class InitLayerContext;
- class RunLayerContext;
class Exporter;

/**
@@ -315,6 +315,44 @@
*/
virtual bool supportBackwarding() const = 0;

/**
* @brief read layer weight & bias data from file
* @param file input file stream
* @param run_context run context for the layer
* @param opt_var if true, read optimizer variables instead of weights
* @param mode execution mode
* @param trainable whether the layer is trainable
* @param defineWeightDataType weight tensor data type required by the network
*/
virtual void read(std::ifstream &file, RunLayerContext &run_context,
bool opt_var, ml::train::ExecutionMode mode, bool trainable,
TensorDim::DataType defineWeightDataType) {
if (opt_var) {
for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
if (run_context.isGradientLastAccess(i) && trainable) {
/// @note read optimizer variables
for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i); ++j) {
run_context.getWeightOptVar(i, j).read(file);
}
}
}
} else {

for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
/// @note shared weights are only read at the first access
if (run_context.isGradientFirstAccess(i)) {
run_context.getWeight(i).read(file);

if (run_context.isMixedPrecision(i) && trainable &&
!run_context.getWeightFP32(i).empty()) {
run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
}
}
}
}
}

protected:
bool is_inplace = false; /**< whether this layer is in-place or not */
};
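The mixed-precision branch at the end of this default implementation keeps an FP32 master copy in sync with the freshly loaded low-precision weight. Below is a small stand-alone sketch of that idea, with float standing in for the low-precision weight and double for the FP32 master copy; MixedWeight and sync_master_copy are hypothetical names, not nntrainer API.

#include <cstddef>
#include <iostream>
#include <vector>

struct MixedWeight {
  std::vector<float> value;   // low-precision weight, read from file
  std::vector<double> master; // full-precision copy; empty when unused
};

// Mirrors: if (isMixedPrecision(i) && trainable && !getWeightFP32(i).empty())
//            getWeightFP32(i).copyData(getWeight(i));
void sync_master_copy(MixedWeight &w, bool trainable) {
  if (!trainable || w.master.empty())
    return; // nothing to keep in sync
  for (std::size_t i = 0; i < w.value.size(); ++i)
    w.master[i] = static_cast<double>(w.value[i]);
}

int main() {
  MixedWeight w{{0.5f, 1.25f}, std::vector<double>(2)};
  sync_master_copy(w, /*trainable=*/true);
  std::cout << w.master[0] << ' ' << w.master[1] << '\n'; // 0.5 1.25
  return 0;
}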
45 changes: 3 additions & 42 deletions nntrainer/layers/layer_node.cpp
@@ -500,51 +500,12 @@ void LayerNode::exportTo(Exporter &exporter,

void LayerNode::read(std::ifstream &file, bool opt_var,
ml::train::ExecutionMode mode) {

NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";

- if (opt_var) {
- for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
- if (run_context->isGradientLastAccess(i) && getTrainable()) {
- /// @note read optimizer variables
- for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
- run_context->getWeightOptVar(i, j).read(file);
- }
- }
- }
- } else {
-
- for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
- /// @note shared weights are only be read at the first acecss
- // if (run_context->isGradientLastAccess(i)) {
- if (run_context->isGradientFirstAccess(i)) {
- if (layer->getType() == BatchNormalizationLayer::type) {
- if ((mode == ml::train::ExecutionMode::TRAIN) &&
- (this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
- /** @note for batch normalization layer, we do need full precision
- * for training. but weight can be saved with other type. for
- * training, bn weight type is fixed with full precsion */
-
- TensorDim dim = run_context->getWeight(i).getDim();
- dim.setDataType(this->getWeightDataType());
- Tensor T_read(dim, true);
- T_read.read(file);
- run_context->getWeight(i).copyData(T_read);
- } else {
- run_context->getWeight(i).read(file);
- }
- } else {
- run_context->getWeight(i).read(file);
- }
-
- if (run_context->isMixedPrecision(i) && getTrainable() &&
- !run_context->getWeightFP32(i).empty()) {
- run_context->getWeightFP32(i).copyData(run_context->getWeight(i));
- }
- }
- }
- }
+ getLayer()->read(file, *run_context, opt_var, mode, getTrainable(),
+ getWeightDataType());
}

void LayerNode::save(std::ofstream &file, bool opt_var,
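The net effect of this hunk: LayerNode::read no longer special-cases BatchNormalizationLayer by type string; virtual dispatch picks the right read. A simplified sketch of that dispatch follows, with signatures reduced to the file stream only; the real Layer::read also takes the run context, mode, trainable flag, and weight type.

#include <fstream>
#include <iostream>
#include <memory>

struct Layer {
  virtual ~Layer() = default;
  // Default path shared by most layers: plain weight read.
  virtual void read(std::ifstream &file) {
    std::cout << "Layer::read (default weight read)\n";
  }
};

struct BatchNormalizationLayer : Layer {
  // BN override: full-precision conversion path for training.
  void read(std::ifstream &file) override {
    std::cout << "BatchNormalizationLayer::read (FP32 conversion)\n";
  }
};

struct LayerNode {
  std::unique_ptr<Layer> layer;
  void read(std::ifstream &file) {
    layer->read(file); // no getType() check; the override decides
  }
};

int main() {
  std::ifstream file; // placeholder stream for the sketch
  LayerNode dense{std::make_unique<Layer>()};
  LayerNode bn{std::make_unique<BatchNormalizationLayer>()};
  dense.read(file); // default read
  bn.read(file);    // BN-specific read
  return 0;
}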