diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp
index b6c200792b..f09116091a 100644
--- a/nntrainer/cl_context.cpp
+++ b/nntrainer/cl_context.cpp
@@ -53,9 +53,11 @@ static void add_default_object(ClContext &cc) {
                        ml::train::LayerType::LAYER_RESHAPE);
   }
 
-  // @todo rmsnormlayercl also needs to be updated.
-  cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
-                     RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);
+  if (RMSNormLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
+                       RMSNormLayerCl::type,
+                       ml::train::LayerType::LAYER_RMSNORM);
+  }
 
   if (ConcatLayerCl::registerClKernels()) {
     cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>,
diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index c5802291d8..bd0b2f7fce 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -50,10 +50,10 @@ enum BNParams {
 
 BatchNormalizationLayer::BatchNormalizationLayer() :
   Layer(),
   divider(0),
-  bn_props(props::Epsilon(), props::BNPARAMS_MU_INIT(),
-           props::BNPARAMS_VAR_INIT(), props::BNPARAMS_BETA_INIT(),
-           props::BNPARAMS_GAMMA_INIT(), props::Momentum(), props::Axis(),
-           props::WeightDecay(), props::BiasDecay()) {
+  bn_props(props::Epsilon(), props::MuInitializer(), props::VarInitializer(),
+           props::BetaInitializer(), props::GammaInitializer(),
+           props::Momentum(), props::Axis(), props::WeightDecay(),
+           props::BiasDecay()) {
   wt_idx.fill(std::numeric_limits<unsigned>::max());
 }
 
@@ -62,10 +62,10 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
   NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
     << "Only one input is allowed for batch normalization layer";
 
-  auto &bnparams_mu = std::get<props::BNPARAMS_MU_INIT>(bn_props);
-  auto &bnparams_var = std::get<props::BNPARAMS_VAR_INIT>(bn_props);
-  auto &bnparams_beta = std::get<props::BNPARAMS_BETA_INIT>(bn_props);
-  auto &bnparams_gamma = std::get<props::BNPARAMS_GAMMA_INIT>(bn_props);
+  auto &bnparams_mu = std::get<props::MuInitializer>(bn_props);
+  auto &bnparams_var = std::get<props::VarInitializer>(bn_props);
+  auto &bnparams_beta = std::get<props::BetaInitializer>(bn_props);
+  auto &bnparams_gamma = std::get<props::GammaInitializer>(bn_props);
   auto &weight_decay = std::get<props::WeightDecay>(bn_props);
   auto &bias_decay = std::get<props::BiasDecay>(bn_props);
diff --git a/nntrainer/layers/bn_layer.h b/nntrainer/layers/bn_layer.h
index f8d611cd9d..22b1ea3ef0 100644
--- a/nntrainer/layers/bn_layer.h
+++ b/nntrainer/layers/bn_layer.h
@@ -126,9 +126,9 @@ class BatchNormalizationLayer : public Layer {
   std::vector axes_to_reduce;    /**< target axes to reduce */
   std::array wt_idx; /**< indices of the weights and tensors */
 
-  std::tuple<props::Epsilon, props::BNPARAMS_MU_INIT, props::BNPARAMS_VAR_INIT,
-             props::BNPARAMS_BETA_INIT, props::BNPARAMS_GAMMA_INIT,
-             props::Momentum, props::Axis, props::WeightDecay, props::BiasDecay>
+  std::tuple<props::Epsilon, props::MuInitializer, props::VarInitializer,
+             props::BetaInitializer, props::GammaInitializer, props::Momentum,
+             props::Axis, props::WeightDecay, props::BiasDecay>
     bn_props;
 };
diff --git a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
index 179b89fa8a..fe05129959 100644
--- a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
+++ b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
@@ -91,13 +91,12 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 
 enum RMSParams { gamma };
 
-RMSNormLayerCl::RMSNormLayerCl() : LayerImpl() { wt_idx.fill(0); }
+RMSNormLayerCl::RMSNormLayerCl() : LayerImplCl() { wt_idx.fill(0); }
 
 void RMSNormLayerCl::finalize(InitLayerContext &context) {
   std::vector<TensorDim> dim = context.getInputDimensions();
   context.setOutputDimensions(dim);
-  auto &rmsparams_gamma =
-    std::get<props::RMS_NORM_GAMMA_INIT_GPU>(rmsnorm_props);
+  auto &rmsparams_gamma = std::get<props::GammaInitializer>(rmsnorm_props);
 
   TensorDim gamma_dim(
     1, 1, 1, dim[0].width(),
@@ -123,9 +122,6 @@ void RMSNormLayerCl::forwarding(RunLayerContext &context, bool training) {
   }
 }
 
-opencl::Kernel RMSNormLayerCl::kernel_rmsnorm;
-opencl::Kernel RMSNormLayerCl::kernel_rmsnorm_fp16;
-
 void RMSNormLayerCl::rmsnormProcess(Tensor const &input, Tensor &result,
                                     Tensor const &gamma, const float epsilon) {
   bool ret = false;
@@ -138,11 +134,8 @@ void RMSNormLayerCl::rmsnormProcess(Tensor const &input, Tensor &result,
   int w = input.width();
 
   do {
-    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr =
-      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_, "rmsnorm_cl");
-    if (!kernel_rmsnorm_ptr) {
-      break;
-    }
+
+    auto kernel_rmsnorm_ptr = layer_kernel_ptrs[Kernels::RMSNORM_CL];
 
     opencl::Buffer inputbuf(cl_context_ref.context_inst_, dim1 * sizeof(float),
                             true, nullptr);
@@ -219,6 +212,7 @@ void RMSNormLayerCl::rmsnormProcess(Tensor const &input, Tensor &result,
   } while (false);
 }
 
+#ifdef ENABLE_FP16
 void RMSNormLayerCl::rmsnormProcess_fp16(Tensor const &input, Tensor &result,
                                          Tensor const &gamma,
                                          const float epsilon) {
@@ -232,12 +226,8 @@ void RMSNormLayerCl::rmsnormProcess_fp16(Tensor const &input, Tensor &result,
   int h = input.height();
   int w = input.width();
 
   do {
-    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr =
-      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_fp16_,
-                                      "rmsnorm_cl_fp16");
-    if (!kernel_rmsnorm_ptr) {
-      break;
-    }
+    auto kernel_rmsnorm_ptr = layer_kernel_ptrs[Kernels::RMSNORM_CL_FP16];
+
     opencl::Buffer inputbuf(cl_context_ref.context_inst_,
                             dim1 * sizeof(cl_half), true, nullptr);
@@ -308,6 +298,7 @@ void RMSNormLayerCl::rmsnormProcess_fp16(Tensor const &input, Tensor &result,
     }
   } while (false);
 }
+#endif
 
 void RMSNormLayerCl::incremental_forwarding(nntrainer::RunLayerContext &context,
                                             unsigned int from, unsigned int to,
@@ -339,7 +330,11 @@ void RMSNormLayerCl::incremental_forwarding(nntrainer::RunLayerContext &context,
   if (in_step.getDataType() == ml::train::TensorDim::DataType::FP32) {
     rmsnormProcess(in, out, gamma, epsilon);
   } else {
+#ifdef ENABLE_FP16
     rmsnormProcess_fp16(in, out, gamma, epsilon);
+#else
+    throw std::runtime_error("enable-fp16 is not enabled");
+#endif
   }
 }
 
@@ -362,4 +357,44 @@ void RMSNormLayerCl::setProperty(const std::vector<std::string> &values) {
   LayerImpl::setProperty(remain_props);
 }
 
+bool RMSNormLayerCl::registerClKernels() {
+
+  // check if already registered
+  if (!layer_kernel_ptrs.empty()) {
+    ml_loge("kernels for rmsnorm layer are already registered.");
+    return false;
+  }
+
+  do {
+
+    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr = nullptr;
+
+    kernel_rmsnorm_ptr =
+      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_, "rmsnorm_cl");
+    if (!kernel_rmsnorm_ptr) {
+      ml_loge("OpenCL Error: Fail to register rmsnorm_cl kernel");
+      break;
+    }
+    layer_kernel_ptrs.emplace_back(kernel_rmsnorm_ptr);
+
+#ifdef ENABLE_FP16
+    kernel_rmsnorm_ptr = cl_context_ref.registerClKernel(
+      rmsnorm_cl_kernel_fp16_, "rmsnorm_cl_fp16");
+    if (!kernel_rmsnorm_ptr) {
+      ml_loge("OpenCL Error: Fail to register rmsnorm_cl_fp16 kernel");
+      break;
+    }
+    layer_kernel_ptrs.emplace_back(kernel_rmsnorm_ptr);
+#endif
+
+    return true;
+
+  } while (false);
+
+  // clear all registered kernels if any error occurs during registration
+  layer_kernel_ptrs.clear();
+
+  return false;
+}
+
 } // namespace nntrainer
diff --git a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
index 43f942ea1e..d6f3225603 100644
--- a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
+++ b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
@@ -16,7 +16,7 @@
 #ifdef __cplusplus
 
 #include <common_properties.h>
-#include <layer_impl.h>
+#include <layer_impl_cl.h>
 
 #include <opencl_buffer.h>
 #include <opencl_kernel.h>
@@ -25,36 +25,11 @@
 
 namespace nntrainer {
 
-namespace props {
-
-/**
- * @brief RMS_NORM_GAMMA_INIT_GPU Initialization Enumeration Information
- *
- */
-class RMS_NORM_GAMMA_INIT_GPU final
-  : public ::nntrainer::EnumProperty<::nntrainer::props::InitializerInfo> {
-public:
-  /**
-   * @brief Construct a RMS_NORM_GAMMA_INIT object
-   */
-  RMS_NORM_GAMMA_INIT_GPU(
-    ::nntrainer::Initializer value = ::nntrainer::Initializer::ONES) {
-    set(value);
-  };
-  using prop_tag = enum_class_prop_tag;
-  static constexpr const char *key = "gamma_initializer";
-};
-}; // namespace props
-
 /**
  * @class RMSNormLayer
  * @brief RMS Norm layer
  */
-
-class RMSNormLayerCl : public LayerImpl {
-
-private:
-  inline static ClContext cl_context_ref;
+class RMSNormLayerCl : public LayerImplCl {
 
 public:
   /**
@@ -118,9 +93,6 @@ class RMSNormLayerCl : public LayerImpl {
    */
   const std::string getType() const override { return RMSNormLayerCl::type; };
 
-  static opencl::Kernel kernel_rmsnorm;
-  static opencl::Kernel kernel_rmsnorm_fp16;
-
   /**
   * @brief Process data and dimensions for rms norm operation
   * @param[in] input Tensor
@@ -153,12 +125,26 @@ class RMSNormLayerCl : public LayerImpl {
   */
   void setProperty(const std::vector<std::string> &values) override;
 
+  /**
+   * @brief registerClKernels
+   */
+  static bool registerClKernels();
+
   inline static const std::string type = "rmsnorm";
 
 private:
   std::array wt_idx;
-  std::tuple<props::RMS_NORM_GAMMA_INIT_GPU, props::Epsilon>
+
+  std::tuple<props::GammaInitializer, props::Epsilon>
     rmsnorm_props; /**< rmsnorm layer properties */
+
+  inline static std::vector<ClContext::SharedPtrClKernel>
+    layer_kernel_ptrs; /**< kernel list relevant with this layer */
+
+  enum Kernels {
+    RMSNORM_CL,
+    RMSNORM_CL_FP16,
+  };
 };
 
 } // namespace nntrainer
diff --git a/nntrainer/layers/common_properties.cpp b/nntrainer/layers/common_properties.cpp
index e1eac32986..c38700bca6 100644
--- a/nntrainer/layers/common_properties.cpp
+++ b/nntrainer/layers/common_properties.cpp
@@ -314,13 +314,13 @@ WeightInitializer::WeightInitializer(Initializer value) { set(value); }
 
 BiasInitializer::BiasInitializer(Initializer value) { set(value); }
 
-BNPARAMS_MU_INIT::BNPARAMS_MU_INIT(Initializer value) { set(value); }
+MuInitializer::MuInitializer(Initializer value) { set(value); }
 
-BNPARAMS_VAR_INIT::BNPARAMS_VAR_INIT(Initializer value) { set(value); }
+VarInitializer::VarInitializer(Initializer value) { set(value); }
 
-BNPARAMS_GAMMA_INIT::BNPARAMS_GAMMA_INIT(Initializer value) { set(value); }
+GammaInitializer::GammaInitializer(Initializer value) { set(value); }
 
-BNPARAMS_BETA_INIT::BNPARAMS_BETA_INIT(Initializer value) { set(value); }
+BetaInitializer::BetaInitializer(Initializer value) { set(value); }
 
 BasicRegularizer::BasicRegularizer(nntrainer::WeightRegularizer value) {
   set(value);
diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h
index 4c7b502d20..ff59a88a4d 100644
--- a/nntrainer/layers/common_properties.h
+++ b/nntrainer/layers/common_properties.h
@@ -1020,57 +1020,57 @@ class BiasInitializer final : public EnumProperty<InitializerInfo> {
 };
 
 /**
- * @brief BNPARAMS_MU_INIT Initialization Enumeration Information
+ * @brief MuInitializer Initialization Enumeration Information
  *
  */
-class BNPARAMS_MU_INIT final : public EnumProperty<InitializerInfo> {
+class MuInitializer final : public EnumProperty<InitializerInfo> {
 public:
   /**
-   * @brief Construct a BNPARAMS_MU_INIT object
+   * @brief Construct a MuInitializer object
    */
-  BNPARAMS_MU_INIT(Initializer value = Initializer::ZEROS);
+  MuInitializer(Initializer value = Initializer::ZEROS);
   using prop_tag = enum_class_prop_tag;
   static constexpr const char *key = "moving_mean_initializer";
 };
 
 /**
- * @brief BNPARAMS_VAR_INIT Initialization Enumeration Information
+ * @brief VarInitializer Initialization Enumeration Information
  *
  */
-class BNPARAMS_VAR_INIT final : public EnumProperty<InitializerInfo> {
+class VarInitializer final : public EnumProperty<InitializerInfo> {
 public:
   /**
-   * @brief Construct a BNPARAMS_VAR_INIT object
+   * @brief Construct a VarInitializer object
    */
-  BNPARAMS_VAR_INIT(Initializer value = Initializer::ONES);
+  VarInitializer(Initializer value = Initializer::ONES);
   using prop_tag = enum_class_prop_tag;
   static constexpr const char *key = "moving_variance_initializer";
 };
 
 /**
- * @brief BNPARAMS_GAMMA_INIT Initialization Enumeration Information
+ * @brief GammaInitializer Initialization Enumeration Information
  *
  */
-class BNPARAMS_GAMMA_INIT final : public EnumProperty<InitializerInfo> {
+class GammaInitializer final : public EnumProperty<InitializerInfo> {
 public:
   /**
-   * @brief Construct a BNPARAMS_GAMMA_INIT object
+   * @brief Construct a GammaInitializer object
    */
-  BNPARAMS_GAMMA_INIT(Initializer value = Initializer::ONES);
+  GammaInitializer(Initializer value = Initializer::ONES);
   using prop_tag = enum_class_prop_tag;
   static constexpr const char *key = "gamma_initializer";
 };
 
 /**
- * @brief BNPARAMS_BETA_INIT Initialization Enumeration Information
+ * @brief BetaInitializer Initialization Enumeration Information
  *
  */
-class BNPARAMS_BETA_INIT final : public EnumProperty<InitializerInfo> {
+class BetaInitializer final : public EnumProperty<InitializerInfo> {
 public:
   /**
-   * @brief Construct a BNPARAMS_BETA_INIT object
+   * @brief Construct a BetaInitializer object
    */
-  BNPARAMS_BETA_INIT(Initializer value = Initializer::ZEROS);
+  BetaInitializer(Initializer value = Initializer::ZEROS);
   using prop_tag = enum_class_prop_tag;
   static constexpr const char *key = "beta_initializer";
 };
diff --git a/nntrainer/layers/layer_normalization_layer.cpp b/nntrainer/layers/layer_normalization_layer.cpp
index a115e82b62..17b732ea81 100644
--- a/nntrainer/layers/layer_normalization_layer.cpp
+++ b/nntrainer/layers/layer_normalization_layer.cpp
@@ -38,9 +38,9 @@ enum LNParams {
 
 LayerNormalizationLayer::LayerNormalizationLayer() :
   Layer(),
-  layer_normalization_props(
-    std::vector<props::Axis>(), props::Epsilon(), props::BNPARAMS_GAMMA_INIT(),
-    props::BNPARAMS_BETA_INIT(), props::WeightDecay(), props::BiasDecay()) {
+  layer_normalization_props(std::vector<props::Axis>(), props::Epsilon(),
+                            props::GammaInitializer(), props::BetaInitializer(),
+                            props::WeightDecay(), props::BiasDecay()) {
   wt_idx.fill(std::numeric_limits<unsigned>::max());
 }
 
@@ -51,9 +51,9 @@ void LayerNormalizationLayer::finalize(InitLayerContext &context) {
   }
 
   auto gamma_initializer =
-    std::get<props::BNPARAMS_GAMMA_INIT>(layer_normalization_props).get();
+    std::get<props::GammaInitializer>(layer_normalization_props).get();
   auto beta_initializer =
-    std::get<props::BNPARAMS_BETA_INIT>(layer_normalization_props).get();
+    std::get<props::BetaInitializer>(layer_normalization_props).get();
   auto weight_decay = std::get<props::WeightDecay>(layer_normalization_props);
   auto bias_decay = std::get<props::BiasDecay>(layer_normalization_props);
 
diff --git a/nntrainer/layers/layer_normalization_layer.h b/nntrainer/layers/layer_normalization_layer.h
index 7511df0871..ba52b6caa8 100644
--- a/nntrainer/layers/layer_normalization_layer.h
+++ b/nntrainer/layers/layer_normalization_layer.h
@@ -124,9 +124,8 @@ class LayerNormalizationLayer : public Layer {
     remain_axes; /**< remained axes (exclusive with normalize axes) */
 
   std::array wt_idx;
-  std::tuple<std::vector<props::Axis>, props::Epsilon,
-             props::BNPARAMS_GAMMA_INIT, props::BNPARAMS_BETA_INIT,
-             props::WeightDecay, props::BiasDecay>
+  std::tuple<std::vector<props::Axis>, props::Epsilon, props::GammaInitializer,
+             props::BetaInitializer, props::WeightDecay, props::BiasDecay>
     layer_normalization_props;
 };
 
diff --git a/nntrainer/utils/node_exporter.cpp b/nntrainer/utils/node_exporter.cpp
index 031d2c2fbf..40cb945cda 100644
--- a/nntrainer/utils/node_exporter.cpp
+++ b/nntrainer/utils/node_exporter.cpp
@@ -144,10 +144,10 @@ void Exporter::saveTflResult(const std::tuple &props,
 
 template <>
 void Exporter::saveTflResult(
-  const std::tuple<props::Epsilon, props::BNPARAMS_MU_INIT,
-                   props::BNPARAMS_VAR_INIT, props::BNPARAMS_BETA_INIT,
-                   props::BNPARAMS_GAMMA_INIT, props::Momentum, props::Axis,
-                   props::WeightDecay, props::BiasDecay> &props,
+  const std::tuple<props::Epsilon, props::MuInitializer, props::VarInitializer,
+                   props::BetaInitializer, props::GammaInitializer,
+                   props::Momentum, props::Axis, props::WeightDecay,
+                   props::BiasDecay> &props,
   const BatchNormalizationLayer *self) {
   createIfNull(tf_node);
 
diff --git a/nntrainer/utils/node_exporter.h b/nntrainer/utils/node_exporter.h
index de29cf77d9..8669017ced 100644
--- a/nntrainer/utils/node_exporter.h
+++ b/nntrainer/utils/node_exporter.h
@@ -258,10 +258,10 @@ class BatchNormalizationLayer;
  */
 template <>
 void Exporter::saveTflResult(
-  const std::tuple<props::Epsilon, props::BNPARAMS_MU_INIT,
-                   props::BNPARAMS_VAR_INIT, props::BNPARAMS_BETA_INIT,
-                   props::BNPARAMS_GAMMA_INIT, props::Momentum, props::Axis,
-                   props::WeightDecay, props::BiasDecay> &props,
+  const std::tuple<props::Epsilon, props::MuInitializer, props::VarInitializer,
+                   props::BetaInitializer, props::GammaInitializer,
+                   props::Momentum, props::Axis, props::WeightDecay,
+                   props::BiasDecay> &props,
   const BatchNormalizationLayer *self);
 
 class LayerImpl;
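
Taken together, the patch moves kernel compilation out of the per-call forwarding path into a one-time static registerClKernels() whose success gates factory registration in the ClContext. The following is a minimal, self-contained sketch of that pattern, not nntrainer code: FakeContext, FakeKernel and DemoLayerCl are illustrative stand-ins for ClContext, the shared kernel handle and RMSNormLayerCl.

// pattern_sketch.cpp - build with: g++ -std=c++17 pattern_sketch.cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct FakeKernel {
  std::string name;
};
using SharedKernel = std::shared_ptr<FakeKernel>;

struct FakeContext {
  // Pretend to compile and register a kernel; returns nullptr on failure.
  SharedKernel registerClKernel(const std::string &src,
                                const std::string &name) {
    if (src.empty())
      return nullptr; // simulated build error
    return std::make_shared<FakeKernel>(FakeKernel{name});
  }
};

class DemoLayerCl {
public:
  enum Kernels { DEMO_CL, DEMO_CL_FP16 };

  // Mirrors the shape of RMSNormLayerCl::registerClKernels(): register once,
  // roll back and report failure if any kernel cannot be built.
  static bool registerClKernels(FakeContext &ctx) {
    if (!layer_kernel_ptrs.empty())
      return false; // already registered

    do {
      auto fp32 = ctx.registerClKernel("__kernel void demo() {}", "demo_cl");
      if (!fp32)
        break;
      layer_kernel_ptrs.emplace_back(fp32);

#ifdef ENABLE_FP16
      auto fp16 =
        ctx.registerClKernel("__kernel void demo_fp16() {}", "demo_cl_fp16");
      if (!fp16)
        break;
      layer_kernel_ptrs.emplace_back(fp16);
#endif
      return true;
    } while (false);

    layer_kernel_ptrs.clear(); // partial failure: start clean on retry
    return false;
  }

  // Forwarding code indexes this cached list instead of rebuilding kernels.
  inline static std::vector<SharedKernel> layer_kernel_ptrs;
};

int main() {
  FakeContext ctx;
  // Factory registration is gated on kernel registration, mirroring
  // add_default_object() in cl_context.cpp.
  if (DemoLayerCl::registerClKernels(ctx)) {
    std::cout << "factory registered, "
              << DemoLayerCl::layer_kernel_ptrs.size() << " kernel(s) cached\n";
  } else {
    std::cout << "kernel build failed; factory not registered\n";
  }
  return 0;
}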