From ba561d3b89b42c69b4b8503425042f79530b7dce Mon Sep 17 00:00:00 2001
From: Eunju Yang
Date: Wed, 6 Nov 2024 17:07:26 +0900
Subject: [PATCH] [ GPU/OpenCL ] change rmsnorm_layer_cl to inherit LayerImplCl

- This commit updates rmsnorm_layer_cl.cpp/h to inherit LayerImplCl.
- This commit implements registerClKernels() of the rmsnorm layer.
- This commit updates cl_context.cpp (applying rmsnorm_layer_cl's update).
- This commit updates common_properties.h (adding a property for the rmsnorm layer).

Self evaluation:

  Build test: [X]Passed [ ]Failed [ ]Skipped
  Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang
---
 nntrainer/cl_context.cpp                      |  8 ++-
 .../layers/cl_layers/rmsnorm_layer_cl.cpp     | 67 ++++++++++++++-----
 nntrainer/layers/cl_layers/rmsnorm_layer_cl.h | 48 +++++--------
 nntrainer/layers/common_properties.h          | 13 ++++
 test/jni/Android.mk                           |  1 +
 5 files changed, 86 insertions(+), 51 deletions(-)

diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp
index b6c200792b..f09116091a 100644
--- a/nntrainer/cl_context.cpp
+++ b/nntrainer/cl_context.cpp
@@ -53,9 +53,11 @@ static void add_default_object(ClContext &cc) {
                        ml::train::LayerType::LAYER_RESHAPE);
   }
 
-  // @todo rmsnormlayercl also needs to be updated.
-  cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
-                     RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);
+  if (RMSNormLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
+                       RMSNormLayerCl::type,
+                       ml::train::LayerType::LAYER_RMSNORM);
+  }
 
   if (ConcatLayerCl::registerClKernels()) {
     cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>,
diff --git a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
index 179b89fa8a..28168ee784 100644
--- a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
+++ b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.cpp
@@ -91,13 +91,12 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 
 enum RMSParams { gamma };
 
-RMSNormLayerCl::RMSNormLayerCl() : LayerImpl() { wt_idx.fill(0); }
+RMSNormLayerCl::RMSNormLayerCl() : LayerImplCl() { wt_idx.fill(0); }
 
 void RMSNormLayerCl::finalize(InitLayerContext &context) {
   std::vector<TensorDim> dim = context.getInputDimensions();
   context.setOutputDimensions(dim);
-  auto &rmsparams_gamma =
-    std::get<props::RMS_NORM_GAMMA_INIT_GPU>(rmsnorm_props);
+  auto &rmsparams_gamma = std::get<props::RMS_NORM_GAMMA_INIT>(rmsnorm_props);
 
   TensorDim gamma_dim(
     1, 1, 1, dim[0].width(),
@@ -123,9 +122,6 @@ void RMSNormLayerCl::forwarding(RunLayerContext &context, bool training) {
   }
 }
 
-opencl::Kernel RMSNormLayerCl::kernel_rmsnorm;
-opencl::Kernel RMSNormLayerCl::kernel_rmsnorm_fp16;
-
 void RMSNormLayerCl::rmsnormProcess(Tensor const &input, Tensor &result,
                                     Tensor const &gamma, const float epsilon) {
   bool ret = false;
   int dim1 = input.batch() * input.height() * input.width() * input.channel();
   CREATE_IF_EMPTY_DIMS(result, input.batch(), input.channel(), input.height(),
                        input.width(), input.getTensorType());
   int b = input.batch();
   int c = input.channel();
   int h = input.height();
   int w = input.width();
   do {
-    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr =
-      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_, "rmsnorm_cl");
-    if (!kernel_rmsnorm_ptr) {
-      break;
-    }
+
+    auto kernel_rmsnorm_ptr = layer_kernel_ptrs[Kernels::RMSNORM_CL];
 
     opencl::Buffer inputbuf(cl_context_ref.context_inst_,
                             dim1 * sizeof(float), true, nullptr);
@@ -219,6 +212,7 @@ void RMSNormLayerCl::rmsnormProcess(Tensor const &input, Tensor &result,
   }
   } while (false);
 }
 
+#ifdef ENABLE_FP16
 void RMSNormLayerCl::rmsnormProcess_fp16(Tensor const &input, Tensor &result,
                                          Tensor const &gamma,
                                          const float epsilon) {
   bool ret = false;
   int dim1 = input.batch() * input.height() * input.width() * input.channel();
   CREATE_IF_EMPTY_DIMS(result, input.batch(), input.channel(), input.height(),
                        input.width(), input.getTensorType());
   int b = input.batch();
   int c = input.channel();
   int h = input.height();
   int w = input.width();
   do {
-    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr =
-      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_fp16_,
-                                      "rmsnorm_cl_fp16");
-    if (!kernel_rmsnorm_ptr) {
-      break;
-    }
+    auto kernel_rmsnorm_ptr = layer_kernel_ptrs[Kernels::RMSNORM_CL_FP16];
+
     opencl::Buffer inputbuf(cl_context_ref.context_inst_,
                             dim1 * sizeof(cl_half), true, nullptr);
 
@@ -308,6 +298,7 @@ void RMSNormLayerCl::rmsnormProcess_fp16(Tensor const &input, Tensor &result,
   }
   } while (false);
 }
+#endif
 
 void RMSNormLayerCl::incremental_forwarding(nntrainer::RunLayerContext &context,
                                             unsigned int from, unsigned int to,
@@ -339,7 +330,9 @@ void RMSNormLayerCl::incremental_forwarding(nntrainer::RunLayerContext &context,
   if (in_step.getDataType() == ml::train::TensorDim::DataType::FP32) {
     rmsnormProcess(in, out, gamma, epsilon);
   } else {
+#ifdef ENABLE_FP16
     rmsnormProcess_fp16(in, out, gamma, epsilon);
+#endif
   }
 }
 
@@ -362,4 +355,44 @@ void RMSNormLayerCl::setProperty(const std::vector<std::string> &values) {
   LayerImpl::setProperty(remain_props);
 }
 
+bool RMSNormLayerCl::registerClKernels() {
+
+  // check if already registered
+  if (!layer_kernel_ptrs.empty()) {
+    ml_loge("kernels for rmsnorm layer are already registered.");
+    return false;
+  }
+
+  do {
+
+    ClContext::SharedPtrClKernel kernel_rmsnorm_ptr = nullptr;
+
+    kernel_rmsnorm_ptr =
+      cl_context_ref.registerClKernel(rmsnorm_cl_kernel_, "rmsnorm_cl");
+    if (!kernel_rmsnorm_ptr) {
+      ml_loge("OpenCL Error: Fail to register rmsnorm_cl kernel");
+      break;
+    }
+    layer_kernel_ptrs.emplace_back(kernel_rmsnorm_ptr);
+
+#ifdef ENABLE_FP16
+    kernel_rmsnorm_ptr = cl_context_ref.registerClKernel(
+      rmsnorm_cl_kernel_fp16_, "rmsnorm_cl_fp16");
+    if (!kernel_rmsnorm_ptr) {
+      ml_loge("OpenCL Error: Fail to register rmsnorm_cl_fp16 kernel");
+      break;
+    }
+    layer_kernel_ptrs.emplace_back(kernel_rmsnorm_ptr);
+#endif
+
+    return true;
+
+  } while (false);
+
+  // clear all registered kernels if any error occurs during registration
+  layer_kernel_ptrs.clear();
+
+  return false;
+}
+
 } // namespace nntrainer
diff --git a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
index 43f942ea1e..89a0600c4b 100644
--- a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
+++ b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h
@@ -16,7 +16,7 @@
 #ifdef __cplusplus
 
 #include
-#include <layer_impl.h>
+#include <layer_impl_cl.h>
 #include
 #include
 
@@ -25,36 +25,11 @@
 
 namespace nntrainer {
 
-namespace props {
-
-/**
- * @brief RMS_NORM_GAMMA_INIT_GPU Initialization Enumeration Information
- *
- */
-class RMS_NORM_GAMMA_INIT_GPU final
-  : public ::nntrainer::EnumProperty<::nntrainer::props::InitializerInfo> {
-public:
-  /**
-   * @brief Construct a RMS_NORM_GAMMA_INIT object
-   */
-  RMS_NORM_GAMMA_INIT_GPU(
-    ::nntrainer::Initializer value = ::nntrainer::Initializer::ONES) {
-    set(value);
-  };
-  using prop_tag = enum_class_prop_tag;
-  static constexpr const char *key = "gamma_initializer";
-};
-}; // namespace props
-
 /**
  * @class RMSNormLayer
  * @brief RMS Norm layer
  */
-
-class RMSNormLayerCl : public LayerImpl {
-
-private:
-  inline static ClContext cl_context_ref;
+class RMSNormLayerCl : public LayerImplCl {
 
 public:
   /**
@@ -118,9 +93,6 @@ class RMSNormLayerCl : public LayerImpl {
    */
   const std::string getType() const override { return RMSNormLayerCl::type; };
 
-  static opencl::Kernel kernel_rmsnorm;
-  static opencl::Kernel kernel_rmsnorm_fp16;
-
   /**
    * @brief Process data and dimensions for rms norm operation
    * @param[in] input Tensor
@@ -153,12 +125,26 @@ class RMSNormLayerCl : public LayerImpl {
    */
   void setProperty(const std::vector<std::string> &values) override;
 
+  /**
+   * @brief registerClKernels
+   */
+  static bool registerClKernels();
+
   inline static const std::string type = "rmsnorm";
 
 private:
   std::array wt_idx;
+
   std::tuple
     rmsnorm_props; /**< rmsnorm layer properties */
+
+  inline static std::vector<ClContext::SharedPtrClKernel>
+    layer_kernel_ptrs; /**< kernel list relevant with this layer */
+
+  enum Kernels {
+    RMSNORM_CL,
+    RMSNORM_CL_FP16,
+  };
 };
 
 } // namespace nntrainer
diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h
index cc22e7cc56..8120760ff9 100644
--- a/nntrainer/layers/common_properties.h
+++ b/nntrainer/layers/common_properties.h
@@ -1075,6 +1075,19 @@ class BNPARAMS_BETA_INIT final : public EnumProperty<InitializerInfo> {
   static constexpr const char *key = "beta_initializer";
 };
 
+/**
+ * @brief RMS_NORM_GAMMA_INIT Initialization Enumeration Information
+ */
+class RMS_NORM_GAMMA_INIT final : public EnumProperty<InitializerInfo> {
+public:
+  /**
+   * @brief Construct a RMS_NORM_GAMMA_INIT object
+   */
+  RMS_NORM_GAMMA_INIT(Initializer value = Initializer::ONES) { set(value); };
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "gamma_initializer";
+};
+
 /**
  * @brief Enumeration of tensor regularization type
  */
diff --git a/test/jni/Android.mk b/test/jni/Android.mk
index 0e62c860aa..6b4eded4f1 100644
--- a/test/jni/Android.mk
+++ b/test/jni/Android.mk
@@ -444,6 +444,7 @@ LOCAL_SRC_FILES := \
   ../unittest/layers/unittest_layers_impl.cpp \
   ../unittest/layers/unittest_layers_transpose_cl.cpp \
   ../unittest/layers/unittest_layers_concat_cl.cpp \
+  ../unittest/layers/unittest_layers_swiglu_cl.cpp \
   ../unittest/layers/unittest_layers_fully_connected_cl.cpp \
   ../unittest/layers/unittest_layers_input.cpp \
   ../unittest/layers/unittest_layers_loss.cpp \
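
Background note (illustrative, not part of the patch): the rmsnorm_cl / rmsnorm_cl_fp16 kernels that registerClKernels() caches compute RMS normalization. Given the per-width gamma weight requested in finalize() (gamma_dim of 1, 1, 1, width), this appears to scale each width-sized row of the input by the reciprocal of its root-mean-square and by gamma. The sketch below is a minimal host-side reference of that computation; the helper name rmsnorm_reference and the flat std::vector layout are hypothetical and independent of nntrainer's Tensor API.

// Hypothetical CPU reference: y = x / sqrt(mean(x^2) + epsilon) * gamma,
// applied independently to every row of length `width`.
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> rmsnorm_reference(const std::vector<float> &in,
                                     const std::vector<float> &gamma,
                                     std::size_t width, float epsilon) {
  std::vector<float> out(in.size());
  const std::size_t rows = in.size() / width;
  for (std::size_t r = 0; r < rows; ++r) {
    const float *row = &in[r * width];
    // mean of squares over the row
    float mean_sq = 0.0f;
    for (std::size_t i = 0; i < width; ++i)
      mean_sq += row[i] * row[i];
    mean_sq /= static_cast<float>(width);
    // normalize by RMS and apply the learned per-column gamma
    const float inv_rms = 1.0f / std::sqrt(mean_sq + epsilon);
    for (std::size_t i = 0; i < width; ++i)
      out[r * width + i] = row[i] * inv_rms * gamma[i];
  }
  return out;
}

A host-side reference like this is the kind of golden output a GPU unit test could compare against element-wise.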