From 64f3d0d7f85fd97c08c4a7a9f5b72e8b57d0d827 Mon Sep 17 00:00:00 2001 From: Eunju Yang Date: Fri, 29 Nov 2024 15:17:01 +0900 Subject: [PATCH] [ GPU/OpenCL ] change transpose_cl to inherit LayerImplCl - This commit updates transpose_cl.cpp/h to inherit LayerImplCl. - This commit implements registerClKernels() of transpose_cl layer. - This commit updates cl_context.cpp (applying transpose_cl's update) - This is the last commit to complete #2723. - This can close #2723. Self evaluation: Build test: [X]Passed [ ]Failed [ ]Skipped Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Eunju Yang --- nntrainer/cl_context.cpp | 9 +++++---- nntrainer/layers/cl_layers/transpose_cl.h | 22 +++++++++++++--------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp index 25c2d66c2b..6696c06d9f 100644 --- a/nntrainer/cl_context.cpp +++ b/nntrainer/cl_context.cpp @@ -64,10 +64,11 @@ static void add_default_object(ClContext &cc) { ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT); } - // @todo transposlayercl also needs to be updated. - cc.registerFactory(nntrainer::createLayer, - TransposeLayerCl::type, - ml::train::LayerType::LAYER_TRANSPOSE); + if (TransposeLayerCl::registerClKernels()) { + cc.registerFactory(nntrainer::createLayer, + TransposeLayerCl::type, + ml::train::LayerType::LAYER_TRANSPOSE); + } } static void registerer(ClContext &cc) noexcept { diff --git a/nntrainer/layers/cl_layers/transpose_cl.h b/nntrainer/layers/cl_layers/transpose_cl.h index af6d3bd301..42bacc3b55 100644 --- a/nntrainer/layers/cl_layers/transpose_cl.h +++ b/nntrainer/layers/cl_layers/transpose_cl.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -25,13 +26,13 @@ namespace nntrainer { * @brief A tranpose layer. 
* */ -class TransposeLayerCl final : public Layer { +class TransposeLayerCl final : public LayerImplCl { public: /** * @brief Construct a new transpose layer object * */ - TransposeLayerCl() : Layer(), transpose_props(props::Print()) {} + TransposeLayerCl() : LayerImplCl(), transpose_props(props::Print()) {} /** * @brief Destroy the transpose layer object @@ -82,15 +83,18 @@ class TransposeLayerCl final : public Layer { */ void setProperty(const std::vector &values) override; - inline static const std::string type = "transpose"; + /** + * @brief registerClKernels for transpose_cl + * @details registerClKernels for transpose_cl always returns true + * without any specific action for kernel registration. It only uses + * cl_blas_kernels and there are no specific kernels for this. If there are + * specific kernels for this, it should be updated to register the kernels. + */ + static bool registerClKernels() { return true; }; - static opencl::Kernel kernel_transpose_axis0; - static opencl::Kernel kernel_transpose_fp16_axis0; - static opencl::Kernel kernel_transpose_axis1; - static opencl::Kernel kernel_transpose_fp16_axis1; - static opencl::Kernel kernel_transpose_axis2; - static opencl::Kernel kernel_transpose_fp16_axis2; + inline static const std::string type = "transpose"; +private: std::tuple transpose_props; /**< transpose layer properties : unit - number of output neurons */ };