Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenCL/GPU] Kernel binary caching @open sesame 04/04 09:22 #2503

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ if get_option('enable-opencl')
message ('OpenCL build is enabled. Will work only if OpenCL supported GPU is available.')
extra_defines += '-DENABLE_OPENCL=1'
endif

# Forward a non-empty 'opencl-kernel-path' option to the compiler as the
# OPENCL_KERNEL_PATH define. The value is inserted as a bare, unquoted token;
# when the option is empty the define is not added at all.
if get_option('opencl-kernel-path') != ''
message ('OpenCL kernel path set to: @0@'.format(get_option('opencl-kernel-path')))
extra_defines += '-DOPENCL_KERNEL_PATH=@0@'.format(get_option('opencl-kernel-path'))
endif

foreach extra_arg : warning_flags
if cc.has_argument (extra_arg)
Expand Down
1 change: 1 addition & 0 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ option('enable-tflite-interpreter', type: 'boolean', value: true)
option('enable-memory-swap', type: 'boolean', value: false)
option('memory-swap-path', type: 'string', value: '')
option('test-timeout', type: 'integer', value: 60)
option('opencl-kernel-path', type: 'string', value: 'nntrainer_opencl_kernels')

# dependency conflict resolution
option('capi-ml-inference-actual', type: 'string', value: 'capi-ml-inference',
Expand Down
4 changes: 4 additions & 0 deletions nntrainer/opencl/opencl_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ void LoadOpenCLFunctions(void *libopencl) {
LoadFunction(clEnqueueWriteBuffer);
LoadFunction(clEnqueueReadBuffer);
LoadFunction(clCreateProgramWithSource);
LoadFunction(clCreateProgramWithBinary);
LoadFunction(clBuildProgram);
LoadFunction(clGetProgramInfo);
LoadFunction(clGetProgramBuildInfo);
LoadFunction(clRetainProgram);
LoadFunction(clCreateKernel);
Expand All @@ -95,7 +97,9 @@ PFN_clCreateBuffer clCreateBuffer;
PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
PFN_clCreateProgramWithSource clCreateProgramWithSource;
PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
PFN_clBuildProgram clBuildProgram;
PFN_clGetProgramInfo clGetProgramInfo;
PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
PFN_clRetainProgram clRetainProgram;
PFN_clCreateKernel clCreateKernel;
Expand Down
13 changes: 13 additions & 0 deletions nntrainer/opencl/opencl_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,24 @@ typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithSource)(
const char ** /**< strings */, const size_t * /**< lengths */,
cl_int * /**< errcode_ret */);

/**
 * @brief Function pointer type with the signature of
 * clCreateProgramWithBinary, used to declare the dynamically resolved
 * clCreateProgramWithBinary entry point.
 */
typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBinary)(
cl_context /**< context */, cl_uint /**< num_devices */,
const cl_device_id * /**< device_list */, const size_t * /**< lengths */,
const unsigned char ** /**< binaries */, cl_int * /**< binary_status */,
cl_int * /**< errcode_ret */);

typedef cl_int(CL_API_CALL *PFN_clBuildProgram)(
cl_program /**< program */, cl_uint /**< num_devices */,
const cl_device_id * /**< device_list */, const char * /**< options */,
void(CL_CALLBACK * /**< pfn_notify */)(cl_program /**< program */,
void * /**< user_data */),
void * /**< user_data */);

/**
 * @brief Function pointer type with the signature of clGetProgramInfo, used
 * to declare the dynamically resolved clGetProgramInfo entry point.
 */
typedef cl_int(CL_API_CALL *PFN_clGetProgramInfo)(
cl_program /**< program */, cl_program_info /**< param_name */,
size_t /**< param_value_size */, void * /**< param_value */,
size_t * /**< param_value_size_ret */);

typedef cl_int(CL_API_CALL *PFN_clGetProgramBuildInfo)(
cl_program /**< program */, cl_device_id /**< device */,
cl_program_build_info /**< param_name */, size_t /**< param_value_size */,
Expand Down Expand Up @@ -128,7 +139,9 @@ extern PFN_clCreateBuffer clCreateBuffer;
extern PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
extern PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
extern PFN_clCreateProgramWithSource clCreateProgramWithSource;
extern PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
extern PFN_clBuildProgram clBuildProgram;
extern PFN_clGetProgramInfo clGetProgramInfo;
extern PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
extern PFN_clRetainProgram clRetainProgram;
extern PFN_clCreateKernel clCreateKernel;
Expand Down
155 changes: 153 additions & 2 deletions nntrainer/opencl/opencl_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,33 @@

#include "opencl_program.h"

#include <cstring>
#include <fstream>
#include <string>
#include <vector>

#include "opencl_loader.h"

#include <nntrainer_log.h>

// Two-level stringification: stringify() lets its argument be macro-expanded
// first, then stringify2() turns the expanded tokens into a string literal.
#define stringify(s) stringify2(s)
#define stringify2(s) #s

namespace nntrainer::opencl {

// Definition of the static member DEFAULT_KERNEL_PATH: the OPENCL_KERNEL_PATH
// build define converted to a string. If that macro is not defined at compile
// time, the stringification yields the literal text "OPENCL_KERNEL_PATH".
const std::string Program::DEFAULT_KERNEL_PATH = stringify(OPENCL_KERNEL_PATH);

/**
* @brief Build OpenCL program
*
* @param device_id OpenCL device id
* @param compiler_options string compiler options
* @param binaryCreated true if binary is already present false otherwise
* @return true if successful or false otherwise
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to add a comment for @param binaryCreated

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in latest commit.

*/
bool Program::BuildProgram(cl_device_id device_id,
const std::string &compiler_options) {

const std::string &compiler_options,
bool binaryCreated) {
// clBuildProgram returns NULL with error code if fails
const int error_code = clBuildProgram(
program_, 0, nullptr, compiler_options.c_str(), nullptr, nullptr);
Expand All @@ -41,6 +50,113 @@ bool Program::BuildProgram(cl_device_id device_id,
return false;
}

// saving kernel binary
if (!binaryCreated)
return GetProgramInfo(device_id);

return true;
}

/**
 * @brief Utility to get program info and save kernel binaries
 *
 * Queries the built program for its binary size, its kernel names and the
 * binary itself, then writes the binary to
 * "<DEFAULT_KERNEL_PATH>/<kernel names>_kernel.bin". An already existing file
 * is overwritten so that a stale binary is never extended with new data.
 *
 * @param device_id OpenCL device id, used to fetch the build log when a query
 * fails
 * @return true if successful or false otherwise
 */
bool Program::GetProgramInfo(cl_device_id device_id) {
  // since only one GPU is being used
  constexpr size_t num_devices = 1;

  // Read the binary size
  std::vector<size_t> binaries_size(num_devices, 0);
  cl_int error_code = clGetProgramInfo(program_, CL_PROGRAM_BINARY_SIZES,
                                       sizeof(size_t) * num_devices,
                                       binaries_size.data(), nullptr);

  if (error_code != CL_SUCCESS) {
    ml_loge("Failed to get program binary size. OpenCL error code: %d. %s",
            error_code,
            (GetProgramBuildInfo(device_id, CL_PROGRAM_BUILD_LOG)).c_str());
    return false;
  }

  // Read the kernel name size
  size_t kernel_names_size = 0;
  error_code = clGetProgramInfo(program_, CL_PROGRAM_KERNEL_NAMES, 0, nullptr,
                                &kernel_names_size);

  if (error_code != CL_SUCCESS) {
    ml_loge("Failed to get program kernel name size. OpenCL error code: %d. %s",
            error_code,
            (GetProgramBuildInfo(device_id, CL_PROGRAM_BUILD_LOG)).c_str());
    return false;
  }

  // getting the kernel names
  // One extra byte keeps the buffer non-empty and NUL-terminated even if the
  // driver reports a size of zero.
  std::vector<char> kernel_names_buf(kernel_names_size + 1, '\0');
  error_code =
    clGetProgramInfo(program_, CL_PROGRAM_KERNEL_NAMES, kernel_names_size,
                     kernel_names_buf.data(), nullptr);

  if (error_code != CL_SUCCESS) {
    ml_loge("Failed to get program kernel names. OpenCL error code: %d. %s",
            error_code,
            (GetProgramBuildInfo(device_id, CL_PROGRAM_BUILD_LOG)).c_str());
    return false;
  }

  const std::string kernel_names(kernel_names_buf.data());
  ml_logi("Saving kernel binary for: %s", kernel_names.c_str());

  // Read the binary
  // The vectors own the storage; binaries_ptr is the pointer table handed to
  // OpenCL, so every return path releases the memory automatically.
  std::vector<std::vector<unsigned char>> binaries(num_devices);
  std::vector<unsigned char *> binaries_ptr(num_devices, nullptr);
  for (size_t i = 0; i < num_devices; ++i) {
    binaries[i].resize(binaries_size[i]);
    binaries_ptr[i] = binaries[i].data();
  }

  error_code = clGetProgramInfo(program_, CL_PROGRAM_BINARIES,
                                sizeof(unsigned char *) * num_devices,
                                binaries_ptr.data(), nullptr);

  if (error_code != CL_SUCCESS) {
    ml_loge("Failed to get program binary data. OpenCL error code: %d. %s",
            error_code,
            (GetProgramBuildInfo(device_id, CL_PROGRAM_BUILD_LOG)).c_str());
    return false;
  }

  // Write the binary to file
  // All kernels in the program will be saved in the binary file
  const std::string binary_path =
    Program::DEFAULT_KERNEL_PATH + "/" + kernel_names + "_kernel.bin";

  for (size_t i = 0; i < num_devices; ++i) {
    // Truncate instead of append: appending to a leftover file would produce
    // a corrupt binary on the next load.
    std::ofstream fs(binary_path,
                     std::ios::out | std::ios::binary | std::ios::trunc);
    if (!fs) {
      ml_loge(
        "opencl_program: could not find directory to save kernel binary - %s",
        Program::DEFAULT_KERNEL_PATH.c_str());
      return false;
    }

    if (!binaries[i].empty()) {
      fs.write(reinterpret_cast<const char *>(binaries[i].data()),
               static_cast<std::streamsize>(binaries[i].size()));
    }
    fs.close();

    // A failed write or flush would leave a truncated binary behind.
    if (!fs) {
      ml_loge("opencl_program: failed to write kernel binary - %s",
              binary_path.c_str());
      return false;
    }
  }

  return true;
}

Expand Down Expand Up @@ -103,6 +219,41 @@ bool Program::CreateCLProgram(const cl_context &context,
return BuildProgram(device_id, compiler_options);
}

/**
* @brief Create OpenCL program from pre compiled binary
*
* @param context OpenCL context
* @param device_id OpenCL device id
* @param size binary file size
* @param binary data saved as binary
* @param binary_name name of binary file for logging
* @param compiler_options string compiler options
* @return true if successful or false otherwise
*/
bool Program::CreateCLProgramWithBinary(const cl_context &context,
const cl_device_id &device_id,
size_t size, unsigned char *binary,
std::string binary_name,
const std::string &compiler_options) {

int error_code;
const cl_device_id device_list[] = {device_id};
const size_t lengths[] = {size};
const unsigned char *binaries[] = {binary};

program_ = clCreateProgramWithBinary(context, 1, device_list, lengths,
binaries, NULL, &error_code);
if (!program_ || error_code != CL_SUCCESS) {
ml_loge("Failed to create compute program. OpenCL error code: %d",
error_code);
return false;
}

ml_logi("Loaded program from binary for: %s", binary_name.c_str());

return BuildProgram(device_id, compiler_options, true);
}

/**
* @brief Get the Program object
*
Expand Down
31 changes: 29 additions & 2 deletions nntrainer/opencl/opencl_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,19 @@ class Program {
*
* @param device_id OpenCL device id
* @param compiler_options string compiler options
* @param binaryCreated true if binary is already present false otherwise
* @return true if successful or false otherwise
*/
bool BuildProgram(cl_device_id device_id,
const std::string &compiler_options);
bool BuildProgram(cl_device_id device_id, const std::string &compiler_options,
bool binaryCreated = false);

/**
* @brief Utility to get program info and save kernel binaries
*
* Queries the program's binary size, kernel names and binary data, and
* writes the binary to a file under DEFAULT_KERNEL_PATH.
*
* @param device_id OpenCL device id
* @return true if successful or false otherwise
*/
bool GetProgramInfo(cl_device_id device_id);

/**
* @brief Get the information on the program build
Expand All @@ -49,6 +58,8 @@ class Program {
cl_program_build_info info);

public:
/** Directory under which compiled kernel binaries are saved */
static const std::string DEFAULT_KERNEL_PATH;

/**
* @brief Create OpenCL program from source
*
Expand All @@ -62,6 +73,22 @@ class Program {
const std::string &code,
const std::string &compiler_options);

/**
* @brief Create OpenCL program from pre compiled binary
*
* The program is created for the single given device and then built with
* the given compiler options.
*
* @param context OpenCL context
* @param device_id OpenCL device id
* @param size binary file size
* @param binary data saved as binary
* @param binary_name name of binary file for logging
* @param compiler_options string compiler options
* @return true if successful or false otherwise
*/
bool CreateCLProgramWithBinary(const cl_context &context,
const cl_device_id &device_id, size_t size,
unsigned char *binary, std::string binary_name,
const std::string &compiler_options);

/**
* @brief Get the Program object
*
Expand Down