diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
index 9595d01ea8..8fe26e9ca7 100644
--- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
+++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
@@ -436,12 +436,13 @@ Index: FFmpeg/libavfilter/opencl/tonemap.cl
 ===================================================================
 --- FFmpeg.orig/libavfilter/opencl/tonemap.cl
 +++ FFmpeg/libavfilter/opencl/tonemap.cl
-@@ -16,54 +16,67 @@
+@@ -16,54 +16,68 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
 -#define REFERENCE_WHITE 100.0f
 +#define FLOAT_EPS 1e-6f
++#define LUT_SIZE 65
 +
  extern float3 lrgb2yuv(float3);
  extern float  lrgb2y(float3);
@@ -525,7 +526,7 @@ Index: FFmpeg/libavfilter/opencl/tonemap.cl
      float j = tone_param;
      float a, b;
  
-@@ -71,202 +84,423 @@ float mobius(float s, float peak) {
+@@ -71,202 +85,606 @@ float mobius(float s, float peak) {
          return s;
  
      a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
@@ -780,6 +781,11 @@ Index: FFmpeg/libavfilter/opencl/tonemap.cl
 +#endif
 +    return c;
 +}
++
++#ifdef DOVI_RESHAPE
++float reshape_poly(float s, float4 coeffs) {
++    return (coeffs.z * s + coeffs.y) * s + coeffs.x;
++}
  
 -    // Rescale the variables in order to bring it into a representation where
 -    // 1.0 represents the dst_peak. This is because all of the tone mapping
@@ -787,11 +793,6 @@ Index: FFmpeg/libavfilter/opencl/tonemap.cl
 -    if (target_peak > 1.0f) {
 -        sig *= 1.0f / target_peak;
 -        peak *= 1.0f / target_peak;
-+#ifdef DOVI_RESHAPE
-+float reshape_poly(float s, float4 coeffs) {
-+    return (coeffs.z * s + coeffs.y) * s + coeffs.x;
-+}
-+
 +float reshape_mmr(float3 sig,
 +                  float4 coeffs,
 +                  __global float4 *dovi_mmr,
@@ -1113,6 +1114,189 @@ Index: FFmpeg/libavfilter/opencl/tonemap.cl
 +#else
 +    write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
 +#endif
++}
++
++float3 apply_lut3d(__global float3 *lut, float3 color)
++{
++    // Scale the color to the LUT grid.
++    float3 pos = color * (float)(LUT_SIZE - 1);
++
++    // Get the integer base indices in the LUT.
++    int3 base = convert_int3(floor(pos));
++    // Compute the fractional part within the cell.
++    float3 f = pos - convert_float3(base);
++
++    // Compute the base linear index.
++    int baseIndex = base.x + base.y * LUT_SIZE + base.z * LUT_SIZE * LUT_SIZE;
++
++    // Fetch the eight corner values of the current cube cell.
++    float3 c000 = lut[baseIndex];
++    float3 c100 = lut[baseIndex + 1];
++    float3 c010 = lut[baseIndex + LUT_SIZE];
++    float3 c110 = lut[baseIndex + 1 + LUT_SIZE];
++    float3 c001 = lut[baseIndex + LUT_SIZE * LUT_SIZE];
++    float3 c101 = lut[baseIndex + 1 + LUT_SIZE * LUT_SIZE];
++    float3 c011 = lut[baseIndex + LUT_SIZE + LUT_SIZE * LUT_SIZE];
++    float3 c111 = lut[baseIndex + 1 + LUT_SIZE + LUT_SIZE * LUT_SIZE];
++
++    // Determine the tetrahedron within the cube cell.
++    // The tetrahedron selection is based on the ordering of the fractional parts.
++    // There are 6 possibilities, we calculate them all to reduce branching on GPU.
++    float3 cxyz = c000 + f.x * (c100 - c000) + f.y * (c110 - c100) + f.z * (c111 - c110);
++    float3 cxzy = c000 + f.x * (c100 - c000) + f.z * (c101 - c100) + f.y * (c111 - c101);
++    float3 czxy = c000 + f.z * (c001 - c000) + f.x * (c101 - c001) + f.y * (c111 - c101);
++    float3 cyxz = c000 + f.y * (c010 - c000) + f.x * (c110 - c010) + f.z * (c111 - c110);
++    float3 cyzx = c000 + f.y * (c010 - c000) + f.z * (c011 - c010) + f.x * (c111 - c011);
++    float3 czyx = c000 + f.z * (c001 - c000) + f.y * (c011 - c001) + f.x * (c111 - c011);
++
++    // Now select based on f
++    float3 result = select(
++                           select(
++                                  cxyz,
++                                  select(cxzy,
++                                         czxy,
++                                         (int3)(f.x >= f.z)),
++                                  (int3)(f.y >= f.z)),
++                           select(cyxz,
++                                  select(cyzx,
++                                         czyx,
++                                         (int3)(f.y >= f.z)),
++                                  (int3)(f.x >= f.z)),
++                           (int3)(f.x >= f.y));
++
++    return clamp(result, 0.0f, 1.0f);
++}
++
++__kernel void tonemap_lut(__global float3 *lut,
++                          __write_only image2d_t dst1,
++                          __read_only  image2d_t src1,
++                          __write_only image2d_t dst2,
++                          __read_only  image2d_t src2,
++#ifdef NON_SEMI_PLANAR_OUT
++                          __write_only image2d_t dst3,
++#endif
++#ifdef NON_SEMI_PLANAR_IN
++                          __read_only  image2d_t src3,
++#endif
++#ifdef ENABLE_DITHER
++                          __read_only  image2d_t dither,
++#endif
++#ifdef DOVI_RESHAPE
++                          __global float *dovi_buf,
++#endif
++                          float peak)
++{
++    int xi = get_global_id(0);
++    int yi = get_global_id(1);
++    // each work item process four pixels
++    int x = 2 * xi;
++    int y = 2 * yi;
++
++    int2 src1_sz = get_image_dim(src1);
++    int2 dst2_sz = get_image_dim(dst2);
++
++    if (xi >= dst2_sz.x || yi >= dst2_sz.y)
++        return;
++
++    float2 src1_sz_recip = native_recip(convert_float2(src1_sz));
++    float2 ncoords_yuv0 = convert_float2((int2)(x,     y)) * src1_sz_recip;
++    float2 ncoords_yuv1 = convert_float2((int2)(x + 1, y)) * src1_sz_recip;
++    float2 ncoords_yuv2 = convert_float2((int2)(x,     y + 1)) * src1_sz_recip;
++    float2 ncoords_yuv3 = convert_float2((int2)(x + 1, y + 1)) * src1_sz_recip;
++
++    float3 yuv0, yuv1, yuv2, yuv3;
++
++    yuv0.x = read_imagef(src1, sampler, (int2)(x,     y)).x;
++    yuv1.x = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
++    yuv2.x = read_imagef(src1, sampler, (int2)(x,     y + 1)).x;
++    yuv3.x = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
++
++#ifdef NON_SEMI_PLANAR_IN
++    yuv0.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv0).x,
++                       read_imagef(src3, l_sampler, ncoords_yuv0).x);
++    yuv1.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv1).x,
++                       read_imagef(src3, l_sampler, ncoords_yuv1).x);
++    yuv2.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv2).x,
++                       read_imagef(src3, l_sampler, ncoords_yuv2).x);
++    yuv3.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv3).x,
++                       read_imagef(src3, l_sampler, ncoords_yuv3).x);
++#else
++    yuv0.yz = read_imagef(src2, l_sampler, ncoords_yuv0).xy;
++    yuv1.yz = read_imagef(src2, l_sampler, ncoords_yuv1).xy;
++    yuv2.yz = read_imagef(src2, l_sampler, ncoords_yuv2).xy;
++    yuv3.yz = read_imagef(src2, l_sampler, ncoords_yuv3).xy;
++#endif
++
++#ifdef DOVI_RESHAPE
++    __global float *dovi_params = dovi_buf;
++    __global float *dovi_pivots = dovi_buf + 24;
++    __global float4 *dovi_coeffs = (__global float4 *)(dovi_buf + 48);
++    __global float4 *dovi_mmr = (__global float4 *)(dovi_buf + 144);
++    yuv0 = reshape_dovi_yuv(yuv0, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr);
++    yuv1 = reshape_dovi_yuv(yuv1, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr);
++    yuv2 = reshape_dovi_yuv(yuv2, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr);
++    yuv3 = reshape_dovi_yuv(yuv3, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr);
++#endif
++
++    float3 c0, c1, c2, c3;
++
++    c0 = apply_lut3d(lut, yuv0);
++    c1 = apply_lut3d(lut, yuv1);
++    c2 = apply_lut3d(lut, yuv2);
++    c3 = apply_lut3d(lut, yuv3);
++
++    float3 chroma = get_chroma_sample(c0, c1, c2, c3);
++
++    write_imagef(dst1, (int2)(x,     y), (float4)(c0.x, 0.0f, 0.0f, 1.0f));
++    write_imagef(dst1, (int2)(x + 1, y), (float4)(c1.x, 0.0f, 0.0f, 1.0f));
++    write_imagef(dst1, (int2)(x,     y + 1), (float4)(c2.x, 0.0f, 0.0f, 1.0f));
++    write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(c3.x, 0.0f, 0.0f, 1.0f));
++#ifdef NON_SEMI_PLANAR_OUT
++    write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, 0.0f, 0.0f, 1.0f));
++    write_imagef(dst3, (int2)(xi, yi), (float4)(chroma.z, 0.0f, 0.0f, 1.0f));
++#else
++    write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
++#endif
++}
++
++__kernel void build_lut(__global float3* lut, float peak)
++{
++    const int total_entries = LUT_SIZE * LUT_SIZE * LUT_SIZE;
++    int idx = get_global_id(0);
++    if (idx >= total_entries) return;
++    int z = idx / (LUT_SIZE * LUT_SIZE);
++    int rem = idx - (z * LUT_SIZE * LUT_SIZE);
++    int y = rem / LUT_SIZE;
++    int x = rem % LUT_SIZE;
++    float fx = (float)x / (LUT_SIZE - 1);
++    float fy = (float)y / (LUT_SIZE - 1);
++    float fz = (float)z / (LUT_SIZE - 1);
++    float3 c = (float3)(fx, fy, fz);
++#ifndef MAP_IN_DST_SPACE
++    c = map_to_src_space_from_yuv(c);
++#else
++    c = map_to_dst_space_from_yuv(c);
++#endif
++    float4 r4 = (float4)(c.x, c.x, c.x, c.x);
++    float4 g4 = (float4)(c.y, c.y, c.y, c.y);
++    float4 b4 = (float4)(c.z, c.z, c.z, c.z);
++#ifndef SKIP_TONEMAP
++  #ifdef TONE_MODE_ITP
++    map_four_pixels_itp(&r4, &g4, &b4, peak);
++  #else
++    map_four_pixels_rgb(&r4, &g4, &b4, peak);
++  #endif
++#endif
++    c = (float3)(r4.x, g4.x, b4.x);
++#ifndef MAP_IN_DST_SPACE
++    c = lrgb2lrgb(c);
++  #ifndef RGB2RGB_PASSTHROUGH
++    c = gamut_compress(c);
++  #endif
++    c = clamp(c, 0.0f, 1.0f);
++#endif
++    c = lrgb2yuv(c);
++    lut[idx] = clamp(c, 0.0f, 1.0f);
  }
 Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 ===================================================================
@@ -1190,8 +1374,6 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +    const AVPixFmtDescriptor *in_desc, *out_desc;
 +    int in_planes, out_planes;
 +
-+    float *lin_lut;
-+
 +#define params_cnt 8
 +#define pivots_cnt (7+1)
 +#define coeffs_cnt 8*4
@@ -1217,7 +1399,9 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      int                   initialised;
 +    int                   init_with_dovi;
      cl_kernel             kernel;
++    cl_kernel             lut_generation_kernel;
 +    cl_mem                dither_image;
++    cl_mem                lut_buffer;
      cl_command_queue      command_queue;
 -    cl_mem                util_mem;
  } TonemapOpenCLContext;
@@ -1239,7 +1423,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      [TONEMAP_NONE]     = "direct",
      [TONEMAP_LINEAR]   = "linear",
      [TONEMAP_GAMMA]    = "gamma",
-@@ -88,8 +129,54 @@ static const char *const tonemap_func[TO
+@@ -88,8 +129,18 @@ static const char *const tonemap_func[TO
      [TONEMAP_REINHARD] = "reinhard",
      [TONEMAP_HABLE]    = "hable",
      [TONEMAP_MOBIUS]   = "mobius",
@@ -1253,48 +1437,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +    { 0.01736321, -0.04725154,  1.03004253},
 +};
 +
-+static float linearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_in)
-+{
-+    if (trc_in == AVCOL_TRC_SMPTE2084)
-+        return eotf_st2084(x, ref_white);
-+    if (trc_in == AVCOL_TRC_ARIB_STD_B67)
-+        return eotf_arib_b67(x);
-+    return x;
-+}
-+
-+#define LUT_SIZE (1 << 10)
-+static int compute_trc_luts(AVFilterContext *avctx)
-+{
-+    TonemapOpenCLContext *ctx = avctx->priv;
-+    int i;
-+
-+    if (!ctx->lin_lut && !(ctx->lin_lut = av_calloc(LUT_SIZE, sizeof(float))))
-+        return AVERROR(ENOMEM);
-+    for (i = 0; i < LUT_SIZE; i++) {
-+        float x = (float)i / (LUT_SIZE - 1);
-+        ctx->lin_lut[i] = FFMAX(linearize(x, REFERENCE_WHITE_ALT, ctx->trc_in), 0.0f);
-+    }
-+
-+    return 0;
-+}
-+
-+static void print_opencl_const_trc_luts(AVFilterContext *avctx, AVBPrint *buf)
-+{
-+    TonemapOpenCLContext *ctx = avctx->priv;
-+    int i;
-+
-+    if (ctx->lin_lut) {
-+        av_bprintf(buf, "__constant float lin_lut[%d] = {\n", LUT_SIZE);
-+        for (i = 0; i < LUT_SIZE; i++)
-+            av_bprintf(buf, " %.13ff,", ctx->lin_lut[i]);
-+        av_bprintf(buf, "};\n");
-+    }
-+}
++#define LUT_SIZE (65 * 65 * 65)
 +
  static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
                                double rgb2rgb[3][3]) {
      double rgb2xyz[3][3], xyz2rgb[3][3];
-@@ -108,23 +195,150 @@ static int get_rgb2rgb_matrix(enum AVCol
+@@ -108,23 +159,150 @@ static int get_rgb2rgb_matrix(enum AVCol
      return 0;
  }
  
@@ -1454,7 +1602,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
  
      switch(ctx->tonemap) {
      case TONEMAP_GAMMA:
-@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC
+@@ -139,59 +317,203 @@ static int tonemap_opencl_init(AVFilterC
          if (isnan(ctx->param))
              ctx->param = 0.3f;
          break;
@@ -1536,11 +1684,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +    }
 +
 +    if (ctx->tonemap_mode == TONEMAP_MODE_AUTO) {
-+        if (ctx->tradeoff) {
-+            ctx->tonemap_mode = TONEMAP_MODE_LUM;
-+        } else {
-+            ctx->tonemap_mode = TONEMAP_MODE_ITP;
-+        }
++        ctx->tonemap_mode = TONEMAP_MODE_ITP;
 +    }
 +
 +    av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n",
@@ -1677,7 +1821,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
  
      if (rgb2rgb_passthrough)
-@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC
+@@ -199,19 +521,41 @@ static int tonemap_opencl_init(AVFilterC
      else
          ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb);
  
@@ -1726,7 +1870,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
                 ctx->colorspace_out, av_color_space_name(ctx->colorspace_out));
          goto fail;
      }
-@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC
+@@ -219,24 +563,13 @@ static int tonemap_opencl_init(AVFilterC
      ff_fill_rgb2yuv_table(luma_dst, rgb2yuv);
      ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv);
  
@@ -1749,27 +1893,19 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 -
 -    if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67)
 -        av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n");
-+    if (ctx->tradeoff) {
-+        av_bprintf(&header, "#define LUT_TRC %d\n", LUT_SIZE - 1);
-+        if (ctx->trc_out != AVCOL_TRC_SMPTE2084) {
-+            av_bprintf(&header, "#define linearize %s\n", "linearize_lut");
-+            av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]);
-+        }
-+        if (!ctx->lin_lut)
-+            if ((err = compute_trc_luts(avctx)) < 0)
-+                goto fail;
-+        print_opencl_const_trc_luts(avctx, &header);
-+    } else if (ctx->trc_out != AVCOL_TRC_SMPTE2084) {
++    if (ctx->trc_out != AVCOL_TRC_SMPTE2084) {
 +        av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]);
 +        av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]);
 +    }
  
      av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
      opencl_sources[0] = header.str;
-@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC
+@@ -254,46 +587,206 @@ static int tonemap_opencl_init(AVFilterC
      CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
                       "command queue %d.\n", cle);
  
+-    ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
+-    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
 +    if (ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth) {
 +        cl_image_format image_format = {
 +            .image_channel_data_type = CL_UNORM_INT16,
@@ -1805,8 +1941,30 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to wait for event completion: %d.\n", cle);
 +    }
 +
-     ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
-     CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
++    if (ctx->tradeoff) {
++        size_t lut_size = LUT_SIZE;
++        size_t lut_buffer_size = lut_size * sizeof(cl_float3);
++        float peak = (float)ctx->peak;
++
++        ctx->lut_generation_kernel = clCreateKernel(ctx->ocf.program, "build_lut", &cle);
++        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
++
++        CL_CREATE_BUFFER_FLAGS(ctx, lut_buffer, CL_MEM_READ_WRITE, lut_buffer_size, NULL);
++        CL_SET_KERNEL_ARG(ctx->lut_generation_kernel, 0, cl_mem, &ctx->lut_buffer);
++        CL_SET_KERNEL_ARG(ctx->lut_generation_kernel, 1, cl_float, &peak);
++
++        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->lut_generation_kernel, 1, NULL,
++                                     &lut_size, NULL,
++                                     0, NULL, NULL);
++        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue build_lut kernel: %d.\n", cle);
++
++
++        ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap_lut", &cle);
++        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
++    } else {
++        ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
++        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
++    }
  
 -    ctx->util_mem =
 -        clCreateBuffer(ctx->ocf.hwctx->context, 0,
@@ -1831,12 +1989,14 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
          clReleaseCommandQueue(ctx->command_queue);
      if (ctx->kernel)
          clReleaseKernel(ctx->kernel);
++    if (ctx->lut_generation_kernel)
++        clReleaseKernel(ctx->lut_generation_kernel);
 +    if (event)
 +        clReleaseEvent(event);
 +    if (ctx->dither_image)
 +        clReleaseMemObject(ctx->dither_image);
-+    if (ctx->lin_lut)
-+        av_freep(&ctx->lin_lut);
++    if (ctx->lut_buffer)
++        clReleaseMemObject(ctx->lut_buffer);
      return err;
  }
  
@@ -1863,9 +2023,6 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +    TonemapOpenCLContext *ctx = avctx->priv;
 +    cl_int cle;
 +
-+    if (ctx->lin_lut)
-+        av_freep(&ctx->lin_lut);
-+
 +    if (ctx->kernel) {
 +        cle = clReleaseKernel(ctx->kernel);
 +        if (cle != CL_SUCCESS)
@@ -1873,6 +2030,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +                   "kernel: %d.\n", cle);
 +    }
 +
++    if (ctx->lut_generation_kernel) {
++        cle = clReleaseKernel(ctx->lut_generation_kernel);
++        if (cle != CL_SUCCESS)
++            av_log(avctx, AV_LOG_ERROR, "Failed to release "
++                                        "lut_generation_kernel: %d.\n", cle);
++    }
++
 +    if (ctx->dither_image) {
 +        cle = clReleaseMemObject(ctx->dither_image);
 +        if (cle != CL_SUCCESS)
@@ -1880,6 +2044,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +            "dither image: %d.\n", cle);
 +    }
 +
++    if (ctx->lut_buffer) {
++        cle = clReleaseMemObject(ctx->lut_buffer);
++        if (cle != CL_SUCCESS)
++            av_log(avctx, AV_LOG_ERROR, "Failed to release "
++                                        "lut buffer: %d.\n", cle);
++    }
++
 +    if (ctx->command_queue) {
 +        cle = clReleaseCommandQueue(ctx->command_queue);
 +        if (cle != CL_SUCCESS)
@@ -1932,12 +2103,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +        av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
 +               av_get_pix_fmt_name(in_format));
 +        return AVERROR(ENOSYS);
-+    }
+     }
 +    if (!format_is_supported(out_format)) {
 +        av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
 +               av_get_pix_fmt_name(out_format));
 +        return AVERROR(ENOSYS);
-     }
++    }
 +    if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) {
 +        av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n",
 +               in_desc->comp[0].depth);
@@ -1956,7 +2127,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      ret = ff_opencl_filter_config_output(outlink);
      if (ret < 0)
          return ret;
-@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext
+@@ -308,13 +801,49 @@ static int launch_kernel(AVFilterContext
      size_t global_work[2];
      size_t local_work[2];
      cl_int cle;
@@ -1976,19 +2147,26 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +        err = AVERROR(EIO);
 +        goto fail;
 +    }
- 
-     CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]);
-     CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]);
-     CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]);
-     CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]);
--    CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem);
--    CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak);
 +
-+    idx_arg = 4;
++    idx_arg = 0;
++    if (ctx->tradeoff) {
++        CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &ctx->lut_buffer);
++    }
++    CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[0]);
++    CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[0]);
++    CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[1]);
++    CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[1]);
++
 +    if (ctx->out_planes > 2) {
 +        CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[2]);
 +    }
-+
+ 
+-    CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]);
+-    CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]);
+-    CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]);
+-    CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]);
+-    CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem);
+-    CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak);
 +    if (ctx->in_planes > 2) {
 +        CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[2]);
 +    }
@@ -2005,7 +2183,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
  
      local_work[0]  = 16;
      local_work[1]  = 16;
-@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A
+@@ -338,13 +867,10 @@ static int tonemap_opencl_filter_frame(A
      AVFilterContext    *avctx = inlink->dst;
      AVFilterLink     *outlink = avctx->outputs[0];
      TonemapOpenCLContext *ctx = avctx->priv;
@@ -2020,7 +2198,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
  
      av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
             av_get_pix_fmt_name(input->format),
-@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A
+@@ -363,8 +889,49 @@ static int tonemap_opencl_filter_frame(A
      if (err < 0)
          goto fail;
  
@@ -2072,7 +2250,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
  
      if (ctx->trc != -1)
          output->color_trc = ctx->trc;
-@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A
+@@ -385,72 +952,50 @@ static int tonemap_opencl_filter_frame(A
      ctx->range_out = output->color_range;
      ctx->chroma_loc = output->chroma_location;
  
@@ -2128,11 +2306,11 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +        av_frame_remove_side_data(output, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
 +        av_frame_remove_side_data(output, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
 +    }
-+
-+    av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_RPU_BUFFER);
-+    av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_METADATA);
  
 -    av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n",
++    av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_RPU_BUFFER);
++    av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_METADATA);
++
 +    av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n",
             av_get_pix_fmt_name(output->format),
             output->width, output->height, output->pts);
@@ -2168,7 +2346,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      av_frame_free(&input);
      av_frame_free(&output);
      return err;
-@@ -458,24 +1015,9 @@ fail:
+@@ -458,24 +1003,9 @@ fail:
  
  static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
  {
@@ -2195,7 +2373,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
  
      ff_opencl_filter_uninit(avctx);
  }
-@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini
+@@ -483,37 +1013,50 @@ static av_cold void tonemap_opencl_unini
  #define OFFSET(x) offsetof(TonemapOpenCLContext, x)
  #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
  static const AVOption tonemap_opencl_options[] = {
@@ -2244,7 +2422,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 +        { "rgb",  "Per-channel based tonemap",        0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_RGB },  0, 0, FLAGS, "tonemap_mode" },
 +        { "lum",  "Relative luminance based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_LUM },  0, 0, FLAGS, "tonemap_mode" },
 +        { "itp",  "ICtCp intensity based tonemap",    0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_ITP },  0, 0, FLAGS, "tonemap_mode" },
-+        { "auto", "Select based on GPU spec",         0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_AUTO }, 0, 0, FLAGS, "tonemap_mode" },
++        { "auto", "Select the preferred mode",        0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_AUTO }, 0, 0, FLAGS, "tonemap_mode" },
 +    { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" },
 +    { "t",        "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" },
 +        { "bt709",            0,       0,                 AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT709 },         0, 0, FLAGS, "transfer" },