ENH: accelerate conversion from packed RGB pixel to packed RGBA pixel.

oddkiva · Dec 9, 2023 · 47b85dd · 47b85dd
1 parent b5ebe51
commit 47b85dd
Show file tree

Hide file tree

Showing 7 changed files with 133 additions and 15 deletions.
diff --git a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt
@@ -3,7 +3,7 @@ target_link_libraries(
   hello_vulkan_image
   PRIVATE SignalHandler #
           DO::Sara::Core #
-          DO::Sara::ImageIO #
+          DO::Sara::ImageProcessing #
           DO::Sara::VideoIO #
           DO::Shakti::Vulkan)
 set_target_properties(hello_vulkan_image PROPERTIES FOLDER

diff --git a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp
@@ -28,6 +28,7 @@
 
 #include <DO/Sara/Core/Image.hpp>
 #include <DO/Sara/Core/TicToc.hpp>
+#include <DO/Sara/ImageProcessing/FastColorConversion.hpp>
 #include <DO/Sara/VideoIO.hpp>
 
 
@@ -109,10 +110,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend
     }
 
     _vstream.open(_vpath);
-    const auto image_host = _vstream.frame().convert<sara::Rgba8>();
+    const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame());
 
-    const auto aspect_ratio = static_cast<float>(image_host.width()) / image_host.height();
-    for (auto& vertex: vertices)
+    const auto aspect_ratio =
+        static_cast<float>(image_host.width()) / image_host.height();
+    for (auto& vertex : vertices)
       vertex.pos.x() *= aspect_ratio;
 
     // General vulkan context objects.
@@ -169,11 +171,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend
 
       if (_vstream.read())
       {
-        if (_verbose)
-          sara::tic();
-        const auto image_host = _vstream.frame().convert<sara::Rgba8>();
-        if (_verbose)
-          sara::toc("RGB to RGBA");
+        // Repack the current video frame as RGBA; time it in verbose mode only.
+        if (_verbose)
+          sara::tic();
+        const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame());
+        if (_verbose)
+          sara::toc("RGB to RGBA");
 
         if (_verbose)
           sara::tic();

diff --git a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp
@@ -12,16 +12,35 @@
 #include <DO/Sara/ImageProcessing/FastColorConversion.hpp>
 
 #ifdef DO_SARA_USE_HALIDE
-#include <DO/Shakti/Halide/RuntimeUtilities.hpp>
+#  include <DO/Shakti/Halide/RuntimeUtilities.hpp>
 
-#include "shakti_rgb8u_to_gray32f_cpu.h"
-#include "shakti_bgra8u_to_gray32f_cpu.h"
+#  include "shakti_bgra8u_to_gray32f_cpu.h"
+#  include "shakti_rgb8_to_rgba8_cpu.h"
+#  include "shakti_rgb8u_to_gray32f_cpu.h"
 #endif
 
 
 namespace DO::Sara {
 
-  auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src, ImageView<float>& dst) -> void
+  // Converts the packed RGB image `src` into the packed RGBA image `dst`.
+  //
+  // `dst` is provided by the caller and must have the same sizes as `src`,
+  // otherwise std::domain_error is thrown. When built with Halide,
+  // std::runtime_error is thrown if the Halide pipeline reports a failure.
+  auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src, ImageView<Rgba8>& dst)
+      -> void
+  {
+    if (src.sizes() != dst.sizes())
+      throw std::domain_error{
+          "Color conversion error: image sizes are not equal!"};
+
+#ifdef DO_SARA_USE_HALIDE
+    auto src_buffer = Shakti::Halide::as_interleaved_runtime_buffer(src);
+    auto dst_buffer = Shakti::Halide::as_interleaved_runtime_buffer(dst);
+
+    // AOT-compiled Halide pipelines return 0 on success and a non-zero error
+    // code otherwise: do not silently hand back an image that was not filled.
+    const auto status = shakti_rgb8_to_rgba8_cpu(src_buffer, dst_buffer);
+    if (status != 0)
+      throw std::runtime_error{
+          "Color conversion error: Halide RGB8 to RGBA8 pipeline failed!"};
+#else
+    // Fallback implementation used when Halide is not enabled.
+    DO::Sara::convert(src, dst);
+#endif
+  }
+
+  auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src, ImageView<float>& dst)
+      -> void
   {
     if (src.sizes() != dst.sizes())
       throw std::domain_error{
@@ -42,12 +61,14 @@ namespace DO::Sara {
     // - model name      : Intel(R) Core(TM) i7-6800K CPU @ 3.40GHz
     //
     // [Grayscale] 8.8687 ms
-    // This is 15 times slower compared to the Halide optimized CPU implementation
+    // This is 15 times slower compared to the Halide optimized CPU
+    // implementation
     DO::Sara::convert(src, dst);
 #endif
   }
 
-  auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst) -> void
+  auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst)
+      -> void
   {
     if (src.sizes() != dst.sizes())
       throw std::domain_error{

diff --git a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp
@@ -20,6 +20,9 @@ namespace DO::Sara {
   auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst)
       -> void;
 
+  auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src, ImageView<Rgba8>& dst)
+      -> void;  // Fills `dst`; throws std::domain_error on size mismatch.
+
   inline auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src)
   {
     auto dst = Image<float>{src.sizes()};
@@ -34,4 +37,11 @@ namespace DO::Sara {
     return dst;
   }
 
+  // Convenience overload: allocates an RGBA image with the sizes of `src`,
+  // fills it from `src` and returns it by value.
+  inline auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src)
+  {
+    Image<Rgba8> rgba{src.sizes()};
+    from_rgb8_to_rgba8(src, rgba);
+    return rgba;
+  }
+
 }  // namespace DO::Sara
diff --git a/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake b/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake
@@ -18,6 +18,7 @@ if(SARA_USE_FROM_SOURCE)
                 # Fast color conversion
                 shakti_rgb8u_to_gray32f_cpu
                 shakti_bgra8u_to_gray32f_cpu
+                shakti_rgb8_to_rgba8_cpu
                 # Binary operations.
                 shakti_subtract_32f_cpu
                 # Cartesian to polar coordinates.

diff --git a/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt b/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt
@@ -21,6 +21,10 @@ shakti_halide_library_v2(
   NAME shakti_cast_uint8_to_float_cpu
   SRCS CastUint8ToFloat.cpp)
 
+shakti_halide_library_v2(
+  NAME shakti_rgb8_to_rgba8_cpu  # Packed RGB8 -> packed RGBA8 conversion.
+  SRCS FromRgb8ToRgba8.cpp)
+
 # ------------------------------------------------------------------------------
 # Image rotation CW 90
 # ------------------------------------------------------------------------------

diff --git a/cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp b/cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp
@@ -0,0 +1,80 @@
+// ========================================================================== //
+// This file is part of Sara, a basic set of libraries in C++ for computer
+// vision.
+//
+// Copyright (C) 2020-present David Ok <[email protected]>
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License v. 2.0. If a copy of the MPL was not distributed with this file,
+// you can obtain one at http://mozilla.org/MPL/2.0/.
+// ========================================================================== //
+
+#include <DO/Shakti/Halide/MyHalide.hpp>
+#include <Halide.h>
+
+
+namespace {
+
+  using namespace Halide;
+  // Halide generator: interleaved RGB8 -> interleaved RGBA8 (alpha = 255).
+  class Rgb8ToRgba8 : public Halide::Generator<Rgb8ToRgba8>
+  {
+  public:
+    GeneratorParam<int> tile_x{"tile_x", 32};  // Tile width, GPU schedule only.
+    GeneratorParam<int> tile_y{"tile_y", 8};   // Tile height, GPU schedule only.
+    // Both buffers are 3-dimensional and indexed as (x, y, c) in generate().
+    Input<Buffer<std::uint8_t>> input{"Rgb8u", 3};
+    Output<Buffer<std::uint8_t>> output{"Rgba8u", 3};
+
+    Var x{"x"}, y{"y"}, c{"c"}, xi{"xi"}, yi{"yi"};
+
+    void generate()  // Defines the pipeline, then applies the schedule.
+    {
+      // Interleaved RGB input: x stride is 3, channel stride is 1, 3 channels.
+      input.dim(0).set_stride(3).dim(2).set_stride(1);
+      input.dim(2).set_bounds(0, 3);
+      // Interleaved RGBA output: x stride is 4, channel stride is 1, 4 channels.
+      output.dim(0).set_stride(4).dim(2).set_stride(1);
+      output.dim(2).set_bounds(0, 4);
+      // Channel 3 lies outside the input bounds, so it reads the constant 255.
+      auto input_ext = BoundaryConditions::constant_exterior(input, 255);
+      output(x, y, c) = input_ext(x, y, c);
+      // Make the channel loop the innermost one and unroll it.
+      output.reorder(c, x, y).unroll(c);
+
+      schedule_algorithm();
+    }
+
+    void schedule_algorithm()  // Picks exactly one schedule from the target.
+    {
+      // GPU schedule: (x, y) is tiled with tile_x by tile_y GPU tiles.
+      if (get_target().has_gpu_feature())
+        output.gpu_tile(x, y, xi, yi, tile_x, tile_y);
+
+      // Hexagon schedule: taken when any of the listed HVX features is set.
+      else if (get_target().features_any_of({Halide::Target::HVX_v62,  //
+                                             Halide::Target::HVX_v65,
+                                             Halide::Target::HVX_v66,
+                                             Halide::Target::HVX_128}))
+      {
+        const auto vector_size =
+            get_target().has_feature(Target::HVX_128) ? 128 : 64;
+        // Strips of 128 rows run in parallel; x is vectorized by vector_size.
+        output.hexagon()
+            .prefetch(input, y, y, 2)
+            .split(y, y, yi, 128)
+            .parallel(y)
+            .vectorize(x, vector_size);
+      }
+
+      // CPU schedule: strips of 8 rows run in parallel; x is vectorized by 8.
+      else
+        output.split(y, y, yi, 8).parallel(y).vectorize(x, 8);
+    }
+  };
+
+}  // namespace
+
+// Register the same generator class under a CPU and a GPU pipeline name.
+HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_cpu)
+HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_gpu)