Skip to content

Commit

Permalink
ENH: accelerate conversion from packed RGB pixel to packed RGBA pixel.
Browse files Browse the repository at this point in the history
  • Loading branch information
Odd Kiva committed Dec 9, 2023
1 parent b5ebe51 commit 47b85dd
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ target_link_libraries(
hello_vulkan_image
PRIVATE SignalHandler #
DO::Sara::Core #
DO::Sara::ImageIO #
DO::Sara::ImageProcessing #
DO::Sara::VideoIO #
DO::Shakti::Vulkan)
set_target_properties(hello_vulkan_image PROPERTIES FOLDER
Expand Down
18 changes: 10 additions & 8 deletions cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include <DO/Sara/Core/Image.hpp>
#include <DO/Sara/Core/TicToc.hpp>
#include <DO/Sara/ImageProcessing/FastColorConversion.hpp>
#include <DO/Sara/VideoIO.hpp>


Expand Down Expand Up @@ -109,10 +110,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend
}

_vstream.open(_vpath);
const auto image_host = _vstream.frame().convert<sara::Rgba8>();
const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame());

const auto aspect_ratio = static_cast<float>(image_host.width()) / image_host.height();
for (auto& vertex: vertices)
const auto aspect_ratio =
static_cast<float>(image_host.width()) / image_host.height();
for (auto& vertex : vertices)
vertex.pos.x() *= aspect_ratio;

// General vulkan context objects.
Expand Down Expand Up @@ -169,11 +171,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend

if (_vstream.read())
{
if (_verbose)
sara::tic();
const auto image_host = _vstream.frame().convert<sara::Rgba8>();
if (_verbose)
sara::toc("RGB to RGBA");
// if (_verbose)
sara::tic();
const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame());
// if (_verbose)
sara::toc("RGB to RGBA");

if (_verbose)
sara::tic();
Expand Down
33 changes: 27 additions & 6 deletions cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,35 @@
#include <DO/Sara/ImageProcessing/FastColorConversion.hpp>

#ifdef DO_SARA_USE_HALIDE
#include <DO/Shakti/Halide/RuntimeUtilities.hpp>
# include <DO/Shakti/Halide/RuntimeUtilities.hpp>

#include "shakti_rgb8u_to_gray32f_cpu.h"
#include "shakti_bgra8u_to_gray32f_cpu.h"
# include "shakti_bgra8u_to_gray32f_cpu.h"
# include "shakti_rgb8_to_rgba8_cpu.h"
# include "shakti_rgb8u_to_gray32f_cpu.h"
#endif


namespace DO::Sara {

auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src, ImageView<float>& dst) -> void
//! @brief Convert a packed RGB8 image into a packed RGBA8 image.
//!
//! The result is written into the caller-provided destination view.
//!
//! When DO_SARA_USE_HALIDE is defined, both views are wrapped as interleaved
//! Halide runtime buffers and handed to the `shakti_rgb8_to_rgba8_cpu`
//! pipeline. Otherwise the generic DO::Sara::convert routine is used.
//!
//! @param src source RGB8 image view.
//! @param dst destination RGBA8 image view; must have the same sizes as @p src.
//! @throws std::domain_error if the sizes of @p src and @p dst differ.
auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src, ImageView<Rgba8>& dst)
    -> void
{
  // Both views must describe images with identical sizes.
  if (src.sizes() != dst.sizes())
  {
    throw std::domain_error{
        "Color conversion error: image sizes are not equal!"};
  }

#ifdef DO_SARA_USE_HALIDE
  // Halide path: wrap the views as interleaved buffers and run the pipeline.
  auto rgb_buffer = Shakti::Halide::as_interleaved_runtime_buffer(src);
  auto rgba_buffer = Shakti::Halide::as_interleaved_runtime_buffer(dst);
  shakti_rgb8_to_rgba8_cpu(rgb_buffer, rgba_buffer);
#else
  // Fallback path: generic conversion.
  DO::Sara::convert(src, dst);
#endif
}

auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src, ImageView<float>& dst)
-> void
{
if (src.sizes() != dst.sizes())
throw std::domain_error{
Expand All @@ -42,12 +61,14 @@ namespace DO::Sara {
// - model name : Intel(R) Core(TM) i7-6800K CPU @ 3.40GHz
//
// [Grayscale] 8.8687 ms
// This is 15 times slower compared to the Halide optimized CPU implementation
// This is 15 times slower compared to the Halide optimized CPU
// implementation
DO::Sara::convert(src, dst);
#endif
}

auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst) -> void
auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst)
-> void
{
if (src.sizes() != dst.sizes())
throw std::domain_error{
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ namespace DO::Sara {
auto from_bgra8_to_gray32f(const ImageView<Bgra8>& src, ImageView<float>& dst)
-> void;

//! @brief Convert a packed RGB8 image view into a packed RGBA8 image view.
//!
//! The conversion writes into @p dst, which must already have the same sizes
//! as @p src. The definition uses the Halide pipeline
//! `shakti_rgb8_to_rgba8_cpu` when DO_SARA_USE_HALIDE is defined and falls
//! back to DO::Sara::convert otherwise.
//!
//! @param src source RGB8 image view.
//! @param dst destination RGBA8 image view.
//! @throws std::domain_error if the sizes of @p src and @p dst differ.
auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src, ImageView<Rgba8>& dst)
-> void;

inline auto from_rgb8_to_gray32f(const ImageView<Rgb8>& src)
{
auto dst = Image<float>{src.sizes()};
Expand All @@ -34,4 +37,11 @@ namespace DO::Sara {
return dst;
}

//! @brief Convert a packed RGB8 image view into a newly created RGBA8 image.
//!
//! Convenience overload: builds an RGBA8 image from the sizes of @p src and
//! fills it through the two-argument from_rgb8_to_rgba8 overload.
//!
//! @param src source RGB8 image view.
//! @return the converted RGBA8 image.
inline auto from_rgb8_to_rgba8(const ImageView<Rgb8>& src)
{
  auto rgba_image = Image<Rgba8>{src.sizes()};
  from_rgb8_to_rgba8(src, rgba_image);
  return rgba_image;
}

} // namespace DO::Sara
1 change: 1 addition & 0 deletions cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ if(SARA_USE_FROM_SOURCE)
# Fast color conversion
shakti_rgb8u_to_gray32f_cpu
shakti_bgra8u_to_gray32f_cpu
shakti_rgb8_to_rgba8_cpu
# Binary operations.
shakti_subtract_32f_cpu
# Cartesian to polar coordinates.
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ shakti_halide_library_v2(
NAME shakti_cast_uint8_to_float_cpu
SRCS CastUint8ToFloat.cpp)

# Packed RGB8 -> packed RGBA8 pixel conversion, generated from
# FromRgb8ToRgba8.cpp.
shakti_halide_library_v2(
NAME shakti_rgb8_to_rgba8_cpu
SRCS FromRgb8ToRgba8.cpp)

# ------------------------------------------------------------------------------
# Image rotation CW 90
# ------------------------------------------------------------------------------
Expand Down
80 changes: 80 additions & 0 deletions cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// ========================================================================== //
// This file is part of Sara, a basic set of libraries in C++ for computer
// vision.
//
// Copyright (C) 2020-present David Ok <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License v. 2.0. If a copy of the MPL was not distributed with this file,
// you can obtain one at http://mozilla.org/MPL/2.0/.
// ========================================================================== //

#include <DO/Shakti/Halide/MyHalide.hpp>
#include <Halide.h>


namespace {

using namespace Halide;

// Halide generator that copies an interleaved 3-channel 8-bit image into an
// interleaved 4-channel 8-bit image.
//
// The three input channels are copied as-is. The fourth output channel has no
// counterpart in the input and is filled with the constant 255 through the
// constant-exterior boundary condition set up in generate().
class Rgb8ToRgba8 : public Halide::Generator<Rgb8ToRgba8>
{
public:
// Tile sizes, only used by the GPU schedule (see schedule_algorithm()).
GeneratorParam<int> tile_x{"tile_x", 32};
GeneratorParam<int> tile_y{"tile_y", 8};

// 3-dimensional 8-bit buffers indexed as (x, y, c).
Input<Buffer<std::uint8_t>> input{"Rgb8u", 3};
Output<Buffer<std::uint8_t>> output{"Rgba8u", 3};

// x, y, c index the pixel and its channel; xi, yi are the inner variables
// produced by tiling/splitting in the schedules.
Var x{"x"}, y{"y"}, c{"c"}, xi{"xi"}, yi{"yi"};

// Define the algorithm, then apply the target-specific schedule.
void generate()
{
// Deal with interleaved RGB pixel format.
// Input layout: x advances by 3 bytes, channel advances by 1 byte, and the
// channel axis is fixed to min 0, extent 3.
input.dim(0).set_stride(3).dim(2).set_stride(1);
input.dim(2).set_bounds(0, 3);

// Output layout: x advances by 4 bytes, channel advances by 1 byte, and the
// channel axis is fixed to min 0, extent 4.
output.dim(0).set_stride(4).dim(2).set_stride(1);
output.dim(2).set_bounds(0, 4);

// Reads outside the input bounds evaluate to 255, so the output channel
// c == 3, which lies outside the input's 3 channels, is set to 255.
auto input_ext = BoundaryConditions::constant_exterior(input, 255);
output(x, y, c) = input_ext(x, y, c);

// Make the channel loop the innermost one and unroll it.
output.reorder(c, x, y).unroll(c);

schedule_algorithm();
}

// Choose the schedule from the features of the compilation target.
void schedule_algorithm()
{
// GPU schedule.
// Tile the (x, y) domain with tile_x by tile_y GPU tiles.
if (get_target().has_gpu_feature())
output.gpu_tile(x, y, xi, yi, tile_x, tile_y);

// Hexagon schedule.
else if (get_target().features_any_of({Halide::Target::HVX_v62, //
Halide::Target::HVX_v65,
Halide::Target::HVX_v66,
Halide::Target::HVX_128}))
{
// Vectorize x by 128 lanes when HVX_128 is enabled, by 64 lanes otherwise.
const auto vector_size =
get_target().has_feature(Target::HVX_128) ? 128 : 64;

// Rows are split into strips of 128 that are processed in parallel.
output.hexagon()
.prefetch(input, y, y, 2)
.split(y, y, yi, 128)
.parallel(y)
.vectorize(x, vector_size);
}

// CPU schedule.
// Split rows into strips of 8 processed in parallel; vectorize x by 8 lanes.
else
output.split(y, y, yi, 8).parallel(y).vectorize(x, 8);
}
};

} // namespace


// Register the same generator class under two names. The schedule is chosen
// from the target features inside Rgb8ToRgba8::schedule_algorithm(), not from
// the registered name.
HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_cpu)
HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_gpu)

0 comments on commit 47b85dd

Please sign in to comment.