diff --git a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt index 4fa6efe86..d8082be54 100644 --- a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt +++ b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/CMakeLists.txt @@ -3,7 +3,7 @@ target_link_libraries( hello_vulkan_image PRIVATE SignalHandler # DO::Sara::Core # - DO::Sara::ImageIO # + DO::Sara::ImageProcessing # DO::Sara::VideoIO # DO::Shakti::Vulkan) set_target_properties(hello_vulkan_image PROPERTIES FOLDER diff --git a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp index bcb163fc3..ab37e9403 100644 --- a/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp +++ b/cpp/examples/Shakti/Vulkan/hello_vulkan_image/main.cpp @@ -28,6 +28,7 @@ #include #include +#include #include @@ -109,10 +110,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend } _vstream.open(_vpath); - const auto image_host = _vstream.frame().convert(); + const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame()); - const auto aspect_ratio = static_cast(image_host.width()) / image_host.height(); - for (auto& vertex: vertices) + const auto aspect_ratio = + static_cast(image_host.width()) / image_host.height(); + for (auto& vertex : vertices) vertex.pos.x() *= aspect_ratio; // General vulkan context objects. 
@@ -169,11 +171,11 @@ class VulkanImageRenderer : public kvk::GraphicsBackend if (_vstream.read()) { if (_verbose) sara::tic(); - const auto image_host = _vstream.frame().convert(); + const auto image_host = sara::from_rgb8_to_rgba8(_vstream.frame()); if (_verbose) sara::toc("RGB to RGBA"); if (_verbose) sara::tic(); diff --git a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp index b5d44f90e..491e3682a 100644 --- a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp +++ b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.cpp @@ -12,16 +12,35 @@ #include #ifdef DO_SARA_USE_HALIDE -#include +# include -#include "shakti_rgb8u_to_gray32f_cpu.h" -#include "shakti_bgra8u_to_gray32f_cpu.h" +# include "shakti_bgra8u_to_gray32f_cpu.h" +# include "shakti_rgb8_to_rgba8_cpu.h" +# include "shakti_rgb8u_to_gray32f_cpu.h" #endif namespace DO::Sara { - auto from_rgb8_to_gray32f(const ImageView& src, ImageView& dst) -> void + auto from_rgb8_to_rgba8(const ImageView& src, ImageView& dst) + -> void + { + if (src.sizes() != dst.sizes()) + throw std::domain_error{ + "Color conversion error: image sizes are not equal!"}; + +#ifdef DO_SARA_USE_HALIDE + auto src_buffer = Shakti::Halide::as_interleaved_runtime_buffer(src); + auto dst_buffer = Shakti::Halide::as_interleaved_runtime_buffer(dst); + shakti_rgb8_to_rgba8_cpu(src_buffer, dst_buffer); +#else + // FALLBACK IMPLEMENTATION. 
+ DO::Sara::convert(src, dst); +#endif + } + + auto from_rgb8_to_gray32f(const ImageView& src, ImageView& dst) + -> void { if (src.sizes() != dst.sizes()) throw std::domain_error{ @@ -42,12 +61,14 @@ namespace DO::Sara { // - model name : Intel(R) Core(TM) i7-6800K CPU @ 3.40GHz // // [Grayscale] 8.8687 ms - // This is 15 times slower compared to the Halide optimized CPU implementation + // This is 15 times slower compared to the Halide optimized CPU + // implementation DO::Sara::convert(src, dst); #endif } - auto from_bgra8_to_gray32f(const ImageView& src, ImageView& dst) -> void + auto from_bgra8_to_gray32f(const ImageView& src, ImageView& dst) + -> void { if (src.sizes() != dst.sizes()) throw std::domain_error{ diff --git a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp index c3fae0ef2..21ea433db 100644 --- a/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp +++ b/cpp/src/DO/Sara/ImageProcessing/FastColorConversion.hpp @@ -20,6 +20,9 @@ namespace DO::Sara { auto from_bgra8_to_gray32f(const ImageView& src, ImageView& dst) -> void; + auto from_rgb8_to_rgba8(const ImageView& src, ImageView& dst) + -> void; + inline auto from_rgb8_to_gray32f(const ImageView& src) { auto dst = Image{src.sizes()}; @@ -34,4 +37,11 @@ namespace DO::Sara { return dst; } + inline auto from_rgb8_to_rgba8(const ImageView& src) + { + auto dst = Image{src.sizes()}; + from_rgb8_to_rgba8(src, dst); + return dst; + } + } // namespace DO::Sara diff --git a/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake b/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake index 87760c5a0..8a04eb58d 100644 --- a/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake +++ b/cpp/src/DO/Sara/UseDOSaraImageProcessing.cmake @@ -18,6 +18,7 @@ if(SARA_USE_FROM_SOURCE) # Fast color conversion shakti_rgb8u_to_gray32f_cpu shakti_bgra8u_to_gray32f_cpu + shakti_rgb8_to_rgba8_cpu # Binary operations. shakti_subtract_32f_cpu # Cartesian to polar coordinates. 
diff --git a/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt b/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt index 9a93cf6be..0d93da872 100644 --- a/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt +++ b/cpp/src/DO/Shakti/Halide/Generators/CMakeLists.txt @@ -21,6 +21,10 @@ shakti_halide_library_v2( NAME shakti_cast_uint8_to_float_cpu SRCS CastUint8ToFloat.cpp) +shakti_halide_library_v2( + NAME shakti_rgb8_to_rgba8_cpu + SRCS FromRgb8ToRgba8.cpp) + # ------------------------------------------------------------------------------ # Image rotation CW 90 # ------------------------------------------------------------------------------ diff --git a/cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp b/cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp new file mode 100644 index 000000000..2a69c79fa --- /dev/null +++ b/cpp/src/DO/Shakti/Halide/Generators/FromRgb8ToRgba8.cpp @@ -0,0 +1,80 @@ +// ========================================================================== // +// This file is part of Sara, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2020-present David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + +#include +#include + + +namespace { + + using namespace Halide; + + class Rgb8ToRgba8 : public Halide::Generator + { + public: + GeneratorParam tile_x{"tile_x", 32}; + GeneratorParam tile_y{"tile_y", 8}; + + Input> input{"Rgb8u", 3}; + Output> output{"Rgba8u", 3}; + + Var x{"x"}, y{"y"}, c{"c"}, xi{"xi"}, yi{"yi"}; + + void generate() + { + // Deal with interleaved RGB pixel format. 
+ input.dim(0).set_stride(3).dim(2).set_stride(1); + input.dim(2).set_bounds(0, 3); + + output.dim(0).set_stride(4).dim(2).set_stride(1); + output.dim(2).set_bounds(0, 4); + + auto input_ext = BoundaryConditions::constant_exterior(input, 255); + output(x, y, c) = input_ext(x, y, c); + + output.reorder(c, x, y).unroll(c); + + schedule_algorithm(); + } + + void schedule_algorithm() + { + // GPU schedule. + if (get_target().has_gpu_feature()) + output.gpu_tile(x, y, xi, yi, tile_x, tile_y); + + // Hexagon schedule. + else if (get_target().features_any_of({Halide::Target::HVX_v62, // + Halide::Target::HVX_v65, + Halide::Target::HVX_v66, + Halide::Target::HVX_128})) + { + const auto vector_size = + get_target().has_feature(Target::HVX_128) ? 128 : 64; + + output.hexagon() + .prefetch(input, y, y, 2) + .split(y, y, yi, 128) + .parallel(y) + .vectorize(x, vector_size); + } + + // CPU schedule. + else + output.split(y, y, yi, 8).parallel(y).vectorize(x, 8); + } + }; + +} // namespace + + +HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_cpu) +HALIDE_REGISTER_GENERATOR(Rgb8ToRgba8, shakti_rgb8_to_rgba8_gpu)