From d318d3d1343321f24363ca6e3ad04b451125f356 Mon Sep 17 00:00:00 2001 From: David OK Date: Tue, 5 Dec 2023 19:39:00 +0000 Subject: [PATCH 1/4] MAINT: update Docker image. --- .github/workflows/ci.yml | 2 +- build.py | 10 +++++++++- docker/Dockerfile | 6 +++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e37a50a3c..d67c3f573 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: # os: [macos-latest, windows-latest] include: - os: ubuntu-latest - container: 'oddkiva/sara-devel:cuda12.1.0-ubuntu22.04-trt8.6-swift5.9-halide16.0.0' + container: 'oddkiva/sara-devel:cuda12.1.0-ubuntu22.04-trt8.6-swift5.9.1-halide16.0.0' - os: ubuntu-latest container: 'oddkiva/sara-emsdk-devel:latest' diff --git a/build.py b/build.py index 9b8fc783c..129dc3484 100755 --- a/build.py +++ b/build.py @@ -28,7 +28,7 @@ UBUNTU_VERSION = "22.04" CUDA_VERSION = "12.1.0" TRT_VERSION = "8.6" -SWIFT_VERSION = "5.9" +SWIFT_VERSION = "5.9.1" HALIDE_VERSION = "16.0.0" # Docker @@ -304,6 +304,14 @@ def build_library_docker() -> None: ], SARA_SOURCE_DIR, ) + execute( + [ + "docker", + "push", + f"{SARA_DOCKER_IMAGE}", + ], + SARA_SOURCE_DIR + ) def build_book(): diff --git a/docker/Dockerfile b/docker/Dockerfile index 087806079..862d826c1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -75,9 +75,9 @@ RUN pip3 install \ pybind11 # Install Swift toolchain. -RUN wget https://download.swift.org/swift-5.9-release/ubuntu2204/swift-5.9-RELEASE/swift-5.9-RELEASE-ubuntu22.04.tar.gz -RUN tar xvzf swift-5.9-RELEASE-ubuntu22.04.tar.gz \ - && mv swift-5.9-RELEASE-ubuntu22.04 /opt +RUN wget https://download.swift.org/swift-5.9.1-release/ubuntu2204/swift-5.9.1-RELEASE/swift-5.9.1-RELEASE-ubuntu22.04.tar.gz +RUN tar xvzf swift-5.9.1-RELEASE-ubuntu22.04.tar.gz \ + && mv swift-5.9.1-RELEASE-ubuntu22.04 /opt # Install Halide. RUN wget https://github.com/halide/Halide/releases/download/v16.0.0/Halide-16.0.0-x86-64-linux-1e963ff817ef0968cc25d811a25a7350c8953ee6.tar.gz From 6ce3fe4169a400f5a7854b9a65b7d8b4918e9532 Mon Sep 17 00:00:00 2001 From: David OK Date: Mon, 11 Dec 2023 14:55:41 +0000 Subject: [PATCH 2/4] MAINT: reorganize DL inference code. --- cpp/drafts/CMakeLists.txt | 1 - cpp/drafts/NeuralNetworks/CMakeLists.txt | 2 - .../NeuralNetworks/Darknet/CMakeLists.txt | 12 ------ .../NeuralNetworks/TensorRT/CMakeLists.txt | 37 ------------------- cpp/examples/Sara/CMakeLists.txt | 2 + .../Sara/NeuralNetworks}/CMakeLists.txt | 1 - .../Sara/NeuralNetworks}/yolo_v4_example.cpp | 18 +++++---- cpp/examples/Shakti/CMakeLists.txt | 2 + .../Shakti/TensorRT}/CMakeLists.txt | 4 +- .../tensorrt_yolov4_tiny_example.cpp | 10 ++--- cpp/src/DO/Sara/CMakeLists.txt | 2 + cpp/src/DO/Sara/NeuralNetworks/CMakeLists.txt | 1 + .../NeuralNetworks/Darknet/CMakeLists.txt | 10 +++++ .../DO/Sara}/NeuralNetworks/Darknet/Debug.hpp | 10 ++--- .../DO/Sara}/NeuralNetworks/Darknet/Layer.cpp | 4 +- .../DO/Sara}/NeuralNetworks/Darknet/Layer.hpp | 2 +- .../Sara}/NeuralNetworks/Darknet/Network.hpp | 4 +- .../Sara}/NeuralNetworks/Darknet/Parser.cpp | 6 ++- .../Sara}/NeuralNetworks/Darknet/Parser.hpp | 8 ++-- .../NeuralNetworks/Darknet/YoloUtilities.cpp | 2 +- .../NeuralNetworks/Darknet/YoloUtilities.hpp | 0 cpp/src/DO/Shakti/Cuda/CMakeLists.txt | 2 + .../DO/Shakti/Cuda/TensorRT/CMakeLists.txt | 31 ++++++++++++++++ .../Shakti/Cuda}/TensorRT/DarknetParser.cpp | 11 ++++-- .../Shakti/Cuda}/TensorRT/DarknetParser.hpp | 9 +++-- .../DO/Shakti/Cuda}/TensorRT/Helpers.hpp | 0 .../DO/Shakti/Cuda}/TensorRT/IO.cpp | 2 +- .../DO/Shakti/Cuda}/TensorRT/IO.hpp | 2 +- .../Cuda}/TensorRT/InferenceExecutor.cpp | 2 +- .../Cuda}/TensorRT/InferenceExecutor.hpp | 5 +-- .../DO/Shakti/Cuda}/TensorRT/Yolo.cpp | 6 +-- .../DO/Shakti/Cuda}/TensorRT/Yolo.hpp | 0 .../DO/Shakti/Cuda}/TensorRT/YoloImpl.cu | 3 +- .../DO/Shakti/Cuda}/TensorRT/YoloImpl.hpp | 0 cpp/test/Sara/CMakeLists.txt | 2 + .../Sara/NeuralNetworks}/CMakeLists.txt | 0 ...neuralnetworks_yolo_v4_config_parsing.cpp} | 17 ++++----- cpp/test/Shakti/Cuda/CMakeLists.txt | 2 + .../Shakti/Cuda/TensorRT}/CMakeLists.txt | 10 +++-- .../test_neuralnetworks_tensorrt.cpp | 20 ++++++++-- ...neuralnetworks_tensorrt_darknet_parser.cpp | 23 ++++++++---- ...alnetworks_tensorrt_inference_executor.cpp | 22 ++++++++--- ...st_neuralnetworks_tensorrt_onnx_parser.cpp | 16 ++++++-- ...st_neuralnetworks_tensorrt_yolo_plugin.cpp | 23 ++++++++---- 44 files changed, 203 insertions(+), 143 deletions(-) delete mode 100644 cpp/drafts/NeuralNetworks/CMakeLists.txt delete mode 100644 cpp/drafts/NeuralNetworks/Darknet/CMakeLists.txt delete mode 100644 cpp/drafts/NeuralNetworks/TensorRT/CMakeLists.txt rename cpp/{drafts/NeuralNetworks/Darknet/examples => examples/Sara/NeuralNetworks}/CMakeLists.txt (96%) rename cpp/{drafts/NeuralNetworks/Darknet/examples => examples/Sara/NeuralNetworks}/yolo_v4_example.cpp (95%) rename cpp/{drafts/NeuralNetworks/TensorRT/examples => examples/Shakti/TensorRT}/CMakeLists.txt (91%) rename cpp/{drafts/NeuralNetworks/TensorRT/examples => examples/Shakti/TensorRT}/tensorrt_yolov4_tiny_example.cpp (95%) create mode 100644 cpp/src/DO/Sara/NeuralNetworks/CMakeLists.txt create mode 100644 cpp/src/DO/Sara/NeuralNetworks/Darknet/CMakeLists.txt rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Debug.hpp (97%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Layer.cpp (99%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Layer.hpp (99%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Network.hpp (98%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Parser.cpp (97%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/Parser.hpp (89%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/YoloUtilities.cpp (98%) rename cpp/{drafts => src/DO/Sara}/NeuralNetworks/Darknet/YoloUtilities.hpp (100%) create mode 100644 cpp/src/DO/Shakti/Cuda/TensorRT/CMakeLists.txt rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/DarknetParser.cpp (98%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/DarknetParser.hpp (93%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/Helpers.hpp (100%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/IO.cpp (96%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/IO.hpp (96%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/InferenceExecutor.cpp (97%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/InferenceExecutor.hpp (96%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/Yolo.cpp (98%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/Yolo.hpp (100%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/YoloImpl.cu (98%) rename cpp/{drafts/NeuralNetworks => src/DO/Shakti/Cuda}/TensorRT/YoloImpl.hpp (100%) rename cpp/{drafts/NeuralNetworks/Darknet/test => test/Sara/NeuralNetworks}/CMakeLists.txt (100%) rename cpp/{drafts/NeuralNetworks/Darknet/test/test_neuralnetworks_config_parsing.cpp => test/Sara/NeuralNetworks/test_neuralnetworks_yolo_v4_config_parsing.cpp} (75%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/CMakeLists.txt (74%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/test_neuralnetworks_tensorrt.cpp (92%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/test_neuralnetworks_tensorrt_darknet_parser.cpp (91%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/test_neuralnetworks_tensorrt_inference_executor.cpp (73%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/test_neuralnetworks_tensorrt_onnx_parser.cpp (89%) rename cpp/{drafts/NeuralNetworks/TensorRT/test => test/Shakti/Cuda/TensorRT}/test_neuralnetworks_tensorrt_yolo_plugin.cpp (83%) diff --git a/cpp/drafts/CMakeLists.txt b/cpp/drafts/CMakeLists.txt index 149b1755f..08226d969 100644 --- a/cpp/drafts/CMakeLists.txt +++ b/cpp/drafts/CMakeLists.txt @@ -13,7 +13,6 @@ add_subdirectory(ChessboardDetection) add_subdirectory(Calibration) add_subdirectory(Compute) add_subdirectory(MatchPropagation) -add_subdirectory(NeuralNetworks) add_subdirectory(NuScenes) add_subdirectory(Taskflow) diff --git a/cpp/drafts/NeuralNetworks/CMakeLists.txt b/cpp/drafts/NeuralNetworks/CMakeLists.txt deleted file mode 100644 index 8c5587e96..000000000 --- a/cpp/drafts/NeuralNetworks/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_subdirectory(Darknet) -add_subdirectory(TensorRT) diff --git a/cpp/drafts/NeuralNetworks/Darknet/CMakeLists.txt b/cpp/drafts/NeuralNetworks/Darknet/CMakeLists.txt deleted file mode 100644 index b17fcaa44..000000000 --- a/cpp/drafts/NeuralNetworks/Darknet/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -file(GLOB DO_Sara_Darknet_SOURCE_FILES FILES *.hpp *.cpp) - -add_library(DO_Sara_Darknet ${DO_Sara_Darknet_SOURCE_FILES}) -add_library(DO::Sara::Darknet ALIAS DO_Sara_Darknet) - -target_link_libraries(DO_Sara_Darknet PUBLIC DO::Sara::Core - DO::Sara::ImageProcessing - Boost::filesystem) -set_property(TARGET DO_Sara_Darknet PROPERTY FOLDER "Libraries/Sara") - -add_subdirectory(test) -add_subdirectory(examples) diff --git a/cpp/drafts/NeuralNetworks/TensorRT/CMakeLists.txt b/cpp/drafts/NeuralNetworks/TensorRT/CMakeLists.txt deleted file mode 100644 index 97616cc83..000000000 --- a/cpp/drafts/NeuralNetworks/TensorRT/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT CMAKE_CUDA_COMPILER OR NOT TensorRT_FOUND) - return() -endif () - -add_library( - DO_Sara_TensorRT # - Helpers.hpp # - IO.hpp # - IO.cpp # - InferenceExecutor.hpp # - InferenceExecutor.cpp # - DarknetParser.hpp # - DarknetParser.cpp # - Yolo.cpp - Yolo.hpp - YoloImpl.hpp - YoloImpl.cu) -add_library(DO::Sara::TensorRT ALIAS DO_Sara_TensorRT) -if (SARA_BUILD_SHARED_LIBS) - target_compile_definitions(DO_Sara_TensorRT PRIVATE DO_SARA_EXPORTS) -else () - target_compile_definitions(DO_Sara_TensorRT PUBLIC DO_SARA_STATIC) -endif () - -target_link_libraries( - DO_Sara_TensorRT - PUBLIC $<$:CUDA::cudart> # - TensorRT::TensorRT # - DO::Shakti::Cuda::Utilities - DO::Sara::Darknet) -set_property(TARGET DO_Sara_TensorRT PROPERTY FOLDER "Libraries/Sara") - -add_subdirectory(examples) - -if(SARA_BUILD_TESTS) - add_subdirectory(test) -endif() diff --git a/cpp/examples/Sara/CMakeLists.txt b/cpp/examples/Sara/CMakeLists.txt index d2a8d5929..dd71f81b5 100644 --- a/cpp/examples/Sara/CMakeLists.txt +++ b/cpp/examples/Sara/CMakeLists.txt @@ -12,3 +12,5 @@ add_subdirectory(MultiViewGeometry) if (SARA_BUILD_VIDEOIO) add_subdirectory(VideoIO) endif () + +add_subdirectory(NeuralNetworks) diff --git a/cpp/drafts/NeuralNetworks/Darknet/examples/CMakeLists.txt b/cpp/examples/Sara/NeuralNetworks/CMakeLists.txt similarity index 96% rename from cpp/drafts/NeuralNetworks/Darknet/examples/CMakeLists.txt rename to cpp/examples/Sara/NeuralNetworks/CMakeLists.txt index 746e222a1..eb658ae7c 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/examples/CMakeLists.txt +++ b/cpp/examples/Sara/NeuralNetworks/CMakeLists.txt @@ -13,7 +13,6 @@ foreach (file ${neuralnetworks_SOURCE_FILES}) DO::Sara::ImageIO DO::Sara::VideoIO DO::Sara::Darknet - Boost::filesystem $<$:OpenMP::OpenMP_CXX>) set_target_properties(${filename} PROPERTIES diff --git a/cpp/drafts/NeuralNetworks/Darknet/examples/yolo_v4_example.cpp b/cpp/examples/Sara/NeuralNetworks/yolo_v4_example.cpp similarity index 95% rename from cpp/drafts/NeuralNetworks/Darknet/examples/yolo_v4_example.cpp rename to cpp/examples/Sara/NeuralNetworks/yolo_v4_example.cpp index 3f6563b99..e98b4d3a9 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/examples/yolo_v4_example.cpp +++ b/cpp/examples/Sara/NeuralNetworks/yolo_v4_example.cpp @@ -9,29 +9,31 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include -#include -#include - #include #include + +#include + #include #include -#include #include -#include +#include +#include +#include + +#include #include #ifdef _OPENMP -#include +# include #endif namespace sara = DO::Sara; namespace d = DO::Sara::Darknet; -namespace fs = boost::filesystem; +namespace fs = std::filesystem; // The API. diff --git a/cpp/examples/Shakti/CMakeLists.txt b/cpp/examples/Shakti/CMakeLists.txt index 15423754d..c84ed6a0e 100644 --- a/cpp/examples/Shakti/CMakeLists.txt +++ b/cpp/examples/Shakti/CMakeLists.txt @@ -15,3 +15,5 @@ endif() add_subdirectory(OpenCL) add_subdirectory(Vulkan) + +add_subdirectory(TensorRT) diff --git a/cpp/drafts/NeuralNetworks/TensorRT/examples/CMakeLists.txt b/cpp/examples/Shakti/TensorRT/CMakeLists.txt similarity index 91% rename from cpp/drafts/NeuralNetworks/TensorRT/examples/CMakeLists.txt rename to cpp/examples/Shakti/TensorRT/CMakeLists.txt index 695e5f524..b59ba19b5 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/examples/CMakeLists.txt +++ b/cpp/examples/Shakti/TensorRT/CMakeLists.txt @@ -16,9 +16,9 @@ foreach(file ${TRT_SOURCE_FILES}) DO::Sara::VideoIO DO::Sara::ImageProcessing DO::Sara::Darknet - DO::Sara::TensorRT DO::Shakti::Cuda::MultiArray - DO::Shakti::Cuda::Utilities) + DO::Shakti::Cuda::Utilities + DO::Shakti::Cuda::TensorRT) set_target_properties( ${filename} PROPERTIES COMPILE_FLAGS ${SARA_DEFINITIONS} diff --git a/cpp/drafts/NeuralNetworks/TensorRT/examples/tensorrt_yolov4_tiny_example.cpp b/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp similarity index 95% rename from cpp/drafts/NeuralNetworks/TensorRT/examples/tensorrt_yolov4_tiny_example.cpp rename to cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp index 691f9b357..d6a102460 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/examples/tensorrt_yolov4_tiny_example.cpp +++ b/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp @@ -9,20 +9,20 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include -#include -#include +#include +#include #include #include #include +#include #include -#include +#include namespace sara = DO::Sara; -namespace fs = boost::filesystem; +namespace fs = std::filesystem; namespace trt = sara::TensorRT; namespace d = sara::Darknet; diff --git a/cpp/src/DO/Sara/CMakeLists.txt b/cpp/src/DO/Sara/CMakeLists.txt index e4a3a3b65..9011f2cad 100644 --- a/cpp/src/DO/Sara/CMakeLists.txt +++ b/cpp/src/DO/Sara/CMakeLists.txt @@ -31,3 +31,5 @@ include(UseDOSaraMultiViewGeometry) include(UseDOSaraRANSAC) include(UseDOSaraVisualization) + +add_subdirectory(NeuralNetworks) diff --git a/cpp/src/DO/Sara/NeuralNetworks/CMakeLists.txt b/cpp/src/DO/Sara/NeuralNetworks/CMakeLists.txt new file mode 100644 index 000000000..94836c4e0 --- /dev/null +++ b/cpp/src/DO/Sara/NeuralNetworks/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Darknet) diff --git a/cpp/src/DO/Sara/NeuralNetworks/Darknet/CMakeLists.txt b/cpp/src/DO/Sara/NeuralNetworks/Darknet/CMakeLists.txt new file mode 100644 index 000000000..5f71f0fa9 --- /dev/null +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/CMakeLists.txt @@ -0,0 +1,10 @@ +file(GLOB DO_Sara_Darknet_SOURCE_FILES FILES *.hpp *.cpp) + +add_library(DO_Sara_Darknet ${DO_Sara_Darknet_SOURCE_FILES}) +add_library(DO::Sara::Darknet ALIAS DO_Sara_Darknet) + +target_link_libraries( + DO_Sara_Darknet + PUBLIC DO::Sara::Core DO::Sara::ImageProcessing + PRIVATE Boost::filesystem) +set_property(TARGET DO_Sara_Darknet PROPERTY FOLDER "Libraries/Sara") diff --git a/cpp/drafts/NeuralNetworks/Darknet/Debug.hpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Debug.hpp similarity index 97% rename from cpp/drafts/NeuralNetworks/Darknet/Debug.hpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Debug.hpp index f6d61ff0d..97a35548c 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Debug.hpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Debug.hpp @@ -11,13 +11,11 @@ #pragma once -#include - #include #include +#include -#include - +#include #include @@ -45,7 +43,7 @@ namespace DO::Sara::Darknet { // CAVEAT: this is sensitive to the CPU architecture endianness. inline auto read_all_intermediate_outputs(const std::string& dir_path) { - namespace fs = boost::filesystem; + namespace fs = std::filesystem; auto stringify = [](int n) { std::ostringstream ss; @@ -169,7 +167,7 @@ namespace DO::Sara::Darknet { inline auto check_yolov4_tiny_implementation(Network& model, const std::string& output_dir) { - namespace fs = boost::filesystem; + namespace fs = std::filesystem; if (!fs::exists(output_dir)) throw std::runtime_error{"Ouput directory " + output_dir + diff --git a/cpp/drafts/NeuralNetworks/Darknet/Layer.cpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.cpp similarity index 99% rename from cpp/drafts/NeuralNetworks/Darknet/Layer.cpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.cpp index b042afaa0..4f48fb9a5 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Layer.cpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.cpp @@ -9,7 +9,9 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include +#include + +#include using namespace DO::Sara::Darknet; diff --git a/cpp/drafts/NeuralNetworks/Darknet/Layer.hpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.hpp similarity index 99% rename from cpp/drafts/NeuralNetworks/Darknet/Layer.hpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.hpp index 387b90cfb..f75752007 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Layer.hpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Layer.hpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace DO::Sara::Darknet { diff --git a/cpp/drafts/NeuralNetworks/Darknet/Network.hpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Network.hpp similarity index 98% rename from cpp/drafts/NeuralNetworks/Darknet/Network.hpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Network.hpp index 782bcb2c9..8b4300a93 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Network.hpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Network.hpp @@ -13,10 +13,8 @@ #include #include - #include - -#include +#include #include diff --git a/cpp/drafts/NeuralNetworks/Darknet/Parser.cpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.cpp similarity index 97% rename from cpp/drafts/NeuralNetworks/Darknet/Parser.cpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.cpp index 7d2f201f0..c902bde8f 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Parser.cpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.cpp @@ -9,7 +9,9 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include +#include + +#include #include @@ -206,7 +208,7 @@ namespace DO::Sara::Darknet { } - auto load_yolov4_tiny_model(const boost::filesystem::path& model_dir_path) + auto load_yolov4_tiny_model(const std::filesystem::path& model_dir_path) -> Network { const auto cfg_filepath = model_dir_path / "yolov4-tiny.cfg"; diff --git a/cpp/drafts/NeuralNetworks/Darknet/Parser.hpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.hpp similarity index 89% rename from cpp/drafts/NeuralNetworks/Darknet/Parser.hpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.hpp index 9ffb1d97d..98c04a4a1 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/Parser.hpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/Parser.hpp @@ -11,10 +11,10 @@ #pragma once -#include -#include +#include +#include -#include +#include namespace DO::Sara::Darknet { @@ -66,7 +66,7 @@ namespace DO::Sara::Darknet { }; - auto load_yolov4_tiny_model(const boost::filesystem::path& model_dir_path) + auto load_yolov4_tiny_model(const std::filesystem::path& model_dir_path) -> Network; } // namespace DO::Sara::Darknet diff --git a/cpp/drafts/NeuralNetworks/Darknet/YoloUtilities.cpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/YoloUtilities.cpp similarity index 98% rename from cpp/drafts/NeuralNetworks/Darknet/YoloUtilities.cpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/YoloUtilities.cpp index bad965e58..e282dcbe7 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/YoloUtilities.cpp +++ b/cpp/src/DO/Sara/NeuralNetworks/Darknet/YoloUtilities.cpp @@ -9,7 +9,7 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include +#include namespace DO::Sara::Darknet { diff --git a/cpp/drafts/NeuralNetworks/Darknet/YoloUtilities.hpp b/cpp/src/DO/Sara/NeuralNetworks/Darknet/YoloUtilities.hpp similarity index 100% rename from cpp/drafts/NeuralNetworks/Darknet/YoloUtilities.hpp rename to cpp/src/DO/Sara/NeuralNetworks/Darknet/YoloUtilities.hpp diff --git a/cpp/src/DO/Shakti/Cuda/CMakeLists.txt b/cpp/src/DO/Shakti/Cuda/CMakeLists.txt index 98ccb5dad..7c7c8309d 100644 --- a/cpp/src/DO/Shakti/Cuda/CMakeLists.txt +++ b/cpp/src/DO/Shakti/Cuda/CMakeLists.txt @@ -15,3 +15,5 @@ if (NvidiaVideoCodec_ROOT) endif () add_subdirectory(FeatureDetectors) + +add_subdirectory(TensorRT) diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/CMakeLists.txt b/cpp/src/DO/Shakti/Cuda/TensorRT/CMakeLists.txt new file mode 100644 index 000000000..957699d8e --- /dev/null +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/CMakeLists.txt @@ -0,0 +1,31 @@ +if(NOT CMAKE_CUDA_COMPILER OR NOT TensorRT_FOUND) + return() +endif() + +add_library( + DO_Shakti_Cuda_TensorRT # + Helpers.hpp # + IO.hpp # + IO.cpp # + InferenceExecutor.hpp # + InferenceExecutor.cpp # + DarknetParser.hpp # + DarknetParser.cpp # + Yolo.cpp + Yolo.hpp + YoloImpl.hpp + YoloImpl.cu) +add_library(DO::Shakti::Cuda::TensorRT ALIAS DO_Shakti_Cuda_TensorRT) +if(SARA_BUILD_SHARED_LIBS) + target_compile_definitions(DO_Shakti_Cuda_TensorRT PRIVATE DO_SARA_EXPORTS) +else() + target_compile_definitions(DO_Shakti_Cuda_TensorRT PUBLIC DO_SARA_STATIC) +endif() + +target_link_libraries( + DO_Shakti_Cuda_TensorRT + PUBLIC $<$:CUDA::cudart> # + TensorRT::TensorRT # + DO::Shakti::Cuda::Utilities # + DO::Sara::Darknet) +set_property(TARGET DO_Shakti_Cuda_TensorRT PROPERTY FOLDER "Libraries/Shakti") diff --git a/cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp similarity index 98% rename from cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.cpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp index 8979a59ab..c8805188f 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp @@ -9,12 +9,15 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include -#include -#include +#include +#include + +#include namespace DO::Sara::TensorRT { + using nvinfer1::IPluginV2; + static inline auto shape(const nvinfer1::ITensor& t) -> Eigen::Vector4i { @@ -293,7 +296,7 @@ namespace DO::Sara::TensorRT { // Create the YOLO plugin. const auto yolo_plugin = - std::unique_ptr{ + std::unique_ptr{ yolo_plugin_creator->createPlugin("", &fc), delete_plugin}; assert(yolo_plugin.get() != nullptr); diff --git a/cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp similarity index 93% rename from cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp index 07f588e0b..f9733bba7 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/DarknetParser.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp @@ -11,11 +11,11 @@ #pragma once -#include -#include -#include +#include +#include #include +#include #include @@ -59,7 +59,8 @@ namespace DO::Sara::TensorRT { std::vector& fmaps) const -> void; auto add_maxpool_layer(const int layer_idx, - std::vector& fmaps) const -> void; + std::vector& fmaps) const + -> void; auto add_upsample_layer(const int layer_idx, std::vector& fmaps) const diff --git a/cpp/drafts/NeuralNetworks/TensorRT/Helpers.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp similarity index 100% rename from cpp/drafts/NeuralNetworks/TensorRT/Helpers.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp diff --git a/cpp/drafts/NeuralNetworks/TensorRT/IO.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp similarity index 96% rename from cpp/drafts/NeuralNetworks/TensorRT/IO.cpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp index aa2f9e33d..0aecedcd7 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/IO.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp @@ -9,7 +9,7 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include +#include namespace DO::Sara::TensorRT { diff --git a/cpp/drafts/NeuralNetworks/TensorRT/IO.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp similarity index 96% rename from cpp/drafts/NeuralNetworks/TensorRT/IO.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp index 69ea3b9a7..9e2369cda 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/IO.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp @@ -11,7 +11,7 @@ #pragma once -#include +#include #include #include diff --git a/cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp similarity index 97% rename from cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.cpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp index d11c4cb3a..0d07f7e40 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp @@ -9,7 +9,7 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include +#include namespace DO::Sara::TensorRT { diff --git a/cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp similarity index 96% rename from cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp index 2d9582a64..8958cef87 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/InferenceExecutor.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp @@ -17,11 +17,10 @@ #include -#include - #include #include +#include namespace DO::Sara::TensorRT { @@ -44,7 +43,7 @@ namespace DO::Sara::TensorRT { std::array, 2>& out, // const bool synchronize = true) const -> void; - // private: + // private: CudaStreamUniquePtr _cuda_stream = make_cuda_stream(); RuntimeUniquePtr _runtime = {nullptr, &runtime_deleter}; CudaEngineUniquePtr _engine = {nullptr, &engine_deleter}; diff --git a/cpp/drafts/NeuralNetworks/TensorRT/Yolo.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp similarity index 98% rename from cpp/drafts/NeuralNetworks/TensorRT/Yolo.cpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp index 9906375aa..4dcd88d1d 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/Yolo.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp @@ -9,9 +9,9 @@ // you can obtain one at http://mozilla.org/MPL/2.0/. // ========================================================================== // -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/drafts/NeuralNetworks/TensorRT/Yolo.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp similarity index 100% rename from cpp/drafts/NeuralNetworks/TensorRT/Yolo.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp diff --git a/cpp/drafts/NeuralNetworks/TensorRT/YoloImpl.cu b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu similarity index 98% rename from cpp/drafts/NeuralNetworks/TensorRT/YoloImpl.cu rename to cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu index b2efea219..0b8bd1b45 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/YoloImpl.cu +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu @@ -10,7 +10,8 @@ // ========================================================================== // #include -#include + +#include namespace DO::Sara::TensorRT { diff --git a/cpp/drafts/NeuralNetworks/TensorRT/YoloImpl.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp similarity index 100% rename from cpp/drafts/NeuralNetworks/TensorRT/YoloImpl.hpp rename to cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp diff --git a/cpp/test/Sara/CMakeLists.txt b/cpp/test/Sara/CMakeLists.txt index d9e46edcf..c438612de 100644 --- a/cpp/test/Sara/CMakeLists.txt +++ b/cpp/test/Sara/CMakeLists.txt @@ -33,3 +33,5 @@ add_subdirectory(KalmanFilter) add_subdirectory(MultipleObjectTracking) add_subdirectory(Visualization) + +add_subdirectory(NeuralNetworks) diff --git a/cpp/drafts/NeuralNetworks/Darknet/test/CMakeLists.txt b/cpp/test/Sara/NeuralNetworks/CMakeLists.txt similarity index 100% rename from cpp/drafts/NeuralNetworks/Darknet/test/CMakeLists.txt rename to cpp/test/Sara/NeuralNetworks/CMakeLists.txt diff --git a/cpp/drafts/NeuralNetworks/Darknet/test/test_neuralnetworks_config_parsing.cpp b/cpp/test/Sara/NeuralNetworks/test_neuralnetworks_yolo_v4_config_parsing.cpp similarity index 75% rename from cpp/drafts/NeuralNetworks/Darknet/test/test_neuralnetworks_config_parsing.cpp rename to cpp/test/Sara/NeuralNetworks/test_neuralnetworks_yolo_v4_config_parsing.cpp index 2bc8a3183..a7b2468d8 100644 --- a/cpp/drafts/NeuralNetworks/Darknet/test/test_neuralnetworks_config_parsing.cpp +++ b/cpp/test/Sara/NeuralNetworks/test_neuralnetworks_yolo_v4_config_parsing.cpp @@ -13,14 +13,15 @@ #include -#include -#include +#include +#include -#include #include +#include -namespace fs = boost::filesystem; + +namespace fs = std::filesystem; namespace sara = DO::Sara; @@ -28,17 +29,15 @@ BOOST_AUTO_TEST_SUITE(TestLayers) BOOST_AUTO_TEST_CASE(test_yolov4_tiny_config_parsing) { - namespace fs = boost::filesystem; - - const auto data_dir_path = - fs::canonical(fs::path{src_path("data")}); + const auto data_dir_path = fs::canonical(fs::path{src_path("data")}); const auto cfg_filepath = data_dir_path / "trained_models" / "yolov4-tiny.cfg"; const auto weights_filepath = data_dir_path / "trained_models" / "yolov4-tiny.weights"; BOOST_CHECK(fs::exists(cfg_filepath)); - auto net = sara::Darknet::NetworkParser{}.parse_config_file(cfg_filepath.string()); + auto net = + sara::Darknet::NetworkParser{}.parse_config_file(cfg_filepath.string()); if (fs::exists(weights_filepath)) sara::Darknet::NetworkWeightLoader{weights_filepath.string()}.load(net); } diff --git a/cpp/test/Shakti/Cuda/CMakeLists.txt b/cpp/test/Shakti/Cuda/CMakeLists.txt index 8c5eac01c..2116465f2 100644 --- a/cpp/test/Shakti/Cuda/CMakeLists.txt +++ b/cpp/test/Shakti/Cuda/CMakeLists.txt @@ -10,3 +10,5 @@ add_subdirectory(MultiArray) # TODO: update code for CUDA 12. add_subdirectory(ImageProcessing) add_subdirectory(Segmentation) add_subdirectory(FeatureDetectors) + +add_subdirectory(TensorRT) diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/CMakeLists.txt b/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt similarity index 74% rename from cpp/drafts/NeuralNetworks/TensorRT/test/CMakeLists.txt rename to cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt index 446c9f495..7931f2e5f 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/CMakeLists.txt +++ b/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt @@ -13,14 +13,18 @@ foreach(file ${test_tensorrt_SOURCE_FILES}) target_link_libraries( ${filename} PRIVATE ${Boost_LIBRARIES} # - DO::Sara::ImageIO DO::Sara::ImageProcessing DO::Sara::TensorRT - DO::Shakti::Cuda::MultiArray DO::Shakti::Cuda::Utilities) + DO::Sara::ImageIO # + DO::Sara::ImageProcessing # + DO::Shakti::Cuda::MultiArray # + DO::Shakti::Cuda::Utilities # + DO::Shakti::Cuda::TensorRT) set_target_properties( ${filename} PROPERTIES COMPILE_FLAGS ${SARA_DEFINITIONS} RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") - set_property(TARGET ${filename} PROPERTY FOLDER "Tests/Shakti/NeuralNetworks") + set_property(TARGET ${filename} # + PROPERTY FOLDER "Tests/Shakti/CUDA/NeuralNetworks") add_test(NAME ${filename} COMMAND $) endforeach() diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp similarity index 92% rename from cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt.cpp rename to cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp index 1e048f25b..716d70c56 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp @@ -1,3 +1,14 @@ +// ========================================================================== // +// This file is part of DO-CV, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2023 David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + #define BOOST_TEST_MODULE "NeuralNetworks/TensorRT/Basic Operations" #include @@ -7,11 +18,10 @@ #include #include +#include +#include #include -#include -#include - #include #include @@ -255,7 +265,9 @@ BOOST_AUTO_TEST_CASE(test_convolution_2d_operation) }; // Enqueue the CPU pinned <-> GPU tranfers and the convolution task. - if (!context->enqueueV2(device_buffers.data(), *cuda_stream, nullptr)) + auto placeholder = + context->enqueueV2(device_buffers.data(), *cuda_stream, nullptr); + if (!placeholder) { SARA_DEBUG << termcolor::red << "Execution failed!" << termcolor::reset << std::endl; diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_darknet_parser.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp similarity index 91% rename from cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_darknet_parser.cpp rename to cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp index dd1b29dd5..a41f360b5 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_darknet_parser.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp @@ -1,3 +1,14 @@ +// ========================================================================== // +// This file is part of DO-CV, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2023 David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + #define BOOST_TEST_MODULE "NeuralNetworks/TensorRT/Yolo-V4-Tiny" #include @@ -6,18 +17,16 @@ #include #include #include +#include #include +#include +#include +#include #include -#include -#include -#include -#include - - -namespace fs = boost::filesystem; +namespace fs = std::filesystem; namespace sara = DO::Sara; namespace shakti = DO::Shakti; namespace d = sara::Darknet; diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_inference_executor.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp similarity index 73% rename from cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_inference_executor.cpp rename to cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp index 73256406e..87cda1f31 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_inference_executor.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp @@ -1,21 +1,31 @@ +// ========================================================================== // +// This file is part of DO-CV, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2023 David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + #define BOOST_TEST_MODULE "NeuralNetworks/TensorRT/InferenceExecutor" #include #include #include +#include #include - -#include -#include -#include -#include +#include +#include +#include #include #include -namespace fs = boost::filesystem; +namespace fs = std::filesystem; namespace sara = DO::Sara; namespace shakti = DO::Shakti; namespace d = sara::Darknet; diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_onnx_parser.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp similarity index 89% rename from cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_onnx_parser.cpp rename to cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp index 2b2c727ae..1cef5f0d2 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_onnx_parser.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp @@ -1,3 +1,14 @@ +// ========================================================================== // +// This file is part of DO-CV, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2023 David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + #define BOOST_TEST_MODULE "NeuralNetworks/TensorRT/YoloX-Tiny-ONNX-Conversion" #include @@ -8,11 +19,10 @@ #include #include +#include +#include #include -#include -#include - #include diff --git a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_yolo_plugin.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp similarity index 83% rename from cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_yolo_plugin.cpp rename to cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp index c4ce9ae32..bf5b30307 100644 --- a/cpp/drafts/NeuralNetworks/TensorRT/test/test_neuralnetworks_tensorrt_yolo_plugin.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp @@ -1,17 +1,27 @@ +// ========================================================================== // +// This file is part of DO-CV, a basic set of libraries in C++ for computer +// vision. +// +// Copyright (C) 2023 David Ok +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License v. 2.0. If a copy of the MPL was not distributed with this file, +// you can obtain one at http://mozilla.org/MPL/2.0/. +// ========================================================================== // + #define BOOST_TEST_MODULE "NeuralNetworks/TensorRT" #if (defined(_WIN32) || defined(_WIN32_WCE)) && !defined(NOMINMAX) # define NOMINMAX #endif -#include -#include +#include +#include +#include #include #include -#include - #include @@ -78,9 +88,8 @@ BOOST_AUTO_TEST_CASE(test_that_the_yolo_plugin_is_automatically_registered) fc.nbFields = static_cast(fields.size()); const auto yolo_plugin = yolo_plugin_creator->createPlugin("yolo", &fc); - BOOST_CHECK_NE( - std::string(yolo_plugin->getPluginNamespace()).find(""), - std::string::npos); + BOOST_CHECK_NE(std::string(yolo_plugin->getPluginNamespace()).find(""), + std::string::npos); #if defined(TODO) // Resize the host tensor. From ccf8ab396042e1c8230b56459a01debfc8d874f8 Mon Sep 17 00:00:00 2001 From: David OK Date: Mon, 11 Dec 2023 15:03:06 +0000 Subject: [PATCH 3/4] MAINT: fix CMake script. --- cpp/examples/Shakti/TensorRT/CMakeLists.txt | 4 ++++ cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/cpp/examples/Shakti/TensorRT/CMakeLists.txt b/cpp/examples/Shakti/TensorRT/CMakeLists.txt index b59ba19b5..799c646c6 100644 --- a/cpp/examples/Shakti/TensorRT/CMakeLists.txt +++ b/cpp/examples/Shakti/TensorRT/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT CMAKE_CUDA_COMPILER OR NOT TensorRT_FOUND) + return() +endif() + file(GLOB TRT_SOURCE_FILES FILES *.cpp) foreach(file ${TRT_SOURCE_FILES}) diff --git a/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt b/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt index 7931f2e5f..de2ba8dbe 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt +++ b/cpp/test/Shakti/Cuda/TensorRT/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT CMAKE_CUDA_COMPILER OR NOT TensorRT_FOUND) + return() +endif() + file(GLOB test_tensorrt_SOURCE_FILES FILES test_*tensorrt*.cpp) foreach(file ${test_tensorrt_SOURCE_FILES}) From 7b039ce143bd9d69c44e46038859bf52b34b644b Mon Sep 17 00:00:00 2001 From: David OK Date: Mon, 11 Dec 2023 16:30:46 +0000 Subject: [PATCH 4/4] MAINT: change namespaces. --- .../TensorRT/tensorrt_yolov4_tiny_example.cpp | 2 +- .../DO/Shakti/Cuda/TensorRT/DarknetParser.cpp | 30 +- .../DO/Shakti/Cuda/TensorRT/DarknetParser.hpp | 11 +- cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp | 2 +- cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp | 4 +- cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp | 4 +- .../Cuda/TensorRT/InferenceExecutor.cpp | 101 ++-- .../Cuda/TensorRT/InferenceExecutor.hpp | 4 +- cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp | 529 +++++++++--------- cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp | 4 +- cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu | 9 +- cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp | 6 +- .../TensorRT/test_neuralnetworks_tensorrt.cpp | 8 +- ...neuralnetworks_tensorrt_darknet_parser.cpp | 2 +- ...alnetworks_tensorrt_inference_executor.cpp | 3 +- ...st_neuralnetworks_tensorrt_onnx_parser.cpp | 2 +- ...st_neuralnetworks_tensorrt_yolo_plugin.cpp | 2 +- 17 files changed, 361 insertions(+), 362 deletions(-) diff --git a/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp b/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp index d6a102460..e2e7aecdc 100644 --- a/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp +++ b/cpp/examples/Shakti/TensorRT/tensorrt_yolov4_tiny_example.cpp @@ -23,7 +23,7 @@ namespace sara = DO::Sara; namespace fs = std::filesystem; -namespace trt = sara::TensorRT; +namespace trt = DO::Shakti::TensorRT; namespace d = sara::Darknet; diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp index c8805188f..16dbd5489 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.cpp @@ -15,10 +15,12 @@ #include -namespace DO::Sara::TensorRT { - using nvinfer1::IPluginV2; +namespace sara = DO::Sara; +namespace darknet = DO::Sara::Darknet; +namespace DO::Shakti::TensorRT { + static inline auto shape(const nvinfer1::ITensor& t) -> Eigen::Vector4i { const auto dims = t.getDimensions(); @@ -36,7 +38,7 @@ namespace DO::Sara::TensorRT { } auto YoloV4TinyConverter::conv2d(nvinfer1::ITensor* x, // - const TensorView_& w, + const sara::TensorView_& w, const Eigen::VectorXf& b, const int stride, const std::string& activation_layer, const std::optional& name) const @@ -108,7 +110,7 @@ namespace DO::Sara::TensorRT { SARA_DEBUG << "Converting convolutional layer " << layer_idx << " to TRT" << std::endl; const auto& conv_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); std::cout << conv_layer << std::endl; // It's always the last one in Darknet cfg file. @@ -127,7 +129,7 @@ namespace DO::Sara::TensorRT { const int layer_idx, std::vector& fmaps) const -> void { const auto& route_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); SARA_DEBUG << "convert route-slice layer " << layer_idx << "(" << route_layer.type << ")" << std::endl; std::cout << route_layer << std::endl; @@ -172,7 +174,7 @@ namespace DO::Sara::TensorRT { const int layer_idx, std::vector& fmaps) const -> void { const auto& route_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); SARA_DEBUG << "convert route-concat layer " << layer_idx << "(" << route_layer.type << ")" << std::endl; std::cout << route_layer << std::endl; @@ -201,7 +203,7 @@ namespace DO::Sara::TensorRT { const int layer_idx, std::vector& fmaps) const -> void { const auto& maxpool_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); SARA_DEBUG << "convert maxpool layer " << layer_idx << "(" << hnet[layer_idx]->type << ")" << std::endl; std::cout << maxpool_layer << std::endl; @@ -228,7 +230,7 @@ namespace DO::Sara::TensorRT { const int layer_idx, std::vector& fmaps) const -> void { const auto& upsample_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); SARA_DEBUG << "convert layer " << layer_idx << "(" << upsample_layer.type << ")" << std::endl; std::cout << upsample_layer << std::endl; @@ -258,7 +260,7 @@ namespace DO::Sara::TensorRT { const int layer_idx, std::vector& fmaps) const -> void { const auto& yolo_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); SARA_DEBUG << "convert yolo layer " << layer_idx << "(" << hnet[layer_idx]->type << ")" << std::endl; std::cout << yolo_layer << std::endl; @@ -296,7 +298,7 @@ namespace DO::Sara::TensorRT { // Create the YOLO plugin. const auto yolo_plugin = - std::unique_ptr{ + std::unique_ptr{ yolo_plugin_creator->createPlugin("", &fc), delete_plugin}; assert(yolo_plugin.get() != nullptr); @@ -316,7 +318,7 @@ namespace DO::Sara::TensorRT { SARA_DEBUG << "Creating the network from scratch!" << std::endl; // Define the input tensor. - const auto& input_layer = dynamic_cast(*hnet[0]); + const auto& input_layer = dynamic_cast(*hnet[0]); auto input_tensor = make_input_rgb_tensor(input_layer.width(), // input_layer.height()); @@ -337,7 +339,7 @@ namespace DO::Sara::TensorRT { else if (layer_type == "route") { const auto& route_layer = - dynamic_cast(*hnet[layer_idx]); + dynamic_cast(*hnet[layer_idx]); if (route_layer.layers.size() == 1) add_slice_layer(layer_idx, fmaps); @@ -369,7 +371,7 @@ namespace DO::Sara::TensorRT { const std::string& trained_model_dir) -> HostMemoryUniquePtr { // Load the CPU implementation. - auto hnet = Darknet::load_yolov4_tiny_model(trained_model_dir); + auto hnet = darknet::load_yolov4_tiny_model(trained_model_dir); // Create a TensorRT network. auto net_builder = make_builder(); @@ -384,4 +386,4 @@ namespace DO::Sara::TensorRT { return serialized_net; } -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp index f9733bba7..2f484554a 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/DarknetParser.hpp @@ -23,12 +23,12 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { struct YoloV4TinyConverter { using TrtNet = nvinfer1::INetworkDefinition; - using HostNet = std::vector>; + using HostNet = std::vector>; TrtNet* tnet; const HostNet& hnet; @@ -44,8 +44,9 @@ namespace DO::Sara::TensorRT { //! @brief zero-padding convolution. auto conv2d(nvinfer1::ITensor* x, // - const TensorView_& w, const Eigen::VectorXf& b, - const int stride, const std::string& activation_layer, + const DO::Sara::TensorView_& w, + const Eigen::VectorXf& b, const int stride, + const std::string& activation_layer, const std::optional& name = std::nullopt) const -> nvinfer1::ITensor*; @@ -77,4 +78,4 @@ namespace DO::Sara::TensorRT { auto convert_yolo_v4_tiny_network_from_darknet( const std::string& trained_model_dir) -> HostMemoryUniquePtr; -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp index 0678a4f33..273a2147b 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/Helpers.hpp @@ -25,7 +25,7 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { //! @ingroup NeuralNetworks //! @defgroup TensorRT TensorRT helper functions. diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp index 0aecedcd7..dd28ff4c0 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.cpp @@ -12,7 +12,7 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { auto serialize_network_into_plan(const BuilderUniquePtr& network_builder, const NetworkUniquePtr& network, @@ -36,4 +36,4 @@ namespace DO::Sara::TensorRT { return plan; } -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp index 9e2369cda..74884042d 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/IO.hpp @@ -17,7 +17,7 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { //! @brief Helper function for serializing TensorRT plugins. //! @@ -48,4 +48,4 @@ namespace DO::Sara::TensorRT { const bool use_fp16 = false) -> HostMemoryUniquePtr; -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp index 0d07f7e40..f6425e72b 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.cpp @@ -12,66 +12,65 @@ #include -namespace DO::Sara::TensorRT { +using namespace DO::Shakti::TensorRT; - InferenceExecutor::InferenceExecutor( - const HostMemoryUniquePtr& serialized_network) - { - // Create a runtime. - _runtime = {nvinfer1::createInferRuntime(Logger::instance()), - &runtime_deleter}; - // Create or load an engine. - _engine = {_runtime->deserializeCudaEngine(serialized_network->data(), - serialized_network->size()), - &engine_deleter}; +InferenceExecutor::InferenceExecutor( + const HostMemoryUniquePtr& serialized_network) +{ + // Create a runtime. + _runtime = {nvinfer1::createInferRuntime(Logger::instance()), + &runtime_deleter}; - // Create an execution context. - _context = {_engine->createExecutionContext(), &context_deleter}; - } + // Create or load an engine. + _engine = {_runtime->deserializeCudaEngine(serialized_network->data(), + serialized_network->size()), + &engine_deleter}; - auto InferenceExecutor::operator()(const PinnedTensor& in, - PinnedTensor& out, - const bool synchronize) const -> void - { - const auto device_tensors = std::array{ - const_cast(reinterpret_cast(in.data())), // - reinterpret_cast(out.data()) // - }; + // Create an execution context. + _context = {_engine->createExecutionContext(), &context_deleter}; +} - // Enqueue the CPU pinned <-> GPU tranfers and the convolution task. - if (!_context->enqueueV2(device_tensors.data(), *_cuda_stream, nullptr)) - { - SARA_DEBUG << termcolor::red << "Execution failed!" << termcolor::reset - << std::endl; - } +auto InferenceExecutor::operator()(const PinnedTensor& in, + PinnedTensor& out, + const bool synchronize) const -> void +{ + const auto device_tensors = std::array{ + const_cast(reinterpret_cast(in.data())), // + reinterpret_cast(out.data()) // + }; - // Wait for the completion of GPU operations. - if (synchronize) - cudaStreamSynchronize(*_cuda_stream); + // Enqueue the CPU pinned <-> GPU tranfers and the convolution task. + if (!_context->enqueueV2(device_tensors.data(), *_cuda_stream, nullptr)) + { + SARA_DEBUG << termcolor::red << "Execution failed!" << termcolor::reset + << std::endl; } - auto InferenceExecutor::operator()( // - const PinnedTensor& in, - std::array, 2>& out, // - const bool synchronize) const -> void - { - const auto device_tensors = std::array{ - const_cast(reinterpret_cast(in.data())), // - reinterpret_cast(out[0].data()), // - reinterpret_cast(out[1].data()) // - }; + // Wait for the completion of GPU operations. + if (synchronize) + cudaStreamSynchronize(*_cuda_stream); +} - // Enqueue the CPU pinned <-> GPU tranfers and the convolution task. - if (!_context->enqueueV2(device_tensors.data(), *_cuda_stream, nullptr)) - { - SARA_DEBUG << termcolor::red << "Execution failed!" << termcolor::reset - << std::endl; - } +auto InferenceExecutor::operator()( // + const PinnedTensor& in, + std::array, 2>& out, // + const bool synchronize) const -> void +{ + const auto device_tensors = std::array{ + const_cast(reinterpret_cast(in.data())), // + reinterpret_cast(out[0].data()), // + reinterpret_cast(out[1].data()) // + }; - // Wait for the completion of GPU operations. - if (synchronize) - cudaStreamSynchronize(*_cuda_stream); + // Enqueue the CPU pinned <-> GPU tranfers and the convolution task. + if (!_context->enqueueV2(device_tensors.data(), *_cuda_stream, nullptr)) + { + SARA_DEBUG << termcolor::red << "Execution failed!" << termcolor::reset + << std::endl; } -} // namespace DO::Sara::TensorRT + // Wait for the completion of GPU operations. + if (synchronize) + cudaStreamSynchronize(*_cuda_stream); +} diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp index 8958cef87..2b4d3559a 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/InferenceExecutor.hpp @@ -23,13 +23,13 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { class DO_SARA_EXPORT InferenceExecutor { public: template - using PinnedTensor = Tensor_; + using PinnedTensor = Sara::Tensor_; InferenceExecutor() = default; diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp index 4dcd88d1d..0cba6b830 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.cpp @@ -19,289 +19,286 @@ #include -namespace DO::Sara::TensorRT { - - auto YoloPlugin::getOutputDataType( - [[maybe_unused]] const std::int32_t output_index, - const nvinfer1::DataType* input_types, - [[maybe_unused]] const std::int32_t num_inputs) const noexcept - -> nvinfer1::DataType - { - return input_types[0]; - } - - auto YoloPlugin::isOutputBroadcastAcrossBatch( - [[maybe_unused]] const std::int32_t output_index, - [[maybe_unused]] const bool* input_is_broadcasted, - [[maybe_unused]] const std::int32_t num_inputs) const noexcept -> bool - { - return false; - } - - auto YoloPlugin::canBroadcastInputAcrossBatch( - [[maybe_unused]] const std::int32_t input_index) const noexcept -> bool - { - return false; - } - - auto YoloPlugin::clone() const noexcept -> nvinfer1::IPluginV2Ext* - { - try - { - auto plugin = new YoloPlugin{_num_boxes_per_grid_cell, _num_classes, // - _h, _w, // - _scale_x_y}; - plugin->setPluginNamespace(_namespace.c_str()); - return plugin; - } - catch (const std::exception& e) - { - SARA_DEBUG << "EXCEPTION" << e.what() << std::endl; - } - - return nullptr; - } - - auto YoloPlugin::getPluginType() const noexcept -> const nvinfer1::AsciiChar* - { - return name; - } - - auto YoloPlugin::getPluginVersion() const noexcept - -> const nvinfer1::AsciiChar* - { - return version; - } - - auto YoloPlugin::getNbOutputs() const noexcept -> std::int32_t - { - return 1; - } - - auto YoloPlugin::getOutputDimensions( - [[maybe_unused]] const std::int32_t index, - [[maybe_unused]] const nvinfer1::Dims* inputs, - [[maybe_unused]] const std::int32_t nb_input_dims) noexcept - -> nvinfer1::Dims +using namespace DO::Shakti::TensorRT; + + +auto YoloPlugin::getOutputDataType( + [[maybe_unused]] const std::int32_t output_index, + const nvinfer1::DataType* input_types, + [[maybe_unused]] const std::int32_t num_inputs) const noexcept + -> nvinfer1::DataType +{ + return input_types[0]; +} + +auto YoloPlugin::isOutputBroadcastAcrossBatch( + [[maybe_unused]] const std::int32_t output_index, + [[maybe_unused]] const bool* input_is_broadcasted, + [[maybe_unused]] const std::int32_t num_inputs) const noexcept -> bool +{ + return false; +} + +auto YoloPlugin::canBroadcastInputAcrossBatch( + [[maybe_unused]] const std::int32_t input_index) const noexcept -> bool +{ + return false; +} + +auto YoloPlugin::clone() const noexcept -> nvinfer1::IPluginV2Ext* +{ + try { - return inputs[0]; + auto plugin = new YoloPlugin{_num_boxes_per_grid_cell, _num_classes, // + _h, _w, // + _scale_x_y}; + plugin->setPluginNamespace(_namespace.c_str()); + return plugin; } - - auto YoloPlugin::initialize() noexcept -> std::int32_t + catch (const std::exception& e) { - return 0; + SARA_DEBUG << "EXCEPTION" << e.what() << std::endl; } - auto YoloPlugin::terminate() noexcept -> void + return nullptr; +} + +auto YoloPlugin::getPluginType() const noexcept -> const nvinfer1::AsciiChar* +{ + return name; +} + +auto YoloPlugin::getPluginVersion() const noexcept -> const nvinfer1::AsciiChar* +{ + return version; +} + +auto YoloPlugin::getNbOutputs() const noexcept -> std::int32_t +{ + return 1; +} + +auto YoloPlugin::getOutputDimensions( + [[maybe_unused]] const std::int32_t index, + [[maybe_unused]] const nvinfer1::Dims* inputs, + [[maybe_unused]] const std::int32_t nb_input_dims) noexcept + -> nvinfer1::Dims +{ + return inputs[0]; +} + +auto YoloPlugin::initialize() noexcept -> std::int32_t +{ + return 0; +} + +auto YoloPlugin::terminate() noexcept -> void +{ +} + +auto YoloPlugin::getWorkspaceSize( + const std::int32_t /* max_batch_size */) const noexcept -> std::size_t +{ + return 0; +} + +auto YoloPlugin::enqueue([[maybe_unused]] const std::int32_t batch_size, + void const* const* inputs, void* const* outputs, + [[maybe_unused]] void* workspace, + cudaStream_t stream) noexcept -> std::int32_t +{ + try { - } + const auto in = reinterpret_cast(inputs[0]); + const auto out = reinterpret_cast(outputs[0]); + yolo(in, out, _num_boxes_per_grid_cell, _h, _w, _num_classes, _scale_x_y, + stream); - auto YoloPlugin::getWorkspaceSize( - const std::int32_t /* max_batch_size */) const noexcept -> std::size_t - { return 0; } - - auto YoloPlugin::enqueue([[maybe_unused]] const std::int32_t batch_size, - void const* const* inputs, void* const* outputs, - [[maybe_unused]] void* workspace, - cudaStream_t stream) noexcept -> std::int32_t - { - try - { - const auto in = reinterpret_cast(inputs[0]); - const auto out = reinterpret_cast(outputs[0]); - yolo(in, out, _num_boxes_per_grid_cell, _h, _w, _num_classes, _scale_x_y, - stream); - - return 0; - } - catch (const std::exception& e) - { - SARA_DEBUG << e.what() << std::endl; - } - - return -1; - } - - auto YoloPlugin::getSerializationSize() const noexcept -> size_t - { - const auto yolo_parameter_byte_size = // - sizeof(_num_boxes_per_grid_cell) + // - sizeof(_num_classes) + // - sizeof(_h) + // - sizeof(_w) + // - sizeof(_scale_x_y); - return yolo_parameter_byte_size; - } - - auto YoloPlugin::serialize(void* buffer) const noexcept -> void - { - auto cbuf = reinterpret_cast(buffer); - write_to_buffer(cbuf, _num_boxes_per_grid_cell); - write_to_buffer(cbuf, _num_classes); - write_to_buffer(cbuf, _h); - write_to_buffer(cbuf, _w); - write_to_buffer(cbuf, _scale_x_y); - } - - auto YoloPlugin::destroy() noexcept -> void - { - delete this; - } - - auto YoloPlugin::setPluginNamespace( - const nvinfer1::AsciiChar* plugin_namespace) noexcept -> void - { - _namespace = plugin_namespace; - } - - auto YoloPlugin::getPluginNamespace() const noexcept - -> const nvinfer1::AsciiChar* - { - return _namespace.c_str(); - } - - //! TODO - auto YoloPlugin::configurePlugin( - [[maybe_unused]] const nvinfer1::PluginTensorDesc* inputs, - [[maybe_unused]] const std::int32_t num_inputs, - [[maybe_unused]] const nvinfer1::PluginTensorDesc* outputs, - [[maybe_unused]] const std::int32_t num_outputs) noexcept -> void - { - } - - auto YoloPlugin::supportsFormatCombination( - [[maybe_unused]] const std::int32_t pos, // - const nvinfer1::PluginTensorDesc* in_out, - [[maybe_unused]] const std::int32_t nb_inputs, - [[maybe_unused]] const std::int32_t nb_outputs) const noexcept -> bool - { - assert(nb_inputs == 1 || nb_outputs == 1 || pos == 0); - - return (in_out[0].type == nvinfer1::DataType::kHALF || - in_out[0].type == nvinfer1::DataType::kFLOAT) && - in_out[0].format == nvinfer1::PluginFormat::kLINEAR; - } - - - YoloPluginCreator::YoloPluginCreator() - { - _plugin_attributes.reserve(5u); - _plugin_attributes.emplace_back("num_boxes_per_grid_cell", nullptr, - nvinfer1::PluginFieldType::kINT32, 1); - _plugin_attributes.emplace_back("num_classes", nullptr, - nvinfer1::PluginFieldType::kINT32, 1); - _plugin_attributes.emplace_back("height", nullptr, - nvinfer1::PluginFieldType::kINT32, 1); - _plugin_attributes.emplace_back("width", nullptr, - nvinfer1::PluginFieldType::kINT32, 1); - _plugin_attributes.emplace_back("scale_x_y", nullptr, - nvinfer1::PluginFieldType::kFLOAT32, 1); - - _fc.fields = _plugin_attributes.data(); - _fc.nbFields = static_cast(_plugin_attributes.size()); - } - - auto YoloPluginCreator::getPluginName() const noexcept - -> const nvinfer1::AsciiChar* - { - return YoloPlugin::name; - } - - auto YoloPluginCreator::getPluginVersion() const noexcept - -> const nvinfer1::AsciiChar* + catch (const std::exception& e) { - return YoloPlugin::version; + SARA_DEBUG << e.what() << std::endl; } - auto YoloPluginCreator::getFieldNames() noexcept - -> const nvinfer1::PluginFieldCollection* + return -1; +} + +auto YoloPlugin::getSerializationSize() const noexcept -> size_t +{ + const auto yolo_parameter_byte_size = // + sizeof(_num_boxes_per_grid_cell) + // + sizeof(_num_classes) + // + sizeof(_h) + // + sizeof(_w) + // + sizeof(_scale_x_y); + return yolo_parameter_byte_size; +} + +auto YoloPlugin::serialize(void* buffer) const noexcept -> void +{ + auto cbuf = reinterpret_cast(buffer); + write_to_buffer(cbuf, _num_boxes_per_grid_cell); + write_to_buffer(cbuf, _num_classes); + write_to_buffer(cbuf, _h); + write_to_buffer(cbuf, _w); + write_to_buffer(cbuf, _scale_x_y); +} + +auto YoloPlugin::destroy() noexcept -> void +{ + delete this; +} + +auto YoloPlugin::setPluginNamespace( + const nvinfer1::AsciiChar* plugin_namespace) noexcept -> void +{ + _namespace = plugin_namespace; +} + +auto YoloPlugin::getPluginNamespace() const noexcept + -> const nvinfer1::AsciiChar* +{ + return _namespace.c_str(); +} + +//! TODO +auto YoloPlugin::configurePlugin( + [[maybe_unused]] const nvinfer1::PluginTensorDesc* inputs, + [[maybe_unused]] const std::int32_t num_inputs, + [[maybe_unused]] const nvinfer1::PluginTensorDesc* outputs, + [[maybe_unused]] const std::int32_t num_outputs) noexcept -> void +{ +} + +auto YoloPlugin::supportsFormatCombination( + [[maybe_unused]] const std::int32_t pos, // + const nvinfer1::PluginTensorDesc* in_out, + [[maybe_unused]] const std::int32_t nb_inputs, + [[maybe_unused]] const std::int32_t nb_outputs) const noexcept -> bool +{ + assert(nb_inputs == 1 || nb_outputs == 1 || pos == 0); + + return (in_out[0].type == nvinfer1::DataType::kHALF || + in_out[0].type == nvinfer1::DataType::kFLOAT) && + in_out[0].format == nvinfer1::PluginFormat::kLINEAR; +} + + +YoloPluginCreator::YoloPluginCreator() +{ + _plugin_attributes.reserve(5u); + _plugin_attributes.emplace_back("num_boxes_per_grid_cell", nullptr, + nvinfer1::PluginFieldType::kINT32, 1); + _plugin_attributes.emplace_back("num_classes", nullptr, + nvinfer1::PluginFieldType::kINT32, 1); + _plugin_attributes.emplace_back("height", nullptr, + nvinfer1::PluginFieldType::kINT32, 1); + _plugin_attributes.emplace_back("width", nullptr, + nvinfer1::PluginFieldType::kINT32, 1); + _plugin_attributes.emplace_back("scale_x_y", nullptr, + nvinfer1::PluginFieldType::kFLOAT32, 1); + + _fc.fields = _plugin_attributes.data(); + _fc.nbFields = static_cast(_plugin_attributes.size()); +} + +auto YoloPluginCreator::getPluginName() const noexcept + -> const nvinfer1::AsciiChar* +{ + return YoloPlugin::name; +} + +auto YoloPluginCreator::getPluginVersion() const noexcept + -> const nvinfer1::AsciiChar* +{ + return YoloPlugin::version; +} + +auto YoloPluginCreator::getFieldNames() noexcept + -> const nvinfer1::PluginFieldCollection* +{ + return &_fc; +} + +auto YoloPluginCreator::createPlugin( + const nvinfer1::AsciiChar* trt_namespace, + const nvinfer1::PluginFieldCollection* fc) noexcept -> nvinfer1::IPluginV2* +{ + // All the necessary parameters for the YOLO layer. + auto num_boxes_per_grid_cell = std::int32_t{}; + auto num_classes = std::int32_t{}; + auto h = std::int32_t{}; + auto w = std::int32_t{}; + auto scale_x_y = float{}; + + // Parse the plugin field collection. + const auto fields = fc->fields; + const auto num_fields = fc->nbFields; + for (auto i = 0; i < num_fields; ++i) { - return &_fc; + if (!std::strcmp(fields[i].name, "num_boxes_per_grid_cell")) + num_boxes_per_grid_cell = + *reinterpret_cast(fields[i].data); + if (!std::strcmp(fields[i].name, "num_classes")) + num_classes = *reinterpret_cast(fields[i].data); + if (!std::strcmp(fields[i].name, "height")) + h = *reinterpret_cast(fields[i].data); + if (!std::strcmp(fields[i].name, "width")) + w = *reinterpret_cast(fields[i].data); + if (!std::strcmp(fields[i].name, "scale_x_y")) + scale_x_y = *reinterpret_cast(fields[i].data); } - auto YoloPluginCreator::createPlugin( - const nvinfer1::AsciiChar* trt_namespace, - const nvinfer1::PluginFieldCollection* fc) noexcept - -> nvinfer1::IPluginV2* + auto plugin = new YoloPlugin{ + num_boxes_per_grid_cell, // + num_classes, // + h, w, // + scale_x_y // + }; + plugin->setPluginNamespace(trt_namespace); + return plugin; +} + +auto YoloPluginCreator::getPluginNamespace() const noexcept + -> const nvinfer1::AsciiChar* +{ + return _namespace.c_str(); +} + +auto YoloPluginCreator::setPluginNamespace( + [[maybe_unused]] const nvinfer1::AsciiChar* plugin_namespace) noexcept + -> void +{ + _namespace = plugin_namespace; +} + +auto YoloPluginCreator::deserializePlugin( + const nvinfer1::AsciiChar* plugin_namespace, // + const void* serial_data, + [[maybe_unused]] const size_t serial_length) noexcept + -> nvinfer1::IPluginV2* +{ + try { - // All the necessary parameters for the YOLO layer. - auto num_boxes_per_grid_cell = std::int32_t{}; - auto num_classes = std::int32_t{}; - auto h = std::int32_t{}; - auto w = std::int32_t{}; - auto scale_x_y = float{}; - - // Parse the plugin field collection. - const auto fields = fc->fields; - const auto num_fields = fc->nbFields; - for (auto i = 0; i < num_fields; ++i) - { - if (!std::strcmp(fields[i].name, "num_boxes_per_grid_cell")) - num_boxes_per_grid_cell = - *reinterpret_cast(fields[i].data); - if (!std::strcmp(fields[i].name, "num_classes")) - num_classes = *reinterpret_cast(fields[i].data); - if (!std::strcmp(fields[i].name, "height")) - h = *reinterpret_cast(fields[i].data); - if (!std::strcmp(fields[i].name, "width")) - w = *reinterpret_cast(fields[i].data); - if (!std::strcmp(fields[i].name, "scale_x_y")) - scale_x_y = *reinterpret_cast(fields[i].data); - } - - auto plugin = new YoloPlugin{ - num_boxes_per_grid_cell, // - num_classes, // - h, w, // - scale_x_y // - }; - plugin->setPluginNamespace(trt_namespace); + auto buffer_ptr = reinterpret_cast(serial_data); + const auto num_boxes_per_grid_cell = + read_from_buffer(buffer_ptr); + const auto num_classes = read_from_buffer(buffer_ptr); + const auto h = read_from_buffer(buffer_ptr); + const auto w = read_from_buffer(buffer_ptr); + const auto scale_x_y = read_from_buffer(buffer_ptr); + + auto plugin = new YoloPlugin{num_boxes_per_grid_cell, // + num_classes, // + h, w, // + scale_x_y}; + plugin->setPluginNamespace(plugin_namespace); return plugin; } - - auto YoloPluginCreator::getPluginNamespace() const noexcept - -> const nvinfer1::AsciiChar* - { - return _namespace.c_str(); - } - - auto YoloPluginCreator::setPluginNamespace( - [[maybe_unused]] const nvinfer1::AsciiChar* plugin_namespace) noexcept - -> void + catch (std::exception const& e) { - _namespace = plugin_namespace; + SARA_DEBUG << "EXCEPTION: " << e.what() << std::endl; } - - auto YoloPluginCreator::deserializePlugin( - const nvinfer1::AsciiChar* plugin_namespace, // - const void* serial_data, - [[maybe_unused]] const size_t serial_length) noexcept - -> nvinfer1::IPluginV2* - { - try - { - auto buffer_ptr = reinterpret_cast(serial_data); - const auto num_boxes_per_grid_cell = - read_from_buffer(buffer_ptr); - const auto num_classes = read_from_buffer(buffer_ptr); - const auto h = read_from_buffer(buffer_ptr); - const auto w = read_from_buffer(buffer_ptr); - const auto scale_x_y = read_from_buffer(buffer_ptr); - - auto plugin = new YoloPlugin{num_boxes_per_grid_cell, // - num_classes, // - h, w, // - scale_x_y}; - plugin->setPluginNamespace(plugin_namespace); - return plugin; - } - catch (std::exception const& e) - { - SARA_DEBUG << "EXCEPTION: " << e.what() << std::endl; - } - return nullptr; - } - -} // namespace DO::Sara::TensorRT + return nullptr; +} diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp index 7f4e061ae..93d6f5666 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/Yolo.hpp @@ -18,7 +18,7 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { class YoloPlugin : public nvinfer1::IPluginV2IOExt { @@ -172,4 +172,4 @@ namespace DO::Sara::TensorRT { REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu index 0b8bd1b45..187f4715d 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.cu @@ -14,7 +14,7 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { __global__ void yolo_kernel(const float* conv, float* yolo, // const int num_boxes_per_grid_cell, // @@ -112,8 +112,8 @@ namespace DO::Sara::TensorRT { SARA_CHECK(total_num_boxes); #endif - // By design CUDA can have at most 1024 threads per block, so let us use this - // limit. + // By design CUDA can have at most 1024 threads per block, so let us use + // this limit. static constexpr auto max_threads_per_block = 1024; const auto num_blocks = total_num_boxes % 1024 == 0 ? total_num_boxes / max_threads_per_block @@ -126,4 +126,5 @@ namespace DO::Sara::TensorRT { conv, yolo, num_boxes_per_grid_cell, grid_height, grid_width, num_classes, scale_x_y); } -} // namespace DO::Sara::TensorRT + +} // namespace DO::Shakti::TensorRT diff --git a/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp index 47349c7eb..d2df9b9c1 100644 --- a/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp +++ b/cpp/src/DO/Shakti/Cuda/TensorRT/YoloImpl.hpp @@ -14,9 +14,9 @@ #include -namespace DO::Sara::TensorRT { +namespace DO::Shakti::TensorRT { - void yolo(const float* conv, float* yolo, // + void yolo(const float* conv, float* yolo, // const int num_boxes_per_grid_cell, // const int grid_height, // const int grid_width, // @@ -24,4 +24,4 @@ namespace DO::Sara::TensorRT { const float scale_x_y, // cudaStream_t stream); -} // namespace DO::Sara::TensorRT +} // namespace DO::Shakti::TensorRT diff --git a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp index 716d70c56..f89e2a4a7 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt.cpp @@ -28,7 +28,7 @@ namespace sara = DO::Sara; namespace shakti = DO::Shakti; -namespace trt = sara::TensorRT; +namespace trt = shakti::TensorRT; template @@ -159,9 +159,9 @@ BOOST_AUTO_TEST_CASE(test_convolution_2d_operation) for (const auto& device : devices) std::cout << device << std::endl; - auto cuda_stream = sara::TensorRT::make_cuda_stream(); + auto cuda_stream = trt::make_cuda_stream(); - auto builder = sara::TensorRT::make_builder(); + auto builder = trt::make_builder(); // Create a simple convolution operation. static constexpr auto n = 1; @@ -179,7 +179,7 @@ BOOST_AUTO_TEST_CASE(test_convolution_2d_operation) // Instantiate a network and automatically manager its memory. - auto network = sara::TensorRT::make_network(builder.get()); + auto network = trt::make_network(builder.get()); { SARA_DEBUG << termcolor::green << "Creating the network from scratch!" << std::endl; diff --git a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp index a41f360b5..3eab9ea54 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_darknet_parser.cpp @@ -30,7 +30,7 @@ namespace fs = std::filesystem; namespace sara = DO::Sara; namespace shakti = DO::Shakti; namespace d = sara::Darknet; -namespace trt = sara::TensorRT; +namespace trt = shakti::TensorRT; template diff --git a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp index 87cda1f31..facf37fc9 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_inference_executor.cpp @@ -28,8 +28,7 @@ namespace fs = std::filesystem; namespace sara = DO::Sara; namespace shakti = DO::Shakti; -namespace d = sara::Darknet; -namespace trt = sara::TensorRT; +namespace trt = shakti::TensorRT; BOOST_AUTO_TEST_SUITE(TestTensorRT) diff --git a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp index 1cef5f0d2..4a4e3293f 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_onnx_parser.cpp @@ -30,7 +30,7 @@ namespace fs = std::filesystem; namespace sara = DO::Sara; namespace shakti = DO::Shakti; -namespace trt = sara::TensorRT; +namespace trt = shakti::TensorRT; namespace nvonnx = nvonnxparser; diff --git a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp index bf5b30307..042c4b9be 100644 --- a/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp +++ b/cpp/test/Shakti/Cuda/TensorRT/test_neuralnetworks_tensorrt_yolo_plugin.cpp @@ -27,7 +27,7 @@ namespace sara = DO::Sara; namespace shakti = DO::Shakti; -namespace trt = sara::TensorRT; +namespace trt = shakti::TensorRT; template