Merge pull request #358 from oddkiva/enh-yolo-v4-cpu

ENH: add yolo-v4 converter (CPU version only)
oddkiva · Dec 17, 2023 · 62a7c71 · 62a7c71
2 parents 7828a47 + 101ab17
commit 62a7c71
Show file tree

Hide file tree

Showing 120 changed files with 3,761 additions and 728 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,7 @@ latex/
 CMakeLists.txt.user
 
 # Python
+**/__pycache__
 *.pyc
 *.pyo
 *.coverage

diff --git a/cpp/examples/Kalpana/Qt/kalpana_hello_coordinate_systems.cpp b/cpp/examples/Kalpana/Qt/kalpana_hello_coordinate_systems.cpp
@@ -251,7 +251,7 @@ class Window : public QOpenGLWindow
     SARA_DEBUG << "Initialize texture data on GPU" << std::endl;
 
     // Texture 0.
-    const auto image0_path = src_path("../../../data/ksmall.jpg");
+    const auto image0_path = src_path("../../../../data/ksmall.jpg");
     const auto image0 = QImage{image0_path}.mirrored();
     m_texture0 = new QOpenGLTexture{image0};
     m_texture0->setMinificationFilter(QOpenGLTexture::LinearMipMapLinear);
@@ -261,7 +261,7 @@ class Window : public QOpenGLWindow
     m_program->setUniformValue("texture0", 0);
 
     // Texture 1.
-    const auto image1_path = src_path("../../../data/sunflowerField.jpg");
+    const auto image1_path = src_path("../../../../data/sunflowerField.jpg");
     const auto image1 = QImage{image1_path}.mirrored();
     m_texture1 = new QOpenGLTexture{image1};
     m_texture1->setMinificationFilter(QOpenGLTexture::LinearMipMapLinear);

diff --git a/cpp/examples/Sara/NeuralNetworks/check_yolo_network.cpp b/cpp/examples/Sara/NeuralNetworks/check_yolo_network.cpp
@@ -0,0 +1,118 @@
+// ========================================================================== //
+// This file is part of Sara, a basic set of libraries in C++ for computer
+// vision.
+//
+// Copyright (C) 2021-present David Ok <[email protected]>
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License v. 2.0. If a copy of the MPL was not distributed with this file,
+// you can obtain one at http://mozilla.org/MPL/2.0/.
+// ========================================================================== //
+
+#include <DO/Sara/Core.hpp>
+#include <DO/Sara/Core/TicToc.hpp>
+#include <DO/Sara/Graphics.hpp>
+#include <DO/Sara/ImageIO.hpp>
+#include <DO/Sara/ImageProcessing.hpp>
+#include <DO/Sara/NeuralNetworks/Darknet/Network.hpp>
+#include <DO/Sara/NeuralNetworks/Darknet/Parser.hpp>
+#include <DO/Sara/NeuralNetworks/Darknet/YoloUtilities.hpp>
+#include <DO/Sara/VideoIO.hpp>
+
+#include <filesystem>
+#include <iomanip>
+
+#ifdef _OPENMP
+#  include <omp.h>
+#endif
+
+#define COMPARE_WITH_DARKNET_OUTPUT
+#if defined(COMPARE_WITH_DARKNET_OUTPUT)
+#  include <DO/Sara/NeuralNetworks/Darknet/Debug.hpp>
+#endif
+
+
+namespace d = DO::Sara::Darknet;
+namespace fs = std::filesystem;
+namespace sara = DO::Sara;
+
+
+// Checks `model` against the data found in `output_dir`: the convolutional
+// weights first, then the output of every layer after a forward pass on the
+// tensor read from `input.bin`. Layer 0 is skipped: `gt[i - 1]` is compared
+// with the output of layer `i`.
+// Throws std::runtime_error if `output_dir` does not exist.
+inline auto check_yolo_implementation(d::Network& model,
+                                      const std::string& output_dir)
+{
+  if (!fs::exists(output_dir))
+    throw std::runtime_error{"Output directory " + output_dir +
+                             " does not exist!"};
+
+  // Check the weights.
+  d::check_convolutional_weights(model, output_dir);
+
+  const auto x = d::read_tensor(                     //
+      (fs::path{output_dir} / "input.bin").string()  //
+  );
+  const auto xt = x.transpose({0, 2, 3, 1});
+
+  const auto image = sara::ImageView<sara::Rgb32f>{
+      reinterpret_cast<sara::Rgb32f*>(const_cast<float*>(xt.data())),
+      {xt.size(2), xt.size(1)}};
+  const auto& image_resized = image;
+
+  sara::create_window(3 * image.width(), image.height());
+  sara::display(image);
+
+  model.debug = true;
+
+  model.forward(x);
+
+  // Compare my layer outputs with Darknet's.
+  const auto gt = d::read_all_intermediate_outputs(output_dir);
+
+  const auto& net = model.net;
+  for (auto layer = 1u; layer < net.size(); ++layer)
+  {
+    std::cout << "CHECKING LAYER " << layer << ": " << net[layer]->type
+              << std::endl
+              << *net[layer] << std::endl;
+    d::check_against_ground_truth(gt[layer - 1], net[layer]->output,
+                                  image_resized.sizes(),
+                                  /* max_diff_thres */ 2e-4f,
+                                  /* show_errors */ true);
+  }
+
+  SARA_DEBUG << "EVERYTHING OK" << std::endl;
+}
+
+
+// Loads the YOLO v4 model and checks it against the intermediate outputs in
+// `argv[1]` when given, otherwise in the original hard-coded directory.
+auto graphics_main(int argc, char** argv) -> int
+{
+  const auto data_dir_path = fs::canonical(fs::path{src_path("data")});
+
+  static constexpr auto yolo_version = 4;
+  static constexpr auto is_tiny = false;
+  const auto yolo_dirpath = data_dir_path / "trained_models" /
+                            ("yolov" + std::to_string(yolo_version));
+  auto model = d::load_yolo_model(yolo_dirpath, yolo_version, is_tiny);
+  const auto yolo_intermediate_output_dir = std::string{
+      argc < 2 ? "/home/david/GitHub/darknet/yolov4" : argv[1]};
+  check_yolo_implementation(model, yolo_intermediate_output_dir);
+  return 0;
+}
+
+
+auto main(int argc, char** argv) -> int
+{
+#if !defined(__APPLE__)
+  Eigen::initParallel();  // Not called when __APPLE__ is defined.
+#endif
+  // Hand `graphics_main` to the graphics application and return its status.
+  sara::GraphicsApplication application(argc, argv);
+  application.register_user_main(graphics_main);
+  return application.exec();
+}
diff --git a/cpp/examples/Sara/NeuralNetworks/yolo_v4_example.cpp b/cpp/examples/Sara/NeuralNetworks/yolo_v4_example.cpp
@@ -37,38 +37,50 @@ namespace fs = std::filesystem;
 
 
 // The API.
-auto detect_objects(const sara::ImageView<sara::Rgb32f>& image,
+auto detect_objects(const sara::ImageView<sara::Rgb8>& image,
                     sara::Darknet::Network& model)
 {
   auto& net = model.net;
   const auto& input_layer =
       dynamic_cast<const sara::Darknet::Input&>(*net.front());
 
-  // Resize the image to the network input sizes.
-  // TODO: optimize later.
-  const auto image_resized =
-      sara::resize(image, {input_layer.width(), input_layer.height()});
-  const auto image_tensor =
-      sara::tensor_view(image_resized)
-          .reshape(Eigen::Vector4i{1, image_resized.height(),
-                                   image_resized.width(), 3})
-          .transpose({0, 3, 1, 2});
+  sara::tic();
+  const auto image_transposed = sara::tensor_view(image).transpose({2, 0, 1});
+  static_assert(std::is_same_v<decltype(image_transposed),
+                               const sara::Tensor_<std::uint8_t, 3>>);
+  sara::toc("Image transpose");
+
+  sara::tic();
+  auto rgb_tensor = image_transposed.cwise_transform(
+      [](const std::uint8_t& v) { return v / 255.f; });
+  sara::toc("Image channel conversion");
+
+  sara::tic();
+  auto rgb_tensor_resized = sara::Tensor_<float, 4>{
+      {1, 3, input_layer.height(), input_layer.width()}};
+  for (auto i = 0; i < 3; ++i)
+  {
+    const auto src = sara::image_view(rgb_tensor[i]);
+    auto dst = sara::image_view(rgb_tensor_resized[0][i]);
+    sara::resize_v2(src, dst);
+  }
+  sara::toc("Image resize");
 
   // Feed the input to the network.
-  // TODO: optimize this method to avoid recopying again or better, eliminate
-  // the input layer.
-  model.forward(image_tensor);
+  model.forward(rgb_tensor_resized);
 
   // Accumulate all the detection from each YOLO layer.
   auto detections = std::vector<d::YoloBox>{};
   for (const auto& layer : net)
   {
     if (const auto yolo = dynamic_cast<const sara::Darknet::Yolo*>(layer.get()))
     {
-      const auto dets = d::get_yolo_boxes(       //
-          yolo->output[0],                       //
-          yolo->anchors, yolo->mask,             //
-          image_resized.sizes(), image.sizes(),  //
+      std::cout << *yolo << std::endl;
+      const auto dets = d::get_yolo_boxes(                           //
+          yolo->output[0],                                           //
+          yolo->anchors, yolo->mask,                                 //
+          {rgb_tensor_resized.size(3), rgb_tensor_resized.size(2)},  //
+          image.sizes(),                                             //
           0.25f);
       detections.insert(detections.end(), dets.begin(), dets.end());
     }
@@ -88,15 +100,15 @@ auto test_on_image(int argc, char** argv) -> void
 #endif
 
   const auto data_dir_path = fs::canonical(fs::path{src_path("data")});
-  const auto yolov4_tiny_dirpath = data_dir_path / "trained_models";
+  const auto yolov4_tiny_dirpath =
+      data_dir_path / "trained_models" / "yolov7-tiny";
   const auto image =
-      argc < 2
-          ? sara::imread<sara::Rgb32f>((data_dir_path / "dog.jpg").string())
-          : sara::imread<sara::Rgb32f>(argv[1]);
+      argc < 2 ? sara::imread<sara::Rgb8>((data_dir_path / "dog.jpg").string())
+               : sara::imread<sara::Rgb8>(argv[1]);
   sara::create_window(image.sizes());
   sara::display(image);
 
-  auto model = sara::Darknet::load_yolov4_tiny_model(yolov4_tiny_dirpath);
+  auto model = sara::Darknet::load_yolo_model(yolov4_tiny_dirpath, 7, true);
 
   sara::display(image);
   const auto dets = detect_objects(image, model);
@@ -136,8 +148,14 @@ auto test_on_video(int argc, char** argv) -> void
   auto frame = video_stream.frame();
 
   const auto data_dir_path = fs::canonical(fs::path{src_path("data")});
-  const auto yolov4_tiny_dirpath = data_dir_path / "trained_models";
-  auto model = sara::Darknet::load_yolov4_tiny_model(yolov4_tiny_dirpath);
+  const auto yolo_version = 4;
+  const auto is_tiny = false;
+  auto yolo_name = "yolov" + std::to_string(yolo_version);
+  if (is_tiny)
+    yolo_name += "-tiny";
+  const auto yolo_dirpath = data_dir_path / "trained_models" / yolo_name;
+  auto model = d::load_yolo_model(yolo_dirpath, yolo_version, is_tiny);
+
   model.profile = false;
 
   sara::create_window(frame.sizes());
@@ -159,11 +177,7 @@ auto test_on_video(int argc, char** argv) -> void
       continue;
 
     sara::tic();
-    const auto frame32f = video_stream.frame().convert<sara::Rgb32f>();
-    sara::toc("Color conversion");
-
-    sara::tic();
-    auto dets = detect_objects(frame32f, model);
+    auto dets = detect_objects(video_stream.frame(), model);
     sara::toc("Yolo");
 
     sara::display(frame);

diff --git a/cpp/examples/Shakti/TensorRT/CMakeLists.txt b/cpp/examples/Shakti/TensorRT/CMakeLists.txt
@@ -2,7 +2,7 @@ if(NOT CMAKE_CUDA_COMPILER OR NOT TensorRT_FOUND)
   return()
 endif()
 
-file(GLOB TRT_SOURCE_FILES FILES *.cpp)
+file(GLOB TRT_SOURCE_FILES FILES *.cu)
 
 foreach(file ${TRT_SOURCE_FILES})
   get_filename_component(filename ${file} NAME_WE)