diff --git a/perception/image_projection_based_fusion/CMakeLists.txt b/perception/image_projection_based_fusion/CMakeLists.txt index 0f4cb7112f74a..b6dfbaf402cc8 100644 --- a/perception/image_projection_based_fusion/CMakeLists.txt +++ b/perception/image_projection_based_fusion/CMakeLists.txt @@ -127,6 +127,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) ament_auto_add_library(pointpainting_lib SHARED src/pointpainting_fusion/node.cpp + src/pointpainting_fusion/pointcloud_densification.cpp src/pointpainting_fusion/pointpainting_trt.cpp src/pointpainting_fusion/voxel_generator.cpp ) diff --git a/perception/image_projection_based_fusion/config/pointpainting.param.yaml b/perception/image_projection_based_fusion/config/pointpainting.param.yaml index 30136bc8f47d0..9227e4fa9319a 100644 --- a/perception/image_projection_based_fusion/config/pointpainting.param.yaml +++ b/perception/image_projection_based_fusion/config/pointpainting.param.yaml @@ -5,6 +5,7 @@ head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx" head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine" trt_precision: fp16 + cloud_capacity: 2000000 post_process_params: # post-process params circle_nms_dist_threshold: 0.3 diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointcloud_densification.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointcloud_densification.hpp new file mode 100644 index 0000000000000..03609eb18e689 --- /dev/null +++ b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointcloud_densification.hpp @@ -0,0 +1,72 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
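+//
+// Declares a point cloud densification cache for the PointPainting fusion node:
+// recent point clouds are stored together with their past-to-world transforms so
+// that the voxel generator can accumulate several sweeps into a single detector input.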
+ +#ifndef IMAGE_PROJECTION_BASED_FUSION__POINTPAINTING_FUSION__POINTCLOUD_DENSIFICATION_HPP_ +#define IMAGE_PROJECTION_BASED_FUSION__POINTPAINTING_FUSION__POINTCLOUD_DENSIFICATION_HPP_ + +#include +#include +#ifdef ROS_DISTRO_GALACTIC +#include +#else +#include +#endif + +#include + +#include +#include +#include + +namespace image_projection_based_fusion +{ +struct PointCloudWithTransform +{ + sensor_msgs::msg::PointCloud2 pointcloud_msg; + Eigen::Affine3f affine_past2world; +}; + +class PointCloudDensification +{ +public: + explicit PointCloudDensification(const centerpoint::DensificationParam & param); + + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer); + + double getCurrentTimestamp() const { return current_timestamp_; } + Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; } + std::list::iterator getPointCloudCacheIter() + { + return pointcloud_cache_.begin(); + } + bool isCacheEnd(std::list::iterator iter) + { + return iter == pointcloud_cache_.end(); + } + unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); } + +private: + void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine); + void dequeue(); + + centerpoint::DensificationParam param_; + double current_timestamp_{0.0}; + Eigen::Affine3f affine_world2current_; + std::list pointcloud_cache_; +}; + +} // namespace image_projection_based_fusion + +#endif // IMAGE_PROJECTION_BASED_FUSION__POINTPAINTING_FUSION__POINTCLOUD_DENSIFICATION_HPP_ diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointpainting_trt.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointpainting_trt.hpp index d5892de6d7edb..c8318f79bbeed 100644 --- a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointpainting_trt.hpp +++ b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/pointpainting_trt.hpp @@ -23,7 +23,6 @@ namespace image_projection_based_fusion { -static constexpr size_t CAPACITY_POINT = 2000000; class PointPaintingTRT : public centerpoint::CenterPointTRT { public: diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/voxel_generator.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/voxel_generator.hpp old mode 100755 new mode 100644 index d0f44b9d58706..4cdca8e49ac7e --- a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/voxel_generator.hpp +++ b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/voxel_generator.hpp @@ -15,19 +15,35 @@ #ifndef IMAGE_PROJECTION_BASED_FUSION__POINTPAINTING_FUSION__VOXEL_GENERATOR_HPP_ #define IMAGE_PROJECTION_BASED_FUSION__POINTPAINTING_FUSION__VOXEL_GENERATOR_HPP_ +#include +#include #include #include +#include #include namespace image_projection_based_fusion { -class VoxelGenerator : public centerpoint::VoxelGenerator + +class VoxelGenerator { public: - using centerpoint::VoxelGenerator::VoxelGenerator; + explicit VoxelGenerator( + const centerpoint::DensificationParam & param, const centerpoint::CenterPointConfig & config); + + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const 
tf2_ros::Buffer & tf_buffer); + + std::size_t generateSweepPoints(std::vector & points); + +protected: + std::unique_ptr pd_ptr_{nullptr}; - std::size_t generateSweepPoints(std::vector & points) override; + centerpoint::CenterPointConfig config_; + std::array range_; + std::array grid_size_; + std::array recip_voxel_size_; }; } // namespace image_projection_based_fusion diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp index 3fb2aca2171f7..4df660e65a8c1 100644 --- a/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp +++ b/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp @@ -110,6 +110,7 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt this->declare_parameter("densification_params.num_past_frames"); // network param const std::string trt_precision = this->declare_parameter("trt_precision"); + const std::size_t cloud_capacity = this->declare_parameter("cloud_capacity"); const std::string encoder_onnx_path = this->declare_parameter("encoder_onnx_path"); const std::string encoder_engine_path = this->declare_parameter("encoder_engine_path"); @@ -188,9 +189,9 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt centerpoint::DensificationParam densification_param( densification_world_frame_id, densification_num_past_frames); centerpoint::CenterPointConfig config( - class_names_.size(), point_feature_size, max_voxel_size, pointcloud_range, voxel_size, - downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold, - yaw_norm_thresholds, has_variance_); + class_names_.size(), point_feature_size, cloud_capacity, max_voxel_size, pointcloud_range, + voxel_size, downsample_factor, encoder_in_feature_size, score_threshold, + circle_nms_dist_threshold, yaw_norm_thresholds, has_variance_); // create detector detector_ptr_ = std::make_unique( diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/pointcloud_densification.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/pointcloud_densification.cpp new file mode 100644 index 0000000000000..9edf6a73cd59b --- /dev/null +++ b/perception/image_projection_based_fusion/src/pointpainting_fusion/pointcloud_densification.cpp @@ -0,0 +1,103 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "image_projection_based_fusion/pointpainting_fusion/pointcloud_densification.hpp" + +#include + +#include + +#include +#ifdef ROS_DISTRO_GALACTIC +#include +#else +#include +#endif + +#include +#include + +namespace +{ +boost::optional getTransform( + const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id, + const std::string & source_frame_id, const rclcpp::Time & time) +{ + try { + geometry_msgs::msg::TransformStamped transform_stamped; + transform_stamped = tf_buffer.lookupTransform( + target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5)); + return transform_stamped.transform; + } catch (tf2::TransformException & ex) { + RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), ex.what()); + return boost::none; + } +} + +Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t) +{ + Eigen::Affine3f a; + a.matrix() = tf2::transformToEigen(t).matrix().cast(); + return a; +} + +} // namespace + +namespace image_projection_based_fusion +{ +PointCloudDensification::PointCloudDensification(const centerpoint::DensificationParam & param) +: param_(param) +{ +} + +bool PointCloudDensification::enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer) +{ + const auto header = pointcloud_msg.header; + + if (param_.pointcloud_cache_size() > 1) { + auto transform_world2current = + getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp); + if (!transform_world2current) { + return false; + } + auto affine_world2current = transformToEigen(transform_world2current.get()); + + enqueue(pointcloud_msg, affine_world2current); + } else { + enqueue(pointcloud_msg, Eigen::Affine3f::Identity()); + } + + dequeue(); + + return true; +} + +void PointCloudDensification::enqueue( + const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current) +{ + affine_world2current_ = affine_world2current; + current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds(); + PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()}; + pointcloud_cache_.push_front(pointcloud); +} + +void PointCloudDensification::dequeue() +{ + if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) { + pointcloud_cache_.pop_back(); + } +} + +} // namespace image_projection_based_fusion diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/voxel_generator.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/voxel_generator.cpp index ea10cb1237436..cb3fc33d3e022 100755 --- a/perception/image_projection_based_fusion/src/pointpainting_fusion/voxel_generator.cpp +++ b/perception/image_projection_based_fusion/src/pointpainting_fusion/voxel_generator.cpp @@ -18,6 +18,32 @@ namespace image_projection_based_fusion { + +VoxelGenerator::VoxelGenerator( + const centerpoint::DensificationParam & param, const centerpoint::CenterPointConfig & config) +: config_(config) +{ + pd_ptr_ = std::make_unique(param); + range_[0] = config.range_min_x_; + range_[1] = config.range_min_y_; + range_[2] = config.range_min_z_; + range_[3] = config.range_max_x_; + range_[4] = config.range_max_y_; + range_[5] = config.range_max_z_; + grid_size_[0] = config.grid_size_x_; + grid_size_[1] = config.grid_size_y_; + grid_size_[2] = config.grid_size_z_; + recip_voxel_size_[0] = 1 / config.voxel_size_x_; + recip_voxel_size_[1] = 1 / config.voxel_size_y_; + recip_voxel_size_[2] = 1 / config.voxel_size_z_; +} + +bool VoxelGenerator::enqueuePointCloud( + 
const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer) +{ + return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer); +} + size_t VoxelGenerator::generateSweepPoints(std::vector & points) { Eigen::Vector3f point_current, point_past; diff --git a/perception/lidar_centerpoint/README.md b/perception/lidar_centerpoint/README.md index fa42655b6ca1f..d7e81f7add6ea 100644 --- a/perception/lidar_centerpoint/README.md +++ b/perception/lidar_centerpoint/README.md @@ -28,6 +28,22 @@ We trained the models using . ## Parameters +### ML Model Parameters + +Note that these parameters are associated with ONNX file, predefined during the training phase. Be careful to change ONNX file as well when changing this parameter. Also, whenever you update the ONNX file, do NOT forget to check these values. + +| Name | Type | Default Value | Description | +| -------------------------------------- | ------------ | ------------------------------------------------ | --------------------------------------------------------------------- | +| `model_params.class_names` | list[string] | ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"] | list of class names for model outputs | +| `model_params.point_feature_size` | int | `4` | number of features per point in the point cloud | +| `model_params.max_voxel_size` | int | `40000` | maximum number of voxels | +| `model_params.point_cloud_range` | list[double] | [-76.8, -76.8, -4.0, 76.8, 76.8, 6.0] | detection range [min_x, min_y, min_z, max_x, max_y, max_z] [m] | +| `model_params.voxel_size` | list[double] | [0.32, 0.32, 10.0] | size of each voxel [x, y, z] [m] | +| `model_params.downsample_factor` | int | `1` | downsample factor for coordinates | +| `model_params.encoder_in_feature_size` | int | `9` | number of input features to the encoder | +| `model_params.has_variance` | bool | `false` | true if the model outputs pose variance as well as pose for each bbox | +| `model_params.has_twist` | bool | `false` | true if the model outputs velocity as well as pose for each bbox | + ### Core Parameters | Name | Type | Default Value | Description | diff --git a/perception/lidar_centerpoint/config/centerpoint.param.yaml b/perception/lidar_centerpoint/config/centerpoint.param.yaml index 17ed9a447cdb8..53d77e8d1c42c 100644 --- a/perception/lidar_centerpoint/config/centerpoint.param.yaml +++ b/perception/lidar_centerpoint/config/centerpoint.param.yaml @@ -6,6 +6,7 @@ head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx" head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine" trt_precision: fp16 + cloud_capacity: 2000000 post_process_params: # post-process params circle_nms_dist_threshold: 0.5 diff --git a/perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml b/perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml new file mode 100644 index 0000000000000..bd5fc5f3567a5 --- /dev/null +++ b/perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml @@ -0,0 +1,21 @@ +/**: + ros__parameters: + + # weight files + encoder_onnx_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).onnx" + encoder_engine_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).engine" + head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx" + head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine" + trt_precision: fp16 + cloud_capacity: 2000000 + post_process_params: + # post-process params + 
circle_nms_dist_threshold: 0.5 + iou_nms_target_class_names: ["CAR"] + iou_nms_search_distance_2d: 10.0 + iou_nms_threshold: 0.1 + yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0] + score_threshold: 0.35 + densification_params: + world_frame_id: map + num_past_frames: 1 diff --git a/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml b/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml index 17ed9a447cdb8..53d77e8d1c42c 100644 --- a/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml +++ b/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml @@ -6,6 +6,7 @@ head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx" head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine" trt_precision: fp16 + cloud_capacity: 2000000 post_process_params: # post-process params circle_nms_dist_threshold: 0.5 diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp index 19fdbe7a8b9c2..7b560cf46e2e3 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp @@ -24,14 +24,16 @@ class CenterPointConfig { public: explicit CenterPointConfig( - const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size, - const std::vector & point_cloud_range, const std::vector & voxel_size, - const std::size_t downsample_factor, const std::size_t encoder_in_feature_size, - const float score_threshold, const float circle_nms_dist_threshold, - const std::vector yaw_norm_thresholds, const bool has_variance) + const std::size_t class_size, const float point_feature_size, const std::size_t cloud_capacity, + const std::size_t max_voxel_size, const std::vector & point_cloud_range, + const std::vector & voxel_size, const std::size_t downsample_factor, + const std::size_t encoder_in_feature_size, const float score_threshold, + const float circle_nms_dist_threshold, const std::vector yaw_norm_thresholds, + const bool has_variance) { class_size_ = class_size; point_feature_size_ = point_feature_size; + cloud_capacity_ = cloud_capacity; max_voxel_size_ = max_voxel_size; if (point_cloud_range.size() == 6) { range_min_x_ = static_cast(point_cloud_range[0]); @@ -85,6 +87,7 @@ class CenterPointConfig }; // input params + std::size_t cloud_capacity_{}; std::size_t class_size_{3}; const std::size_t point_dim_size_{3}; // x, y and z std::size_t point_feature_size_{4}; // x, y, z and time-lag diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp index 383e8ef719d4d..cbbf01c1e6405 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp @@ -15,15 +15,14 @@ #ifndef LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_ #define LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_ -#include -#include -#include -#include +#include "lidar_centerpoint/cuda_utils.hpp" +#include "lidar_centerpoint/network/network_trt.hpp" +#include "lidar_centerpoint/postprocess/postprocess_kernel.hpp" +#include "lidar_centerpoint/preprocess/voxel_generator.hpp" +#include "pcl/point_cloud.h" +#include "pcl/point_types.h" -#include - -#include -#include +#include "sensor_msgs/msg/point_cloud2.hpp" #include #include @@ -32,8 
+31,6 @@ namespace centerpoint { -static constexpr size_t CAPACITY_POINT = 2000000; - class NetworkParam { public: @@ -105,9 +102,11 @@ class CenterPointTRT cuda::unique_ptr head_out_rot_d_{nullptr}; cuda::unique_ptr head_out_vel_d_{nullptr}; cuda::unique_ptr points_d_{nullptr}; + cuda::unique_ptr points_aux_d_{nullptr}; cuda::unique_ptr voxels_buffer_d_{nullptr}; cuda::unique_ptr mask_d_{nullptr}; cuda::unique_ptr num_voxels_d_{nullptr}; + cuda::unique_ptr shuffle_indices_d_{nullptr}; }; } // namespace centerpoint diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp index 5ab26d75a0a41..494d499dc2924 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp @@ -42,7 +42,7 @@ #ifndef LIDAR_CENTERPOINT__CUDA_UTILS_HPP_ #define LIDAR_CENTERPOINT__CUDA_UTILS_HPP_ -#include +#include "cuda_runtime_api.h" #include #include @@ -89,13 +89,13 @@ cuda::unique_ptr make_unique() return cuda::unique_ptr{p}; } -constexpr size_t CUDA_ALIGN = 256; +constexpr std::size_t CUDA_ALIGN = 256; template -inline size_t get_size_aligned(size_t num_elem) +inline std::size_t get_size_aligned(std::size_t num_elem) { - size_t size = num_elem * sizeof(T); - size_t extra_align = 0; + std::size_t size = num_elem * sizeof(T); + std::size_t extra_align = 0; if (size % CUDA_ALIGN != 0) { extra_align = CUDA_ALIGN - size % CUDA_ALIGN; } @@ -103,9 +103,9 @@ inline size_t get_size_aligned(size_t num_elem) } template -inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size) +inline T * get_next_ptr(std::size_t num_elem, void *& workspace, std::size_t & workspace_size) { - size_t size = get_size_aligned(num_elem); + std::size_t size = get_size_aligned(num_elem); if (size > workspace_size) { throw ::std::runtime_error("Workspace is too small!"); } diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp index cd1a0e58e115e..d961dd998af76 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp @@ -17,10 +17,10 @@ #include -#include -#include -#include -#include +#include "autoware_perception_msgs/msg/detected_object_kinematics.hpp" +#include "autoware_perception_msgs/msg/detected_objects.hpp" +#include "autoware_perception_msgs/msg/object_classification.hpp" +#include "autoware_perception_msgs/msg/shape.hpp" #include diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp index 639aa0ba8bad3..1bf77248efe08 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp @@ -15,8 +15,8 @@ #ifndef LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_ #define LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_ -#include -#include +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "lidar_centerpoint/network/tensorrt_wrapper.hpp" #include diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp index 
3dd00c25fd9e7..ad5e222ce01e8 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp @@ -15,8 +15,8 @@ #ifndef LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_ #define LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_ -#include -#include +#include "cuda.h" +#include "cuda_runtime_api.h" namespace centerpoint { diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp index 7f55ab6f5e414..7415dc192d7ab 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp @@ -15,10 +15,9 @@ #ifndef LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_ #define LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_ -#include -#include - -#include +#include "NvInfer.h" +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "tensorrt_common/tensorrt_common.hpp" #include #include @@ -32,7 +31,7 @@ class TensorRTWrapper public: explicit TensorRTWrapper(const CenterPointConfig & config); - ~TensorRTWrapper(); + virtual ~TensorRTWrapper(); bool init( const std::string & onnx_path, const std::string & engine_path, const std::string & precision); @@ -50,7 +49,7 @@ class TensorRTWrapper private: bool parseONNX( const std::string & onnx_path, const std::string & engine_path, const std::string & precision, - size_t workspace_size = (1ULL << 30)); + std::size_t workspace_size = (1ULL << 30)); bool saveEngine(const std::string & engine_path); diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp index 4ccfa908e12d7..ddc1a072b76d0 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp @@ -15,13 +15,13 @@ #ifndef LIDAR_CENTERPOINT__NODE_HPP_ #define LIDAR_CENTERPOINT__NODE_HPP_ +#include "lidar_centerpoint/centerpoint_trt.hpp" +#include "lidar_centerpoint/detection_class_remapper.hpp" #include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp" #include #include #include -#include -#include #include #include @@ -51,7 +51,6 @@ class LidarCenterPointNode : public rclcpp::Node rclcpp::Subscription::SharedPtr pointcloud_sub_; rclcpp::Publisher::SharedPtr objects_pub_; - float score_threshold_{0.0}; std::vector class_names_; bool has_variance_{false}; bool has_twist_{false}; diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp index cd2d71d33806d..5b55636a3eec4 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp @@ -15,9 +15,8 @@ #ifndef LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ #define LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ -#include - -#include +#include "lidar_centerpoint/utils.hpp" +#include "thrust/device_vector.h" namespace centerpoint { diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp index 
d790e896dea9a..58431b48eb917 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp @@ -15,12 +15,10 @@ #ifndef LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ #define LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ -#include -#include - -#include -#include -#include +#include "cuda.h" +#include "cuda_runtime_api.h" +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "lidar_centerpoint/utils.hpp" #include @@ -38,8 +36,6 @@ class PostProcessCUDA private: CenterPointConfig config_; - thrust::device_vector boxes3d_d_; - thrust::device_vector yaw_norm_thresholds_d_; }; } // namespace centerpoint diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp index 209fc74e6c58f..5b7b3beb343cc 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp @@ -15,17 +15,19 @@ #ifndef LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ #define LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ -#include -#include -#ifdef ROS_DISTRO_GALACTIC -#include -#else -#include -#endif +#include "tf2_ros/buffer.h" +#include "tf2_ros/transform_listener.h" #include #include #include +#ifdef ROS_DISTRO_GALACTIC +#include "tf2_sensor_msgs/tf2_sensor_msgs.h" +#else +#include "tf2_sensor_msgs/tf2_sensor_msgs.hpp" +#endif + +#include "lidar_centerpoint/cuda_utils.hpp" namespace centerpoint { @@ -48,7 +50,10 @@ class DensificationParam struct PointCloudWithTransform { - sensor_msgs::msg::PointCloud2 pointcloud_msg; + cuda::unique_ptr points_d{nullptr}; + std_msgs::msg::Header header; + std::size_t num_points{0}; + std::size_t point_step{0}; Eigen::Affine3f affine_past2world; }; @@ -58,7 +63,8 @@ class PointCloudDensification explicit PointCloudDensification(const DensificationParam & param); bool enqueuePointCloud( - const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer); + const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer, + cudaStream_t stream); double getCurrentTimestamp() const { return current_timestamp_; } Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; } @@ -73,7 +79,8 @@ class PointCloudDensification unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); } private: - void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine); + void enqueue( + const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine, cudaStream_t stream); void dequeue(); DensificationParam param_; diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp index 9488b67475509..62495d655723e 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp @@ -15,17 +15,26 @@ #ifndef LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_ #define LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_ -#include -#include +#include 
"cuda.h" +#include "cuda_runtime_api.h" namespace centerpoint { -cudaError_t generateVoxels_random_launch( - const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range, - float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size, - float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels, +cudaError_t generateSweepPoints_launch( + const float * input_points, std::size_t points_size, int input_point_step, float time_lag, + const float * transform, int num_features, float * output_points, cudaStream_t stream); + +cudaError_t shufflePoints_launch( + const float * points, const unsigned int * indices, float * shuffled_points, + const std::size_t points_size, const std::size_t max_size, const std::size_t offset, cudaStream_t stream); +cudaError_t generateVoxels_random_launch( + const float * points, std::size_t points_size, float min_x_range, float max_x_range, + float min_y_range, float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, + float pillar_y_size, float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, + float * voxels, cudaStream_t stream); + cudaError_t generateBaseFeatures_launch( unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size, unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs, diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp index be15d51e91715..3ade5e677cdbe 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp @@ -15,10 +15,10 @@ #ifndef LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_ #define LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_ -#include -#include +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp" -#include +#include "sensor_msgs/msg/point_cloud2.hpp" #include #include @@ -31,10 +31,11 @@ class VoxelGeneratorTemplate explicit VoxelGeneratorTemplate( const DensificationParam & param, const CenterPointConfig & config); - virtual std::size_t generateSweepPoints(std::vector & points) = 0; + virtual std::size_t generateSweepPoints(float * d_points, cudaStream_t stream) = 0; bool enqueuePointCloud( - const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer); + const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer, + cudaStream_t stream); protected: std::unique_ptr pd_ptr_{nullptr}; @@ -50,7 +51,7 @@ class VoxelGenerator : public VoxelGeneratorTemplate public: using VoxelGeneratorTemplate::VoxelGeneratorTemplate; - std::size_t generateSweepPoints(std::vector & points) override; + std::size_t generateSweepPoints(float * d_points, cudaStream_t stream) override; }; } // namespace centerpoint diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp index e33d86cd0aba7..102359e1a43f4 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp @@ -17,12 +17,9 @@ // ros packages cannot be included from cuda. 
-#include +#include "lidar_centerpoint/utils.hpp" -#include -#include -#include -#include +#include "autoware_perception_msgs/msg/detected_objects.hpp" #include #include @@ -38,7 +35,7 @@ uint8_t getSemanticType(const std::string & class_name); std::array convertPoseCovarianceMatrix(const Box3D & box3d); -std::array convertTwistCovarianceMatrix(const Box3D & box3d); +std::array convertTwistCovarianceMatrix(const Box3D & box3d, const float yaw); bool isCarLikeVehicleLabel(const uint8_t label); diff --git a/perception/lidar_centerpoint/lib/centerpoint_trt.cpp b/perception/lidar_centerpoint/lib/centerpoint_trt.cpp index a4281aa8260d7..a80d00a1f004f 100644 --- a/perception/lidar_centerpoint/lib/centerpoint_trt.cpp +++ b/perception/lidar_centerpoint/lib/centerpoint_trt.cpp @@ -14,13 +14,18 @@ #include "lidar_centerpoint/centerpoint_trt.hpp" +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "lidar_centerpoint/network/scatter_kernel.hpp" +#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp" + #include -#include -#include -#include +#include +#include +#include #include #include +#include #include #include @@ -84,7 +89,7 @@ void CenterPointTRT::initPtr() mask_size_ = config_.grid_size_x_ * config_.grid_size_y_; // host - points_.resize(CAPACITY_POINT * config_.point_feature_size_); + points_.resize(config_.cloud_capacity_ * config_.point_feature_size_); // device voxels_d_ = cuda::make_unique(voxels_size_); @@ -99,10 +104,25 @@ void CenterPointTRT::initPtr() head_out_dim_d_ = cuda::make_unique(grid_xy_size * config_.head_out_dim_size_); head_out_rot_d_ = cuda::make_unique(grid_xy_size * config_.head_out_rot_size_); head_out_vel_d_ = cuda::make_unique(grid_xy_size * config_.head_out_vel_size_); - points_d_ = cuda::make_unique(CAPACITY_POINT * config_.point_feature_size_); + points_d_ = cuda::make_unique(config_.cloud_capacity_ * config_.point_feature_size_); voxels_buffer_d_ = cuda::make_unique(voxels_buffer_size_); mask_d_ = cuda::make_unique(mask_size_); num_voxels_d_ = cuda::make_unique(1); + + points_aux_d_ = cuda::make_unique(config_.cloud_capacity_ * config_.point_feature_size_); + shuffle_indices_d_ = cuda::make_unique(config_.cloud_capacity_); + + std::vector indexes(config_.cloud_capacity_); + std::iota(indexes.begin(), indexes.end(), 0); + + std::default_random_engine e(0); + std::shuffle(indexes.begin(), indexes.end(), e); + + std::srand(std::time(nullptr)); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + shuffle_indices_d_.get(), indexes.data(), config_.cloud_capacity_ * sizeof(unsigned int), + cudaMemcpyHostToDevice, stream_)); } bool CenterPointTRT::detect( @@ -143,14 +163,17 @@ bool CenterPointTRT::detect( bool CenterPointTRT::preprocess( const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer) { - bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer); + bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer, stream_); if (!is_success) { return false; } - const auto count = vg_ptr_->generateSweepPoints(points_); - CHECK_CUDA_ERROR(cudaMemcpyAsync( - points_d_.get(), points_.data(), count * config_.point_feature_size_ * sizeof(float), - cudaMemcpyHostToDevice, stream_)); + + const std::size_t count = vg_ptr_->generateSweepPoints(points_aux_d_.get(), stream_); + const std::size_t random_offset = std::rand() % config_.cloud_capacity_; + CHECK_CUDA_ERROR(shufflePoints_launch( + points_aux_d_.get(), shuffle_indices_d_.get(), points_d_.get(), count, config_.cloud_capacity_, + random_offset, 
stream_)); + CHECK_CUDA_ERROR(cudaMemsetAsync(num_voxels_d_.get(), 0, sizeof(unsigned int), stream_)); CHECK_CUDA_ERROR( cudaMemsetAsync(voxels_buffer_d_.get(), 0, voxels_buffer_size_ * sizeof(float), stream_)); @@ -162,10 +185,10 @@ bool CenterPointTRT::preprocess( num_points_per_voxel_d_.get(), 0, config_.max_voxel_size_ * sizeof(float), stream_)); CHECK_CUDA_ERROR(generateVoxels_random_launch( - points_d_.get(), count, config_.range_min_x_, config_.range_max_x_, config_.range_min_y_, - config_.range_max_y_, config_.range_min_z_, config_.range_max_z_, config_.voxel_size_x_, - config_.voxel_size_y_, config_.voxel_size_z_, config_.grid_size_y_, config_.grid_size_x_, - mask_d_.get(), voxels_buffer_d_.get(), stream_)); + points_d_.get(), config_.cloud_capacity_, config_.range_min_x_, config_.range_max_x_, + config_.range_min_y_, config_.range_max_y_, config_.range_min_z_, config_.range_max_z_, + config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_, config_.grid_size_y_, + config_.grid_size_x_, mask_d_.get(), voxels_buffer_d_.get(), stream_)); CHECK_CUDA_ERROR(generateBaseFeatures_launch( mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_, diff --git a/perception/lidar_centerpoint/lib/detection_class_remapper.cpp b/perception/lidar_centerpoint/lib/detection_class_remapper.cpp index ecfbb4360ecf0..9805fc7a661d1 100644 --- a/perception/lidar_centerpoint/lib/detection_class_remapper.cpp +++ b/perception/lidar_centerpoint/lib/detection_class_remapper.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include "lidar_centerpoint/detection_class_remapper.hpp" namespace centerpoint { diff --git a/perception/lidar_centerpoint/lib/network/scatter_kernel.cu b/perception/lidar_centerpoint/lib/network/scatter_kernel.cu index 09cc83bf606fe..f139e230426ca 100644 --- a/perception/lidar_centerpoint/lib/network/scatter_kernel.cu +++ b/perception/lidar_centerpoint/lib/network/scatter_kernel.cu @@ -13,8 +13,7 @@ // limitations under the License. #include "lidar_centerpoint/network/scatter_kernel.hpp" - -#include +#include "lidar_centerpoint/utils.hpp" namespace { diff --git a/perception/lidar_centerpoint/lib/network/tensorrt_wrapper.cpp b/perception/lidar_centerpoint/lib/network/tensorrt_wrapper.cpp index 91fcce9a80f78..3fc522eea15c9 100644 --- a/perception/lidar_centerpoint/lib/network/tensorrt_wrapper.cpp +++ b/perception/lidar_centerpoint/lib/network/tensorrt_wrapper.cpp @@ -14,7 +14,7 @@ #include "lidar_centerpoint/network/tensorrt_wrapper.hpp" -#include +#include "NvOnnxParser.h" #include #include @@ -80,7 +80,7 @@ bool TensorRTWrapper::createContext() bool TensorRTWrapper::parseONNX( const std::string & onnx_path, const std::string & engine_path, const std::string & precision, - const size_t workspace_size) + const std::size_t workspace_size) { auto builder = tensorrt_common::TrtUniquePtr(nvinfer1::createInferBuilder(logger_)); diff --git a/perception/lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu b/perception/lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu index 398e75a55c44b..c208eefe135fb 100644 --- a/perception/lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu +++ b/perception/lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu @@ -21,12 +21,10 @@ Written by Shaoshuai Shi All Rights Reserved 2019-2020. 
*/ +#include "lidar_centerpoint/cuda_utils.hpp" #include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp" - -#include -#include - -#include +#include "lidar_centerpoint/utils.hpp" +#include "thrust/host_vector.h" namespace { diff --git a/perception/lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp b/perception/lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp index d750e4adab0e1..30a54b08b8ede 100644 --- a/perception/lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp +++ b/perception/lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp @@ -14,9 +14,10 @@ #include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp" -#include "autoware/universe_utils/geometry/geometry.hpp" -#include "object_recognition_utils/geometry.hpp" -#include "object_recognition_utils/object_recognition_utils.hpp" +#include +#include +#include + namespace centerpoint { using Label = autoware_perception_msgs::msg::ObjectClassification; diff --git a/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu b/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu index 34bbd2811041c..fb8e364b352fc 100644 --- a/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu +++ b/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu @@ -13,12 +13,10 @@ // limitations under the License. #include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp" - -#include - -#include -#include -#include +#include "lidar_centerpoint/postprocess/postprocess_kernel.hpp" +#include "thrust/count.h" +#include "thrust/device_vector.h" +#include "thrust/sort.h" namespace { @@ -139,10 +137,6 @@ __global__ void generateBoxes3D_kernel( PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config) { - const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_; - boxes3d_d_ = thrust::device_vector(num_raw_boxes3d); - yaw_norm_thresholds_d_ = thrust::device_vector( - config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end()); } // cspell: ignore divup @@ -155,23 +149,26 @@ cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch( divup(config_.down_grid_size_y_, THREADS_PER_BLOCK), divup(config_.down_grid_size_x_, THREADS_PER_BLOCK)); dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + auto boxes3d_d = + thrust::device_vector(config_.down_grid_size_y_ * config_.down_grid_size_x_); + auto yaw_norm_thresholds_d = thrust::device_vector( + config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end()); generateBoxes3D_kernel<<>>( out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_, config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_, config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_, - config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()), - thrust::raw_pointer_cast(boxes3d_d_.data())); + config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d.data()), + thrust::raw_pointer_cast(boxes3d_d.data())); // suppress by score const auto num_det_boxes3d = thrust::count_if( - thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), - is_score_greater(config_.score_threshold_)); + thrust::device, boxes3d_d.begin(), boxes3d_d.end(), is_score_greater(config_.score_threshold_)); if (num_det_boxes3d == 0) { return cudaGetLastError(); } thrust::device_vector det_boxes3d_d(num_det_boxes3d); thrust::copy_if( - thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), 
det_boxes3d_d.begin(), + thrust::device, boxes3d_d.begin(), boxes3d_d.end(), det_boxes3d_d.begin(), is_score_greater(config_.score_threshold_)); // sort by score diff --git a/perception/lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp b/perception/lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp index 66c348fda50b4..2859d58d8f669 100644 --- a/perception/lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp +++ b/perception/lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp @@ -14,20 +14,19 @@ #include "lidar_centerpoint/preprocess/pointcloud_densification.hpp" -#include +#include "pcl_conversions/pcl_conversions.h" +#include "pcl_ros/transforms.hpp" -#include +#include "boost/optional.hpp" -#include +#include +#include #ifdef ROS_DISTRO_GALACTIC -#include +#include "tf2_eigen/tf2_eigen.h" #else -#include +#include "tf2_eigen/tf2_eigen.hpp" #endif -#include -#include - namespace { boost::optional getTransform( @@ -61,7 +60,8 @@ PointCloudDensification::PointCloudDensification(const DensificationParam & para } bool PointCloudDensification::enqueuePointCloud( - const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer) + const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer, + cudaStream_t stream) { const auto header = pointcloud_msg.header; @@ -73,9 +73,9 @@ bool PointCloudDensification::enqueuePointCloud( } auto affine_world2current = transformToEigen(transform_world2current.get()); - enqueue(pointcloud_msg, affine_world2current); + enqueue(pointcloud_msg, affine_world2current, stream); } else { - enqueue(pointcloud_msg, Eigen::Affine3f::Identity()); + enqueue(pointcloud_msg, Eigen::Affine3f::Identity(), stream); } dequeue(); @@ -84,12 +84,24 @@ bool PointCloudDensification::enqueuePointCloud( } void PointCloudDensification::enqueue( - const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current) + const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current, + cudaStream_t stream) { affine_world2current_ = affine_world2current; current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds(); - PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()}; - pointcloud_cache_.push_front(pointcloud); + + assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 0); + auto points_d = cuda::make_unique( + sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step, + cudaMemcpyHostToDevice, stream)); + + PointCloudWithTransform pointcloud = { + std::move(points_d), msg.header, msg.width * msg.height, msg.point_step, + affine_world2current.inverse()}; + + pointcloud_cache_.push_front(std::move(pointcloud)); } void PointCloudDensification::dequeue() diff --git a/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu b/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu index 218aaee125c02..01f62f22e892b 100644 --- a/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu +++ b/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu @@ -28,9 +28,12 @@ * limitations under the License. 
*/ +#include "lidar_centerpoint/cuda_utils.hpp" #include "lidar_centerpoint/preprocess/preprocess_kernel.hpp" +#include "lidar_centerpoint/utils.hpp" -#include +#include +#include namespace { @@ -41,10 +44,97 @@ const std::size_t ENCODER_IN_FEATURE_SIZE = 9; // the same as encoder_in_featur namespace centerpoint { + +__global__ void generateSweepPoints_kernel( + const float * input_points, std::size_t points_size, int input_point_step, float time_lag, + const float * transform_array, int num_features, float * output_points) +{ + int point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx >= points_size) return; + + const float input_x = input_points[point_idx * input_point_step + 0]; + const float input_y = input_points[point_idx * input_point_step + 1]; + const float input_z = input_points[point_idx * input_point_step + 2]; + + // transform_array is expected to be column-major + output_points[point_idx * num_features + 0] = transform_array[0] * input_x + + transform_array[4] * input_y + + transform_array[8] * input_z + transform_array[12]; + output_points[point_idx * num_features + 1] = transform_array[1] * input_x + + transform_array[5] * input_y + + transform_array[9] * input_z + transform_array[13]; + output_points[point_idx * num_features + 2] = transform_array[2] * input_x + + transform_array[6] * input_y + + transform_array[10] * input_z + transform_array[14]; + output_points[point_idx * num_features + 3] = time_lag; +} + +cudaError_t generateSweepPoints_launch( + const float * input_points, std::size_t points_size, int input_point_step, float time_lag, + const float * transform_array, int num_features, float * output_points, cudaStream_t stream) +{ + auto transform_d = cuda::make_unique(16); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + transform_d.get(), transform_array, 16 * sizeof(float), cudaMemcpyHostToDevice, stream)); + + dim3 blocks((points_size + 256 - 1) / 256); + dim3 threads(256); + assert(num_features == 4); + + generateSweepPoints_kernel<<>>( + input_points, points_size, input_point_step, time_lag, transform_d.get(), num_features, + output_points); + + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void shufflePoints_kernel( + const float * points, const unsigned int * indices, float * shuffled_points, + const std::size_t points_size, const std::size_t max_size, const std::size_t offset) +{ + int point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx >= max_size) return; + + int src_idx = indices[(point_idx + offset) % max_size]; + int dst_idx = point_idx; + + if (dst_idx >= points_size) { + shuffled_points[4 * dst_idx + 0] = INFINITY; + shuffled_points[4 * dst_idx + 1] = INFINITY; + shuffled_points[4 * dst_idx + 2] = INFINITY; + shuffled_points[4 * dst_idx + 3] = INFINITY; + } else { + shuffled_points[4 * dst_idx + 0] = points[4 * src_idx + 0]; + shuffled_points[4 * dst_idx + 1] = points[4 * src_idx + 1]; + shuffled_points[4 * dst_idx + 2] = points[4 * src_idx + 2]; + shuffled_points[4 * dst_idx + 3] = points[4 * src_idx + 3]; + } +} + +cudaError_t shufflePoints_launch( + const float * points, const unsigned int * indices, float * shuffled_points, + const std::size_t points_size, const std::size_t max_size, const std::size_t offset, + cudaStream_t stream) +{ + dim3 blocks((max_size + 256 - 1) / 256); + dim3 threads(256); + + if (blocks.x == 0) { + return cudaGetLastError(); + } + + shufflePoints_kernel<<>>( + points, indices, shuffled_points, points_size, max_size, offset); + cudaError_t err = cudaGetLastError(); + return err; +} 
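+// Note on shufflePoints_launch: the kernel gathers points through a precomputed random
+// permutation (indices), rotated by a caller-supplied offset, so each call sees a
+// different pseudo-random ordering of the capacity-sized buffer. Output slots with an
+// index of at least points_size are filled with INFINITY, so the range check in the
+// voxelization kernel below discards them.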
+ __global__ void generateVoxels_random_kernel( - const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range, - float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size, - float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels) + const float * points, std::size_t points_size, float min_x_range, float max_x_range, + float min_y_range, float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, + float pillar_y_size, float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, + float * voxels) { int point_idx = blockIdx.x * blockDim.x + threadIdx.x; if (point_idx >= points_size) return; @@ -71,10 +161,10 @@ __global__ void generateVoxels_random_kernel( } cudaError_t generateVoxels_random_launch( - const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range, - float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size, - float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels, - cudaStream_t stream) + const float * points, std::size_t points_size, float min_x_range, float max_x_range, + float min_y_range, float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, + float pillar_y_size, float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, + float * voxels, cudaStream_t stream) { dim3 blocks((points_size + 256 - 1) / 256); dim3 threads(256); diff --git a/perception/lidar_centerpoint/lib/preprocess/voxel_generator.cpp b/perception/lidar_centerpoint/lib/preprocess/voxel_generator.cpp index 07a1a2de725f5..9148ed993c9c3 100644 --- a/perception/lidar_centerpoint/lib/preprocess/voxel_generator.cpp +++ b/perception/lidar_centerpoint/lib/preprocess/voxel_generator.cpp @@ -14,7 +14,12 @@ #include "lidar_centerpoint/preprocess/voxel_generator.hpp" -#include +#include "lidar_centerpoint/centerpoint_trt.hpp" +#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp" + +#include "sensor_msgs/point_cloud2_iterator.hpp" + +#include namespace centerpoint { @@ -38,35 +43,40 @@ VoxelGeneratorTemplate::VoxelGeneratorTemplate( } bool VoxelGeneratorTemplate::enqueuePointCloud( - const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer) + const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer, + cudaStream_t stream) { - return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer); + return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer, stream); } -std::size_t VoxelGenerator::generateSweepPoints(std::vector & points) +std::size_t VoxelGenerator::generateSweepPoints(float * points_d, cudaStream_t stream) { - Eigen::Vector3f point_current, point_past; - size_t point_counter{}; + std::size_t point_counter = 0; for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter); pc_cache_iter++) { - auto pc_msg = pc_cache_iter->pointcloud_msg; + auto sweep_num_points = pc_cache_iter->num_points; + auto point_step = pc_cache_iter->point_step; auto affine_past2current = pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world; float time_lag = static_cast( - pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds()); - - for (sensor_msgs::PointCloud2ConstIterator x_iter(pc_msg, "x"), y_iter(pc_msg, "y"), - z_iter(pc_msg, "z"); - x_iter != x_iter.end(); ++x_iter, 
++y_iter, ++z_iter) { - point_past << *x_iter, *y_iter, *z_iter; - point_current = affine_past2current * point_past; + pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds()); - points.at(point_counter * config_.point_feature_size_) = point_current.x(); - points.at(point_counter * config_.point_feature_size_ + 1) = point_current.y(); - points.at(point_counter * config_.point_feature_size_ + 2) = point_current.z(); - points.at(point_counter * config_.point_feature_size_ + 3) = time_lag; - ++point_counter; + if (point_counter + sweep_num_points > config_.cloud_capacity_) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_centerpoint"), + "Requested number of points exceeds the maximum capacity. Current points = " + << point_counter); + break; } + + static_assert(std::is_same::value); + static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major."); + generateSweepPoints_launch( + pc_cache_iter->points_d.get(), sweep_num_points, point_step / sizeof(float), time_lag, + affine_past2current.matrix().data(), config_.point_feature_size_, + points_d + config_.point_feature_size_ * point_counter, stream); + + point_counter += sweep_num_points; } return point_counter; } diff --git a/perception/lidar_centerpoint/lib/ros_utils.cpp b/perception/lidar_centerpoint/lib/ros_utils.cpp index 039d340cc2dc4..f9dfbee73a88c 100644 --- a/perception/lidar_centerpoint/lib/ros_utils.cpp +++ b/perception/lidar_centerpoint/lib/ros_utils.cpp @@ -50,7 +50,7 @@ void box3DToDetectedObject( // pose and shape // mmdet3d yaw format to ros yaw format - float yaw = -box3d.yaw - autoware::universe_utils::pi / 2; + const float yaw = -box3d.yaw - autoware::universe_utils::pi / 2; obj.kinematics.pose_with_covariance.pose.position = autoware::universe_utils::createPoint(box3d.x, box3d.y, box3d.z); obj.kinematics.pose_with_covariance.pose.orientation = @@ -67,14 +67,18 @@ void box3DToDetectedObject( if (has_twist) { float vel_x = box3d.vel_x; float vel_y = box3d.vel_y; + + // twist of the object is based on the object coordinate system geometry_msgs::msg::Twist twist; - twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2)); - twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - yaw); + twist.linear.x = std::cos(yaw) * vel_x + std::sin(yaw) * vel_y; + twist.linear.y = -std::sin(yaw) * vel_x + std::cos(yaw) * vel_y; + twist.angular.z = 0; // angular velocity is not supported + obj.kinematics.twist_with_covariance.twist = twist; obj.kinematics.has_twist = has_twist; if (has_variance) { obj.kinematics.has_twist_covariance = has_variance; - obj.kinematics.twist_with_covariance.covariance = convertTwistCovarianceMatrix(box3d); + obj.kinematics.twist_with_covariance.covariance = convertTwistCovarianceMatrix(box3d, yaw); } } } @@ -111,12 +115,21 @@ std::array convertPoseCovarianceMatrix(const Box3D & box3d) return pose_covariance; } -std::array convertTwistCovarianceMatrix(const Box3D & box3d) +std::array convertTwistCovarianceMatrix(const Box3D & box3d, const float yaw) { using POSE_IDX = autoware::universe_utils::xyzrpy_covariance_index::XYZRPY_COV_IDX; + + // twist covariance matrix is based on the object coordinate system std::array twist_covariance{}; - twist_covariance[POSE_IDX::X_X] = box3d.vel_x_variance; - twist_covariance[POSE_IDX::Y_Y] = box3d.vel_y_variance; + const float cos_yaw = std::cos(yaw); + const float sin_yaw = std::sin(yaw); + twist_covariance[POSE_IDX::X_X] = + box3d.vel_x_variance * cos_yaw * cos_yaw + box3d.vel_y_variance * sin_yaw * sin_yaw; + 
twist_covariance[POSE_IDX::X_Y] = + (box3d.vel_y_variance - box3d.vel_x_variance) * sin_yaw * cos_yaw; + twist_covariance[POSE_IDX::Y_X] = twist_covariance[POSE_IDX::X_Y]; + twist_covariance[POSE_IDX::Y_Y] = + box3d.vel_x_variance * sin_yaw * sin_yaw + box3d.vel_y_variance * cos_yaw * cos_yaw; return twist_covariance; } diff --git a/perception/lidar_centerpoint/lib/utils.cpp b/perception/lidar_centerpoint/lib/utils.cpp index b6e0a54ab6de9..ade0bce8a4ff3 100644 --- a/perception/lidar_centerpoint/lib/utils.cpp +++ b/perception/lidar_centerpoint/lib/utils.cpp @@ -19,7 +19,7 @@ namespace centerpoint { // cspell: ignore divup -std::size_t divup(const std::size_t a, const std::size_t b) +std::size_t divup(const std::size_t a, const std::size_t b) // cppcheck-suppress unusedFunction { if (a == 0) { throw std::runtime_error("A dividend of divup isn't positive."); diff --git a/perception/lidar_centerpoint/package.xml b/perception/lidar_centerpoint/package.xml index 280e2219fb1f1..4c6fa471c297a 100644 --- a/perception/lidar_centerpoint/package.xml +++ b/perception/lidar_centerpoint/package.xml @@ -16,7 +16,6 @@ autoware_universe_utils object_recognition_utils pcl_ros - python3-open3d rclcpp rclcpp_components tensorrt_common diff --git a/perception/lidar_centerpoint/schema/centerpoint.schemal.json b/perception/lidar_centerpoint/schema/centerpoint.schemal.json index b11c115cc2466..c3268f0e90d9b 100644 --- a/perception/lidar_centerpoint/schema/centerpoint.schemal.json +++ b/perception/lidar_centerpoint/schema/centerpoint.schemal.json @@ -12,6 +12,12 @@ "default": "fp16", "enum": ["fp32", "fp16"] }, + "cloud_capacity": { + "type": "integer", + "description": "Capacity of the point cloud buffer (should be set to at least the maximum theoretical number of points).", + "default": 2000000, + "minimum": 1 + }, "post_process_params": { "type": "object", "properties": { diff --git a/perception/lidar_centerpoint/src/node.cpp b/perception/lidar_centerpoint/src/node.cpp index f9687a6342c9c..c42ef785aed51 100644 --- a/perception/lidar_centerpoint/src/node.cpp +++ b/perception/lidar_centerpoint/src/node.cpp @@ -14,17 +14,7 @@ #include "lidar_centerpoint/node.hpp" -#include -#include -#include -#include -#include - -#ifdef ROS_DISTRO_GALACTIC -#include -#else -#include -#endif +#include "pcl_ros/transforms.hpp" #include #include @@ -33,6 +23,17 @@ #include #include +#ifdef ROS_DISTRO_GALACTIC +#include "tf2_geometry_msgs/tf2_geometry_msgs.h" +#else +#include "tf2_geometry_msgs/tf2_geometry_msgs.hpp" +#endif + +#include "lidar_centerpoint/centerpoint_config.hpp" +#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp" +#include "lidar_centerpoint/ros_utils.hpp" +#include "lidar_centerpoint/utils.hpp" + namespace centerpoint { LidarCenterPointNode::LidarCenterPointNode(const rclcpp::NodeOptions & node_options) @@ -49,6 +50,9 @@ LidarCenterPointNode::LidarCenterPointNode(const rclcpp::NodeOptions & node_opti const int densification_num_past_frames = this->declare_parameter("densification_params.num_past_frames"); const std::string trt_precision = this->declare_parameter("trt_precision"); + const std::size_t cloud_capacity = this->declare_parameter( + "cloud_capacity", 2000000); // NOTE(knzo25): this is a temporal hack since the launcher for + // this release does not have the corresponding parameter yet const std::string encoder_onnx_path = this->declare_parameter("encoder_onnx_path"); const std::string encoder_engine_path = this->declare_parameter("encoder_engine_path"); @@ -103,9 +107,9 @@ 
LidarCenterPointNode::LidarCenterPointNode(const rclcpp::NodeOptions & node_opti "The size of voxel_size != 3: use the default parameters."); } CenterPointConfig config( - class_names_.size(), point_feature_size, max_voxel_size, point_cloud_range, voxel_size, - downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold, - yaw_norm_thresholds, has_variance_); + class_names_.size(), point_feature_size, cloud_capacity, max_voxel_size, point_cloud_range, + voxel_size, downsample_factor, encoder_in_feature_size, score_threshold, + circle_nms_dist_threshold, yaw_norm_thresholds, has_variance_); detector_ptr_ = std::make_unique(encoder_param, head_param, densification_param, config); diff --git a/perception/lidar_centerpoint/test/test_postprocess_kernel.cpp b/perception/lidar_centerpoint/test/test_postprocess_kernel.cpp index fde4fcbee3d6c..cf09df79a96d3 100644 --- a/perception/lidar_centerpoint/test/test_postprocess_kernel.cpp +++ b/perception/lidar_centerpoint/test/test_postprocess_kernel.cpp @@ -29,6 +29,7 @@ void PostprocessKernelTest::SetUp() constexpr std::size_t class_size{5}; constexpr std::size_t point_feature_size{4}; + const std::size_t cloud_capacity{2000000}; constexpr std::size_t max_voxel_size{100000000}; const std::vector point_cloud_range{-76.8, -76.8, -4.0, 76.8, 76.8, 6.0}; const std::vector voxel_size{0.32, 0.32, 10.0}; @@ -40,7 +41,7 @@ void PostprocessKernelTest::SetUp() constexpr bool has_variance{false}; config_ptr_ = std::make_unique( - class_size, point_feature_size, max_voxel_size, point_cloud_range, voxel_size, + class_size, point_feature_size, cloud_capacity, max_voxel_size, point_cloud_range, voxel_size, downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold, yaw_norm_thresholds, has_variance); @@ -94,12 +95,12 @@ TEST_F(PostprocessKernelTest, SingleDetectionTest) constexpr float detection_x = 70.f; constexpr float detection_y = -38.4f; constexpr float detection_z = 1.0; - constexpr float detection_log_w = std::log(7.0); - constexpr float detection_log_l = std::log(1.0); - constexpr float detection_log_h = std::log(2.0); + const float detection_log_w = std::log(7.0); + const float detection_log_l = std::log(1.0); + const float detection_log_h = std::log(2.0); constexpr float detection_yaw = M_PI_4; - constexpr float detection_yaw_sin = std::sin(detection_yaw); - constexpr float detection_yaw_cos = std::sin(detection_yaw); + const float detection_yaw_sin = std::sin(detection_yaw); + const float detection_yaw_cos = std::sin(detection_yaw); constexpr float detection_vel_x = 5.0; constexpr float detection_vel_y = -5.0; @@ -240,9 +241,9 @@ TEST_F(PostprocessKernelTest, CircleNMSTest) constexpr float detection_x = 70.f; constexpr float detection_y = -38.4f; constexpr float detection_z = 1.0; - constexpr float detection_log_w = std::log(7.0); - constexpr float detection_log_l = std::log(1.0); - constexpr float detection_log_h = std::log(2.0); + const float detection_log_w = std::log(7.0); + const float detection_log_l = std::log(1.0); + const float detection_log_h = std::log(2.0); constexpr float detection_yaw1_sin = 0.0; constexpr float detection_yaw1_cos = 1.0; constexpr float detection_yaw2_sin = 1.0; diff --git a/perception/lidar_centerpoint/test/test_postprocess_kernel.hpp b/perception/lidar_centerpoint/test/test_postprocess_kernel.hpp index d0c9123da9e77..8f35b98119afe 100644 --- a/perception/lidar_centerpoint/test/test_postprocess_kernel.hpp +++ b/perception/lidar_centerpoint/test/test_postprocess_kernel.hpp 
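Aside: cloud_capacity now flows from the parameter file (schema default 2000000, minimum 1) through the node into CenterPointConfig, with a temporary in-code default as noted in node.cpp above. The helper below is only a hedged sketch of declaring and validating such a parameter; it is illustrative and not the code added in this patch.

#include <cstddef>
#include <cstdint>
#include <stdexcept>

#include "rclcpp/rclcpp.hpp"

// Illustrative helper (not part of this patch): read cloud_capacity with a
// fallback default matching the schema, and reject non-positive values.
inline std::size_t declareCloudCapacity(rclcpp::Node & node)
{
  const std::int64_t capacity = node.declare_parameter<std::int64_t>("cloud_capacity", 2000000);
  if (capacity < 1) {
    throw std::invalid_argument("cloud_capacity must be at least 1 point");
  }
  return static_cast<std::size_t>(capacity);
}

The same default and minimum appear in the centerpoint and transfusion schema entries added by this change.
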
@@ -31,17 +31,17 @@ class PostprocessKernelTest : public testing::Test void SetUp() override; void TearDown() override; - cudaStream_t stream_{nullptr}; + cudaStream_t stream_{}; - std::unique_ptr postprocess_cuda_ptr_; - std::unique_ptr config_ptr_; + std::unique_ptr postprocess_cuda_ptr_{}; + std::unique_ptr config_ptr_{}; - cuda::unique_ptr head_out_heatmap_d_; - cuda::unique_ptr head_out_offset_d_; - cuda::unique_ptr head_out_z_d_; - cuda::unique_ptr head_out_dim_d_; - cuda::unique_ptr head_out_rot_d_; - cuda::unique_ptr head_out_vel_d_; + cuda::unique_ptr head_out_heatmap_d_{}; + cuda::unique_ptr head_out_offset_d_{}; + cuda::unique_ptr head_out_z_d_{}; + cuda::unique_ptr head_out_dim_d_{}; + cuda::unique_ptr head_out_rot_d_{}; + cuda::unique_ptr head_out_vel_d_{}; }; } // namespace centerpoint diff --git a/perception/lidar_centerpoint/test/test_preprocess_kernel.hpp b/perception/lidar_centerpoint/test/test_preprocess_kernel.hpp index 57ff51966a417..1a3e850026886 100644 --- a/perception/lidar_centerpoint/test/test_preprocess_kernel.hpp +++ b/perception/lidar_centerpoint/test/test_preprocess_kernel.hpp @@ -28,7 +28,7 @@ class PreprocessKernelTest : public testing::Test void SetUp() override; void TearDown() override; - cudaStream_t stream_{nullptr}; + cudaStream_t stream_{}; std::size_t max_voxel_size_{}; std::size_t max_point_in_voxel_size_{}; diff --git a/perception/lidar_centerpoint/test/test_ros_utils.cpp b/perception/lidar_centerpoint/test/test_ros_utils.cpp index 6044a3b6e77c9..2c900da4e2e0f 100644 --- a/perception/lidar_centerpoint/test/test_ros_utils.cpp +++ b/perception/lidar_centerpoint/test/test_ros_utils.cpp @@ -122,12 +122,13 @@ TEST(TestSuite, convertPoseCovarianceMatrix) TEST(TestSuite, convertTwistCovarianceMatrix) { centerpoint::Box3D box3d; - box3d.vel_x_variance = 0.1; + box3d.vel_x_variance = 0.5; box3d.vel_y_variance = 0.2; + float yaw = 0; - std::array twist_covariance = centerpoint::convertTwistCovarianceMatrix(box3d); + std::array twist_covariance = centerpoint::convertTwistCovarianceMatrix(box3d, yaw); - EXPECT_FLOAT_EQ(twist_covariance[0], 0.1); + EXPECT_FLOAT_EQ(twist_covariance[0], 0.5); EXPECT_FLOAT_EQ(twist_covariance[7], 0.2); } diff --git a/perception/lidar_centerpoint/test/test_voxel_generator.cpp b/perception/lidar_centerpoint/test/test_voxel_generator.cpp index 5b0b3cd112e6c..ea4565aba5a5c 100644 --- a/perception/lidar_centerpoint/test/test_voxel_generator.cpp +++ b/perception/lidar_centerpoint/test/test_voxel_generator.cpp @@ -14,9 +14,9 @@ #include "test_voxel_generator.hpp" -#include +#include "gtest/gtest.h" -#include +#include "sensor_msgs/point_cloud2_iterator.hpp" void VoxelGeneratorTest::SetUp() { @@ -31,6 +31,7 @@ void VoxelGeneratorTest::SetUp() class_size_ = 5; point_feature_size_ = 4; + cloud_capacity_ = 2000000; max_voxel_size_ = 100000000; point_cloud_range_ = std::vector{-76.8, -76.8, -4.0, 76.8, 76.8, 6.0}; voxel_size_ = std::vector{0.32, 0.32, 10.0}; @@ -95,6 +96,8 @@ void VoxelGeneratorTest::SetUp() transform2_ = transform1_; transform2_.header.stamp = cloud2_->header.stamp; transform2_.transform.translation.x = world_origin_x + delta_pointcloud_x_; + + cudaStreamCreate(&stream_); } void VoxelGeneratorTest::TearDown() @@ -108,17 +111,26 @@ TEST_F(VoxelGeneratorTest, SingleFrame) centerpoint::DensificationParam param(world_frame_, num_past_frames); centerpoint::CenterPointConfig config( - class_size_, point_feature_size_, max_voxel_size_, point_cloud_range_, voxel_size_, - downsample_factor_, encoder_in_feature_size_, 
score_threshold_, circle_nms_dist_threshold_, - yaw_norm_thresholds_, has_variance_); + class_size_, point_feature_size_, cloud_capacity_, max_voxel_size_, point_cloud_range_, + voxel_size_, downsample_factor_, encoder_in_feature_size_, score_threshold_, + circle_nms_dist_threshold_, yaw_norm_thresholds_, has_variance_); centerpoint::VoxelGenerator voxel_generator(param, config); std::vector points; points.resize(capacity_ * config.point_feature_size_); std::fill(points.begin(), points.end(), std::nan("")); - bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_); - std::size_t generated_points_num = voxel_generator.generateSweepPoints(points); + auto points_d = cuda::make_unique(capacity_ * config.point_feature_size_); + cudaMemcpy( + points_d.get(), points.data(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyHostToDevice); + + bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_, stream_); + std::size_t generated_points_num = voxel_generator.generateSweepPoints(points_d.get(), stream_); + + cudaMemcpy( + points.data(), points_d.get(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyDeviceToHost); EXPECT_TRUE(status1); EXPECT_EQ(points_per_pointcloud_, generated_points_num); @@ -146,18 +158,27 @@ TEST_F(VoxelGeneratorTest, TwoFramesNoTf) centerpoint::DensificationParam param(world_frame_, num_past_frames); centerpoint::CenterPointConfig config( - class_size_, point_feature_size_, max_voxel_size_, point_cloud_range_, voxel_size_, - downsample_factor_, encoder_in_feature_size_, score_threshold_, circle_nms_dist_threshold_, - yaw_norm_thresholds_, has_variance_); + class_size_, point_feature_size_, cloud_capacity_, max_voxel_size_, point_cloud_range_, + voxel_size_, downsample_factor_, encoder_in_feature_size_, score_threshold_, + circle_nms_dist_threshold_, yaw_norm_thresholds_, has_variance_); centerpoint::VoxelGenerator voxel_generator(param, config); std::vector points; points.resize(capacity_ * config.point_feature_size_); std::fill(points.begin(), points.end(), std::nan("")); - bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_); - bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_); - std::size_t generated_points_num = voxel_generator.generateSweepPoints(points); + auto points_d = cuda::make_unique(capacity_ * config.point_feature_size_); + cudaMemcpy( + points_d.get(), points.data(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyHostToDevice); + + bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_, stream_); + bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_, stream_); + std::size_t generated_points_num = voxel_generator.generateSweepPoints(points_d.get(), stream_); + + cudaMemcpy( + points.data(), points_d.get(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyDeviceToHost); EXPECT_FALSE(status1); EXPECT_FALSE(status2); @@ -171,21 +192,30 @@ TEST_F(VoxelGeneratorTest, TwoFrames) centerpoint::DensificationParam param(world_frame_, num_past_frames); centerpoint::CenterPointConfig config( - class_size_, point_feature_size_, max_voxel_size_, point_cloud_range_, voxel_size_, - downsample_factor_, encoder_in_feature_size_, score_threshold_, circle_nms_dist_threshold_, - yaw_norm_thresholds_, has_variance_); + class_size_, point_feature_size_, cloud_capacity_, max_voxel_size_, point_cloud_range_, + voxel_size_, downsample_factor_, encoder_in_feature_size_, score_threshold_, + 
circle_nms_dist_threshold_, yaw_norm_thresholds_, has_variance_); centerpoint::VoxelGenerator voxel_generator(param, config); std::vector points; points.resize(capacity_ * config.point_feature_size_); std::fill(points.begin(), points.end(), std::nan("")); + auto points_d = cuda::make_unique(capacity_ * config.point_feature_size_); + cudaMemcpy( + points_d.get(), points.data(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyHostToDevice); + tf2_buffer_->setTransform(transform1_, "authority1"); tf2_buffer_->setTransform(transform2_, "authority1"); - bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_); - bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_); - std::size_t generated_points_num = voxel_generator.generateSweepPoints(points); + bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_, stream_); + bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_, stream_); + std::size_t generated_points_num = voxel_generator.generateSweepPoints(points_d.get(), stream_); + + cudaMemcpy( + points.data(), points_d.get(), capacity_ * config.point_feature_size_ * sizeof(float), + cudaMemcpyDeviceToHost); EXPECT_TRUE(status1); EXPECT_TRUE(status2); diff --git a/perception/lidar_centerpoint/test/test_voxel_generator.hpp b/perception/lidar_centerpoint/test/test_voxel_generator.hpp index 8fb7d8dffa44d..5ca97fedc368c 100644 --- a/perception/lidar_centerpoint/test/test_voxel_generator.hpp +++ b/perception/lidar_centerpoint/test/test_voxel_generator.hpp @@ -33,34 +33,37 @@ class VoxelGeneratorTest : public testing::Test void TearDown() override; // These need to be public so that they can be accessed in the test cases - rclcpp::Node::SharedPtr node_; + rclcpp::Node::SharedPtr node_{}; - std::unique_ptr cloud1_, cloud2_; - geometry_msgs::msg::TransformStamped transform1_, transform2_; + std::unique_ptr cloud1_{}, cloud2_{}; + geometry_msgs::msg::TransformStamped transform1_{}, transform2_{}; - std::unique_ptr densification_param_ptr_; - std::unique_ptr config_ptr_; + std::unique_ptr densification_param_ptr_{}; + std::unique_ptr config_ptr_{}; - std::unique_ptr tf2_buffer_; + std::unique_ptr tf2_buffer_{}; - std::string world_frame_; - std::string lidar_frame_; - std::size_t points_per_pointcloud_; - std::size_t capacity_; - double delta_pointcloud_x_; + std::string world_frame_{}; + std::string lidar_frame_{}; + std::size_t points_per_pointcloud_{}; + std::size_t capacity_{}; + double delta_pointcloud_x_{}; - std::size_t class_size_; - float point_feature_size_; - std::size_t max_voxel_size_; + std::size_t class_size_{}; + float point_feature_size_{}; + std::size_t cloud_capacity_{}; + std::size_t max_voxel_size_{}; - std::vector point_cloud_range_; - std::vector voxel_size_; - std::size_t downsample_factor_; - std::size_t encoder_in_feature_size_; - float score_threshold_; - float circle_nms_dist_threshold_; - std::vector yaw_norm_thresholds_; - bool has_variance_; + std::vector point_cloud_range_{}; + std::vector voxel_size_{}; + std::size_t downsample_factor_{}; + std::size_t encoder_in_feature_size_{}; + float score_threshold_{}; + float circle_nms_dist_threshold_{}; + std::vector yaw_norm_thresholds_{}; + bool has_variance_{}; + + cudaStream_t stream_{}; }; #endif // TEST_VOXEL_GENERATOR_HPP_ diff --git a/perception/lidar_transfusion/config/transfusion.param.yaml b/perception/lidar_transfusion/config/transfusion.param.yaml index feabe359caf1f..2f1d56872dad3 100644 --- 
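Aside: the reworked voxel generator tests above move the sweep-point buffer to the GPU: the host vector is copied to a device buffer, enqueuePointCloud/generateSweepPoints run on a CUDA stream, and the result is copied back before the EXPECT_* checks. Below is a condensed sketch of that round trip using only the CUDA runtime API; the tests themselves use cuda::make_unique and synchronous cudaMemcpy, while this sketch uses raw cudaMalloc and asynchronous copies on the same stream, which serves the same purpose here.

#include <cuda_runtime_api.h>

#include <cstddef>
#include <vector>

// Condensed host<->device round trip (sketch only; detector calls are elided).
inline void copyRoundTrip(std::vector<float> & points, cudaStream_t stream)
{
  float * points_d = nullptr;
  const std::size_t bytes = points.size() * sizeof(float);

  cudaMalloc(reinterpret_cast<void **>(&points_d), bytes);
  cudaMemcpyAsync(points_d, points.data(), bytes, cudaMemcpyHostToDevice, stream);

  // ... enqueuePointCloud(cloud, tf_buffer, stream) and
  //     generateSweepPoints(points_d, stream) would be issued here ...

  cudaMemcpyAsync(points.data(), points_d, bytes, cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // results must be visible on the host before assertions
  cudaFree(points_d);
}
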
a/perception/lidar_transfusion/config/transfusion.param.yaml +++ b/perception/lidar_transfusion/config/transfusion.param.yaml @@ -3,6 +3,7 @@ # network class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"] trt_precision: fp16 + cloud_capacity: 2000000 voxels_num: [5000, 30000, 60000] # [min, opt, max] point_cloud_range: [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0] # [x_min, y_min, z_min, x_max, y_max, z_max] voxel_size: [0.3, 0.3, 8.0] # [x, y, z] diff --git a/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp index 5c25a936d5392..0f7c72d065193 100644 --- a/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp +++ b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp @@ -89,13 +89,13 @@ cuda::unique_ptr make_unique() return cuda::unique_ptr{p}; } -constexpr size_t CUDA_ALIGN = 256; +constexpr std::size_t CUDA_ALIGN = 256; template -inline size_t get_size_aligned(size_t num_elem) +inline std::size_t get_size_aligned(size_t num_elem) { - size_t size = num_elem * sizeof(T); - size_t extra_align = 0; + std::size_t size = num_elem * sizeof(T); + std::size_t extra_align = 0; if (size % CUDA_ALIGN != 0) { extra_align = CUDA_ALIGN - size % CUDA_ALIGN; } @@ -103,9 +103,9 @@ inline size_t get_size_aligned(size_t num_elem) } template -inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size) +inline T * get_next_ptr(size_t num_elem, void *& workspace, std::size_t & workspace_size) { - size_t size = get_size_aligned(num_elem); + std::size_t size = get_size_aligned(num_elem); if (size > workspace_size) { throw ::std::runtime_error("Workspace is too small!"); } diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp index 31976de56a9da..4b1262c545533 100644 --- a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp +++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp @@ -25,10 +25,13 @@ class TransfusionConfig { public: TransfusionConfig( - const std::vector & voxels_num, const std::vector & point_cloud_range, - const std::vector & voxel_size, const float circle_nms_dist_threshold, + const std::size_t cloud_capacity, const std::vector & voxels_num, + const std::vector & point_cloud_range, const std::vector & voxel_size, + const std::size_t num_proposals, const float circle_nms_dist_threshold, const std::vector & yaw_norm_thresholds, const float score_threshold) { + cloud_capacity_ = cloud_capacity; + if (voxels_num.size() == 3) { max_voxels_ = voxels_num[2]; @@ -61,6 +64,9 @@ class TransfusionConfig voxel_y_size_ = static_cast(voxel_size[1]); voxel_z_size_ = static_cast(voxel_size[2]); } + if (num_proposals > 0) { + num_proposals_ = num_proposals; + } if (score_threshold > 0.0) { score_threshold_ = score_threshold; } @@ -79,7 +85,7 @@ class TransfusionConfig } ///// INPUT PARAMETERS ///// - const std::size_t cloud_capacity_{1000000}; + std::size_t cloud_capacity_{}; ///// KERNEL PARAMETERS ///// const std::size_t threads_for_voxel_{256}; // threads number for a block const std::size_t points_per_voxel_{20}; @@ -107,7 +113,7 @@ class TransfusionConfig const std::size_t out_size_factor_{4}; const std::size_t max_num_points_per_pillar_{points_per_voxel_}; const std::size_t num_point_values_{4}; - const std::size_t num_proposals_{200}; + std::size_t num_proposals_{200}; // the number of 
feature maps for pillar scatter const std::size_t num_feature_scatter_{pillar_feature_size_}; // the score threshold for classification diff --git a/perception/lidar_transfusion/schema/transfusion.schema.json b/perception/lidar_transfusion/schema/transfusion.schema.json index 41d8d887236a8..0bbbdc888a261 100644 --- a/perception/lidar_transfusion/schema/transfusion.schema.json +++ b/perception/lidar_transfusion/schema/transfusion.schema.json @@ -21,6 +21,12 @@ "default": "fp16", "enum": ["fp16", "fp32"] }, + "cloud_capacity": { + "type": "integer", + "description": "Capacity of the point cloud buffer (should be set to at least the maximum theoretical number of points).", + "default": 2000000, + "minimum": 1 + }, "voxels_num": { "type": "array", "items": { diff --git a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp index e3ea6b3780de8..a07e385208748 100644 --- a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp +++ b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp @@ -27,10 +27,14 @@ LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options) class_names_ = this->declare_parameter>("class_names", descriptor); const std::string trt_precision = this->declare_parameter("trt_precision", descriptor); + const std::size_t cloud_capacity = + this->declare_parameter("cloud_capacity", descriptor); const auto voxels_num = this->declare_parameter>("voxels_num", descriptor); const auto point_cloud_range = this->declare_parameter>("point_cloud_range", descriptor); const auto voxel_size = this->declare_parameter>("voxel_size", descriptor); + const std::size_t num_proposals = + this->declare_parameter("num_proposals", descriptor); const std::string onnx_path = this->declare_parameter("onnx_path", descriptor); const std::string engine_path = this->declare_parameter("engine_path", descriptor); @@ -73,8 +77,8 @@ LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options) DensificationParam densification_param( densification_world_frame_id, densification_num_past_frames); TransfusionConfig config( - voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds, - score_threshold); + cloud_capacity, voxels_num, point_cloud_range, voxel_size, num_proposals, + circle_nms_dist_threshold, yaw_norm_thresholds, score_threshold); const auto allow_remapping_by_area_matrix = this->declare_parameter>("allow_remapping_by_area_matrix", descriptor);
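Aside: with these changes, cloud_capacity and num_proposals in lidar_transfusion are runtime parameters rather than compile-time constants: cloud_capacity always comes from the parameter file (replacing the previous hard-coded 1000000), while num_proposals keeps its built-in default of 200 unless a positive value is supplied. A minimal sketch of that guarded-override pattern, using a hypothetical struct name:

#include <cstddef>

// Hypothetical illustration of the override pattern used by TransfusionConfig above.
struct DetectorCapacityConfig
{
  DetectorCapacityConfig(const std::size_t cloud_capacity, const std::size_t num_proposals)
  {
    cloud_capacity_ = cloud_capacity;  // always taken from the parameter file
    if (num_proposals > 0) {
      num_proposals_ = num_proposals;  // zero (or an omitted parameter) keeps the default
    }
  }

  std::size_t cloud_capacity_{};
  std::size_t num_proposals_{200};
};

Usage: DetectorCapacityConfig{2000000, 500} overrides num_proposals_ to 500, while DetectorCapacityConfig{2000000, 0} keeps the default of 200.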