diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 91c198a2..41d92cd0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,16 +7,29 @@ jobs: # https://help.github.com/en/articles/virtual-environments-for-github-actions#supported-virtual-environments runs-on: ubuntu-18.04
+ strategy:
+ matrix:
+ python-version: [3.6, 3.7, 3.8]
 steps:
- - uses: actions/checkout@v1
- - run: sudo apt-get install libopencv-dev libgflags-dev # dependencies
- - run: sh scripts/download-test-data.sh
- - run: sh scripts/download-tinyvgg-model.sh
- - run: sh scripts/download-openpose-thin-model.sh
- - run: sh scripts/download-openpose-res50-model.sh
- - run: sh scripts/download-openpose-coco-model.sh
- - run: cmake . -DBUILD_TESTS=1 -DBUILD_FAKE=1 -DBUILD_EXAMPLES=1 -DBUILD_LIB=1 -DBUILD_USER_CODES=0 -DEXECUTABLE_OUTPUT_PATH=./bin
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Initialize Python Env
+ run: python3 -m pip install --upgrade pip
+ - name: Install System Dependencies
+ run: sudo apt-get install libopencv-dev libgflags-dev # dependencies
+ - name: Check download scripts
+ run: |
+ sh scripts/download-test-data.sh
+ sh scripts/download-tinyvgg-model.sh
+ sh scripts/download-openpose-thin-model.sh
+ sh scripts/download-openpose-res50-model.sh
+ sh scripts/download-openpose-coco-model.sh
+ - name: Build Project (NO GPU)
+ run: cmake . -DBUILD_TESTS=1 -DBUILD_FAKE=1 -DBUILD_EXAMPLES=1 -DBUILD_LIB=1 -DBUILD_USER_CODES=0 -DEXECUTABLE_OUTPUT_PATH=./bin
 - run: cmake --build . --config Release
 # - run: ctest -C Release
diff --git a/.gitignore b/.gitignore index c3a5e31e..e68640fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ venv
 _build
 docs/make.bat
 examples/user_codes/*.cpp
+debug.*
 !docs/Makefile
 !docs/markdown/images/*
\ No newline at end of file
diff --git a/README.md b/README.md index 62ef7cc9..ff0a7d9d 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 HyperPose is a library for building human pose estimation systems that can efficiently operate in the wild.
-> **Note**: We are in the process of migrating our APIs from 1.0 to 2.0. The migration is expected to finish by July 2020.
+> **News**: The PoseProposal inference model is released! See the HyperPose models on [Google Drive](https://drive.google.com/drive/folders/1w9EjMkrjxOmMw3Rf6fXXkiv_ge7M99jR?usp=sharing).
 
 ## Features
@@ -19,14 +19,14 @@ You can install HyperPose and learn its APIs through [Documentation](https://hyp
 
 ## Example
 
-We provide an example to show human pose estimation achieved by HyperPose. You need to install CUDA Toolkit 10+, TensorRT 7+, OpenCV 3.2+ and gFlags (cmake version), and enable C++ 17 support. Once the prerequisite are ready, run the following script:
+We provide an example to show human pose estimation achieved by HyperPose. You need to install CUDA Toolkit 10+, TensorRT 7+, OpenCV 3.2+ and gFlags (cmake version), and enable C++ 17 support. Once the prerequisites are met, run the following script:
 
 ```bash
-sudo apt -y install git cmake build-essential subversion curl libgflags-dev # libopencv-dev # [optional]
+sudo apt -y install git cmake build-essential subversion libgflags-dev libopencv-dev
 sh scripts/download-test-data.sh # Install data for examples.
 sh scripts/download-tinyvgg-model.sh # Install tiny-vgg model.
 mkdir build && cd build
-cmake .. -DCMAKE_BUILD_TYPE=RELEASE && make -j$(nproc) # Build library && examples.
+cmake .. -DCMAKE_BUILD_TYPE=RELEASE && make -j # Build library && examples.
 ./example.operator_api_batched_images_paf # The output images will be in the build folder.
 ```
diff --git a/docs/markdown/design/design.md b/docs/markdown/design/design.md index 82dbfd55..086c1fac 100644
--- a/docs/markdown/design/design.md
+++ b/docs/markdown/design/design.md
@@ -47,7 +47,7 @@ int main() { using namespace hyperpose; const cv::Size network_resolution{384, 256};
- const dnn::uff uff_model{ "../data/models/hao28-600000-256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
+ const dnn::uff uff_model{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
 // * Input video.
 auto capture = cv::VideoCapture("../data/media/video.avi");
@@ -106,7 +106,7 @@ int main() { using namespace hyperpose; const cv::Size network_resolution{384, 256};
- const dnn::uff uff_model{ "../data/models/hao28-600000-256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
+ const dnn::uff uff_model{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
 // * Input video.
 auto capture = cv::VideoCapture("../data/media/video.avi");
diff --git a/docs/markdown/install/prediction.md b/docs/markdown/install/prediction.md index 435fe6ab..e61c4c9f 100644
--- a/docs/markdown/install/prediction.md
+++ b/docs/markdown/install/prediction.md
@@ -6,10 +6,13 @@ * CMake 3.5+ * Third-Party * OpenCV3.2+.
- * [CUDA 10](https://developer.nvidia.com/cuda-downloads), [TensorRT 7](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt_304/tensorrt-install-guide/index.html).
+ * [CUDA 10.2](https://developer.nvidia.com/cuda-downloads), [CuDNN 7.6.5](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html), [TensorRT 7.0](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). (For Linux users, [Debian Installation](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-debian) is highly recommended.)
 * gFlags(optional, for examples/tests)
 
-> Older versions of the packages may also work but not tested.
+> Other versions of the packages may also work but are not tested.
+
+> Different TensorRT versions require specific CUDA and CuDNN versions. For the specific CUDA and CuDNN requirements of TensorRT 7, please refer to [this](https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#platform-matrix).
+> Also, for Ubuntu 18.04 users, this [3rd party blog](https://ddkang.github.io/2020/01/02/installing-tensorrt.html) may help you.
 
 ## Build On Ubuntu 18.04
@@ -18,7 +21,7 @@ sudo apt -y install cmake libopencv-dev # You may also install OpenCV from source to get best performance.
 
-# >>> Install CUDA/TensorRT
+# >>> Install CUDA/CuDNN/TensorRT
 # >>> Build gFlags(Optional) from source. Install it if you want to run the examples.
 wget https://github.com/gflags/gflags/archive/v2.2.2.zip
@@ -32,7 +35,7 @@ sudo make install
 git clone https://github.com/tensorlayer/hyperpose.git
 cd hyperpose
 mkdir build && cd build
-cmake .. -DCMAKE_BUILD_TYPE=RELEASE && make -j$(nproc)
+cmake .. -DCMAKE_BUILD_TYPE=Release && make -j
 ```
 
 ## Build with User Codes
diff --git a/docs/markdown/performance/prediction.md b/docs/markdown/performance/prediction.md index 724454df..d0c67d06 100644
--- a/docs/markdown/performance/prediction.md
+++ b/docs/markdown/performance/prediction.md
@@ -13,8 +13,6 @@
 > **Environment**: System@Ubuntu18.04, GPU@1070Ti, CPU@i7(12 logic cores).
 >
 > **Tested Video Source**: Crazy Updown Funk(resolution@640x360, frame_count@7458, source@[YouTube](https://www.youtube.com/watch?v=2DiQUX11YaY))
->
-> **Availability**: All model above are available [here](https://github.com/tensorlayer/pretrained-models/tree/master/models/hyperpose).
 
 > OpenPose performance is not tested with batch processing as it seems not to be implemented. (see [here](https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/100))
diff --git a/docs/markdown/performance/supports.md b/docs/markdown/performance/supports.md index e57b8943..5d2e18d4 100644
--- a/docs/markdown/performance/supports.md
+++ b/docs/markdown/performance/supports.md
@@ -16,11 +16,8 @@ ### Supported Post-Processing Methods - Part Association Field(PAF)
-- Pose Proposal Networks(Coming Soon)
+- Pose Proposal Networks
 
 ### Released Prediction Models
-- [Tiny VGG](https://github.com/tensorlayer/pretrained-models/blob/master/models/hyperpose/hao28-600000-256x384.uff)
-- [OpenPose-COCO](https://github.com/tensorlayer/pretrained-models/blob/master/models/hyperpose/openpose_coco.onnx)
-- [OpenPose-Thin](https://github.com/tensorlayer/pretrained-models/blob/master/models/hyperpose/openpose_thin.onnx)
-- [ResNet18(for PAF)](https://github.com/tensorlayer/pretrained-models/blob/master/models/hyperpose/lopps_resnet50.onnx)
\ No newline at end of file
+We have released the models on [Google Drive](https://drive.google.com/drive/folders/1w9EjMkrjxOmMw3Rf6fXXkiv_ge7M99jR?usp=sharing). The `.onnx` and `.uff` files are used for inference.
diff --git a/docs/markdown/quick_start/prediction.md b/docs/markdown/quick_start/prediction.md index 3794c6f2..cb6d0a13 100644
--- a/docs/markdown/quick_start/prediction.md
+++ b/docs/markdown/quick_start/prediction.md
@@ -30,9 +30,10 @@ sh scripts/download-openpose-thin-model.sh # ~20 MB
 sh scripts/download-tinyvgg-model.sh # ~30 MB
 sh scripts/download-openpose-res50-model.sh # ~45 MB
 sh scripts/download-openpose-coco-model.sh # ~200 MB
+sh scripts/download-ppn-res50-model.sh # ~50 MB (PoseProposal Algorithm)
 ```
 
-> You can download them manually to `${HyperPose}/data/models/` via [LINK](https://github.com/tensorlayer/pretrained-models/tree/master/models/hyperpose) **if the network is not working**.
+> You can download them manually to `${HyperPose}/data/models/` via [LINK](https://drive.google.com/drive/folders/1w9EjMkrjxOmMw3Rf6fXXkiv_ge7M99jR?usp=sharing) **if the network is not working**.
 
 ## Predict a sequence of images
 
@@ -46,7 +47,7 @@ sh scripts/download-openpose-coco-model.sh # ~200 MB
 # Take images in ../data/media as a big batch and do prediction.
 ./example.operator_api_batched_images_paf
-# The same as: `./example.operator_api_batched_images_paf --model_file ../data/models/hao28-600000-256x384.uff --input_folder ../data/media --input_width 384 --input_height 256`
+# The same as: `./example.operator_api_batched_images_paf --model_file ../data/models/TinyVGG-V1-HW=256x384.uff --input_folder ../data/media --input_width 384 --input_height 256`
 ```
 
 The output images will be in the build folder.
 
@@ -54,9 +55,15 @@
 ### Using a precise model
 
 ```bash
-./example.operator_api_batched_images_paf --model_file ../data/models/openpose_thin.onnx --input_width 432 --input_height 368
+./example.operator_api_batched_images_paf --model_file ../data/models/openpose-thin-V2-HW=368x432.onnx --input_width 432 --input_height 368
 
-./example.operator_api_batched_images_paf --model_file ../data/models/openpose_coco.onnx --input_width 656 --input_height 368
+./example.operator_api_batched_images_paf --model_file ../data/models/openpose-coco-V2-HW=368x656.onnx --input_width 656 --input_height 368
+```
+
+### Using a PoseProposal model
+
+```bash
+./example.operator_api_batched_images_pose_proposal --model_file ../data/models/ppn-resnet50-V2-HW=384x384.onnx --input_width 384 --input_height 384
 ```
 
 ### Convert models into TensorRT Engine Protobuf format
diff --git a/docs/markdown/tutorial/faq.md b/docs/markdown/tutorial/faq.md index 7159dc6f..f011834c 100644
--- a/docs/markdown/tutorial/faq.md
+++ b/docs/markdown/tutorial/faq.md
@@ -24,7 +24,7 @@ Refer to [here](https://www.learnopencv.com/tag/install/). Download them manually:
-- All prediction models are available [here](https://github.com/tensorlayer/pretrained-models/tree/master/models/hyperpose).
+- All prediction models are available on [Google Drive](https://drive.google.com/drive/folders/1w9EjMkrjxOmMw3Rf6fXXkiv_ge7M99jR?usp=sharing).
 - The test data are taken from the [OpenPose Project](https://github.com/CMU-Perceptual-Computing-Lab/openpose/tree/master/examples/media).
 
 ## Training
@@ -34,7 +34,7 @@ Download them manually:
 ### TensorRT Error?
 - See the `tensorrt.log`. (it contains more information about logging and is located where you execute the binary)
-- You may meet `ERROR: Tensor image cannot be both input and output` when using the `hao28-600000-256x384.uff` model. And just ignore it.
+- You may meet `ERROR: Tensor image cannot be both input and output` when using the `TinyVGG-V1-HW=256x384.uff` model. You can safely ignore it.
 
 ### Performance?
diff --git a/docs/markdown/tutorial/prediction.md b/docs/markdown/tutorial/prediction.md index 69b4cf32..0fa1ea95 100644
--- a/docs/markdown/tutorial/prediction.md
+++ b/docs/markdown/tutorial/prediction.md
@@ -133,7 +133,7 @@ int main() { using namespace hyperpose; const cv::Size network_resolution{384, 256};
- const dnn::uff uff_model{ "../data/models/hao28-600000-256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
+ const dnn::uff uff_model{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", {"outputs/conf", "outputs/paf"} };
 // * Input video.
 auto capture = cv::VideoCapture("../data/media/video.avi");
diff --git a/examples/gen_serialized_engine.example.cpp b/examples/gen_serialized_engine.example.cpp index c1901f77..f172a930 100644
--- a/examples/gen_serialized_engine.example.cpp
+++ b/examples/gen_serialized_engine.example.cpp
@@ -4,7 +4,7 @@ #include // Model flags
-DEFINE_string(model_file, "../data/models/hao28-600000-256x384.uff", "Path to uff model.");
+DEFINE_string(model_file, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to uff model.");
 DEFINE_bool(logging, false, "Print the logging information or not.");
 DEFINE_string(input_name, "image", "The input node name of your model file.
(for Uff model, input/output name tags required)"); diff --git a/examples/operator_api_batched_images_paf.example.cpp b/examples/operator_api_batched_images_paf.example.cpp index a4ce674b..1464d526 100644 --- a/examples/operator_api_batched_images_paf.example.cpp +++ b/examples/operator_api_batched_images_paf.example.cpp @@ -4,7 +4,7 @@ #include // Model flags -DEFINE_string(model_file, "../data/models/hao28-600000-256x384.uff", "Path to uff model."); +DEFINE_string(model_file, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to uff model."); DEFINE_bool(logging, false, "Print the logging information or not."); diff --git a/examples/operator_api_batched_images_pose_proposal.example.cpp b/examples/operator_api_batched_images_pose_proposal.example.cpp index cadaa4d7..7371259e 100644 --- a/examples/operator_api_batched_images_pose_proposal.example.cpp +++ b/examples/operator_api_batched_images_pose_proposal.example.cpp @@ -4,7 +4,7 @@ #include // Model flags -DEFINE_string(model_file, "../data/models/ppn.onnx", "Path to uff model."); +DEFINE_string(model_file, "../data/models/ppn-resnet50-V2-HW=384x384.onnx", "Path to uff model."); DEFINE_int32(input_width, 384, "Width of input image."); DEFINE_int32(input_height, 384, "Height of input image."); diff --git a/examples/operator_api_imshow_paf.example.cpp b/examples/operator_api_imshow_paf.example.cpp index bb787cb4..9c1be59f 100644 --- a/examples/operator_api_imshow_paf.example.cpp +++ b/examples/operator_api_imshow_paf.example.cpp @@ -3,7 +3,7 @@ #include // Model flags -DEFINE_string(model_file, "../data/models/hao28-600000-256x384.uff", "Path to uff model."); +DEFINE_string(model_file, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to uff model."); DEFINE_string(input_name, "image", "The input node name of your uff model file."); DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node names(maybe more than one) of your uff model file."); diff --git a/examples/operator_api_imshow_pose_proposal.example.cpp b/examples/operator_api_imshow_pose_proposal.example.cpp new file mode 100644 index 00000000..b6f08473 --- /dev/null +++ b/examples/operator_api_imshow_pose_proposal.example.cpp @@ -0,0 +1,82 @@ +#include "utils.hpp" +#include +#include + +// Model flags +DEFINE_string(model_file, "../data/models/ppn-resnet50-V2-HW=384x384.onnx", "Path to uff model."); +DEFINE_int32(input_width, 384, "Width of input image."); +DEFINE_int32(input_height, 384, "Height of input image."); + +DEFINE_bool(logging, false, "Print the logging information or not."); + +DEFINE_string(input_video, "../data/media/video.avi", "The input video path."); +DEFINE_bool(camera, false, "Using the camera as input video."); + +int main(int argc, char** argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + + // * Input video. + auto capture = FLAGS_camera ? cv::VideoCapture(0) : cv::VideoCapture(FLAGS_input_video); + if (!capture.isOpened()) + example_log() << "Cannot open cv::VideoCapture."; + + // * Create TensorRT engine. + namespace hp = hyperpose; + if (FLAGS_logging) + hp::enable_logging(); + + auto engine = [&] { + using namespace hp::dnn; + constexpr std::string_view onnx_suffix = ".onnx"; + constexpr std::string_view uff_suffix = ".uff"; + + if (std::equal(onnx_suffix.crbegin(), onnx_suffix.crend(), FLAGS_model_file.crbegin())) + return tensorrt(onnx{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, 1); + + example_log() << "Your model file's suffix is not [.onnx | .uff]. 
Your model file path: " << FLAGS_model_file; + example_log() << "Trying to be viewed as a serialized TensorRT model."; + + return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, 1); + }(); + + // * post-processing: Using Pose Proposal. + hp::parser::pose_proposal parser{ engine.input_size() }; + + using clk_t = std::chrono::high_resolution_clock; + + example_log() << "Inference Started. Use ESC to quit."; + + while (capture.isOpened()) { + + cv::Mat mat; + capture >> mat; + if (mat.empty()) { + example_log() << "Got empty cv::Mat"; + break; + } + + auto beg = clk_t::now(); + + { + // * TensorRT Inference. + auto feature_maps = engine.inference({ mat }); + + // * Post-Processing. + auto poses = parser.process(feature_maps.front()); + + for (auto&& pose : poses) + hp::draw_human(mat, pose); + } + + double fps = 1000. / std::chrono::duration(clk_t::now() - beg).count(); + + cv::putText(mat, "FPS: " + std::to_string(fps), { 10, 10 }, cv::FONT_HERSHEY_SIMPLEX, 0.5, { 0, 255, 0 }, 2); + cv::imshow("HyperPose Prediction", mat); + + if (cv::waitKey(1) == 27) + break; + } + + example_log() << "Inference Done!"; +} \ No newline at end of file diff --git a/examples/operator_api_video_paf.example.cpp b/examples/operator_api_video_paf.example.cpp index 6be5a5c5..ac975e84 100644 --- a/examples/operator_api_video_paf.example.cpp +++ b/examples/operator_api_video_paf.example.cpp @@ -3,7 +3,7 @@ #include // Model flags -DEFINE_string(model_file, "../data/models/hao28-600000-256x384.uff", "Path to uff model."); +DEFINE_string(model_file, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to uff model."); DEFINE_string(input_name, "image", "The input node name of your uff model file."); DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node names(maybe more than one) of your uff model file."); diff --git a/examples/operator_api_video_pose_proposal.example.cpp b/examples/operator_api_video_pose_proposal.example.cpp index 96743388..5cdc923f 100644 --- a/examples/operator_api_video_pose_proposal.example.cpp +++ b/examples/operator_api_video_pose_proposal.example.cpp @@ -3,7 +3,7 @@ #include // Model flags -DEFINE_string(model_file, "../data/models/ppn.onnx", "Path to uff model."); +DEFINE_string(model_file, "../data/models/ppn-resnet50-V2-HW=384x384.onnx", "Path to uff model."); DEFINE_int32(input_width, 384, "Width of input image."); DEFINE_int32(input_height, 384, "Height of input image."); DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute."); @@ -52,7 +52,7 @@ int main(int argc, char** argv) }(); // * post-processing: Using Pose Proposal - hp::parser::pose_proposal parser{ engine.input_size(), 0.05 }; + hp::parser::pose_proposal parser{ engine.input_size() }; using clk_t = std::chrono::high_resolution_clock; diff --git a/examples/stream_api_video_paf.example.cpp b/examples/stream_api_video_paf.example.cpp index ed37f427..36b8670d 100644 --- a/examples/stream_api_video_paf.example.cpp +++ b/examples/stream_api_video_paf.example.cpp @@ -3,7 +3,7 @@ #include #include -DEFINE_string(model_file, "../data/models/hao28-600000-256x384.uff", +DEFINE_string(model_file, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to uff model."); DEFINE_string(input_name, "image", "The input node name of your uff model file."); DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node names(maybe more than one) of your uff model file."); diff --git a/examples/stream_api_video_pose_proposal.example.cpp 
b/examples/stream_api_video_pose_proposal.example.cpp index 6f251082..ca3672ec 100644 --- a/examples/stream_api_video_pose_proposal.example.cpp +++ b/examples/stream_api_video_pose_proposal.example.cpp @@ -3,7 +3,7 @@ #include #include -DEFINE_string(model_file, "../data/models/ppn.onnx", "Path to uff model."); +DEFINE_string(model_file, "../data/models/ppn-resnet50-V2-HW=384x384.onnx", "Path to uff model."); DEFINE_int32(input_width, 384, "Width of input image."); DEFINE_int32(input_height, 384, "Height of input image."); DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute."); diff --git a/examples/tutorial/minimum_operator_api_video.example.cpp b/examples/tutorial/minimum_operator_api_video.example.cpp index 9d8068f2..eeef9c3e 100644 --- a/examples/tutorial/minimum_operator_api_video.example.cpp +++ b/examples/tutorial/minimum_operator_api_video.example.cpp @@ -5,7 +5,7 @@ int main() using namespace hyperpose; const cv::Size network_resolution{ 384, 256 }; - const dnn::uff uff_model{ "../data/models/hao28-600000-256x384.uff", "image", { "outputs/conf", "outputs/paf" } }; + const dnn::uff uff_model{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", { "outputs/conf", "outputs/paf" } }; // * Input video. auto capture = cv::VideoCapture("../data/media/video.avi"); diff --git a/examples/tutorial/minimum_stream_api_video.example.cpp b/examples/tutorial/minimum_stream_api_video.example.cpp index df7ef290..7474295d 100644 --- a/examples/tutorial/minimum_stream_api_video.example.cpp +++ b/examples/tutorial/minimum_stream_api_video.example.cpp @@ -5,7 +5,7 @@ int main() using namespace hyperpose; const cv::Size network_resolution{ 384, 256 }; - const dnn::uff uff_model{ "../data/models/hao28-600000-256x384.uff", "image", { "outputs/conf", "outputs/paf" } }; + const dnn::uff uff_model{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", { "outputs/conf", "outputs/paf" } }; // * Input video. auto capture = cv::VideoCapture("../data/media/video.avi"); diff --git a/include/hyperpose/operator/parser/paf.hpp b/include/hyperpose/operator/parser/paf.hpp index 87572389..350e64f6 100644 --- a/include/hyperpose/operator/parser/paf.hpp +++ b/include/hyperpose/operator/parser/paf.hpp @@ -18,13 +18,13 @@ namespace parser { public: /// \brief Constructor indicating the image size and thresholds. /// - /// \param resolution_size The size(width, height) of expected resolution for the post-processing. /// \param conf_thresh The activation threshold. /// \param paf_thresh The threshold of Part Affinity Field. + /// \param resolution_size The size(width, height) of expected resolution for the post-processing. /// \note Before doing PAF, the (width, height) of feature map will be expanded to `resolution_size` to perform /// a more accurate post processing. And `resolution_size` will be N x the size of first input tensor if it's /// not set. (now, N is 4) - paf(cv::Size resolution_size = cv::Size(UNINITIALIZED_VAL, UNINITIALIZED_VAL), float conf_thresh = 0.05, float paf_thresh = 0.05); + explicit paf(float conf_thresh = 0.05, float paf_thresh = 0.05, cv::Size resolution_size = cv::Size(UNINITIALIZED_VAL, UNINITIALIZED_VAL)); /// \brief Function to process one image. 
/// @@ -77,12 +77,11 @@ namespace parser { ~paf(); private: - cv::Size m_resolution_size; - float m_paf_thresh, m_conf_thresh; - static constexpr std::nullptr_t UNINITIALIZED_PTR = nullptr; static constexpr int UNINITIALIZED_VAL = -1; + float m_conf_thresh, m_paf_thresh; + cv::Size m_resolution_size; int m_n_joints = UNINITIALIZED_VAL, m_n_connections = UNINITIALIZED_VAL; cv::Size m_feature_size = { UNINITIALIZED_VAL, UNINITIALIZED_VAL }; diff --git a/include/hyperpose/operator/parser/proposal_network.hpp b/include/hyperpose/operator/parser/proposal_network.hpp index 718fc317..948e4be5 100644 --- a/include/hyperpose/operator/parser/proposal_network.hpp +++ b/include/hyperpose/operator/parser/proposal_network.hpp @@ -1,29 +1,53 @@ #pragma once +/// \file proposal_network.hpp +/// \brief The post-processing implementation of Pose Proposal Network. + #include "../../utility/data.hpp" #include #include +#include namespace hyperpose { namespace parser { + /// \brief The post-processing implementation of Pose Proposal Network. + /// \see https://openaccess.thecvf.com/content_ECCV_2018/papers/Sekii_Pose_Proposal_Networks_ECCV_2018_paper.pdf class pose_proposal { public: - pose_proposal(cv::Size net_resolution, float point_thresh = 0.15, float limb_thresh = 0.02, float mns_thresh = 0.3, int max_person = 32) - : m_net_resolution(net_resolution) - , m_point_thresh(point_thresh) - , m_limb_thresh(limb_thresh) - , m_nms_thresh(mns_thresh) - , m_max_person(max_person) - { - } + /// \brief Constructor of pose_proposal. + /// + /// \param net_resolution The input resolution of the DNN model. + /// \param point_thresh The threshold of key points. + /// \param limb_thresh The threshold of limbs. + /// \param mns_thresh The threshold of NMS algorithm. + /// \note Example of `net_resolution`: If the input resolution of your DNN model is (384 x 384), then that is the parameter. + explicit pose_proposal(cv::Size net_resolution, float point_thresh = 0.10, float limb_thresh = 0.05, float mns_thresh = 0.3); + /// \brief Function to infer the pose topology of given tensor. + /// + /// \param conf_point + /// \param conf_iou + /// \param x + /// \param y + /// \param w + /// \param h + /// \param edge + /// + /// \note To use this function, the output of your PoseProposal model should be 6 tensors: `[key point confidence, iou conf, center_x, center_y, box_width, box_height, edge confidence]`. + /// This is natively supported by our training framework. + /// + /// \return A list of inferred human poses. std::vector process( const feature_map_t& conf_point, const feature_map_t& conf_iou, const feature_map_t& x, const feature_map_t& y, const feature_map_t& w, const feature_map_t& h, const feature_map_t& edge); + /// \brief Another form of parsing function. + /// + /// \param feature_map_list A list of tensors as shown in another `process` function. + /// \return A list of inferred human poses. inline std::vector process(const std::vector& feature_map_list) { assert(feature_map_list.size() == 7); @@ -37,17 +61,23 @@ namespace parser { feature_map_list.at(6)); } + /// \brief Set the key point threshold. + /// \param thresh key point threshold. void set_point_thresh(float thresh); + + /// \brief Set the limb threshold. + /// \param thresh limb threshold. void set_limb_thresh(float thresh); + + /// \brief Set the NMS threshold. + /// \param thresh NMS threshold. 
void set_nms_thresh(float thresh); - void set_max_person(int n_person); private: cv::Size m_net_resolution; float m_point_thresh; float m_limb_thresh; float m_nms_thresh; - int m_max_person; }; } diff --git a/scripts/auto-format.sh b/scripts/auto-format.sh index bac428a4..0c4d7c7c 100755 --- a/scripts/auto-format.sh +++ b/scripts/auto-format.sh @@ -2,6 +2,8 @@ set -e export CLANG_FORMAT=clang-format +[ $(which $CLANG_FORMAT) ] || python3 -m pip install clang-format + format_dir() { find $1 -regex '.*\.\(cpp\|hpp\|cc\|cxx\)' -exec $CLANG_FORMAT -style=file -i {} \; } diff --git a/scripts/download-openpose-coco-model.sh b/scripts/download-openpose-coco-model.sh index 4c351f57..66ef3172 100755 --- a/scripts/download-openpose-coco-model.sh +++ b/scripts/download-openpose-coco-model.sh @@ -2,17 +2,23 @@ set -e -model_name="openpose_coco.onnx" +[ $(which gdown) ] || (echo "Downloading gdown via PIP" && python3 -m pip install gdown) + +model_name="openpose-coco-V2-HW=368x656.onnx" model_md5="9f422740c7d41d93d6fe16408b0274ef" +gdrive_file_id="15A0SQyPlU2W-Btcf6Ngi6DY0_1CY50d7" cd $(dirname $0) mkdir -p ../data/models cd ../data/models -if [ ! -f "$model_name" -o "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then +if [ ! -f "$model_name" ] || [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then echo "Installing $model_name ..." - URL="https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/hyperpose/$model_name" - curl -vLOJ $URL + python3 -c "import gdown ; gdown.download('"https://drive.google.com/uc?id=$gdrive_file_id"', quiet=False)" fi -echo "$model_name installed!" +if [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Failed to install $model_name. The MD5 code doesn't match!" +else + echo "$model_name installed!" +fi \ No newline at end of file diff --git a/scripts/download-openpose-res50-model.sh b/scripts/download-openpose-res50-model.sh index 50daaf4b..7a51f54b 100644 --- a/scripts/download-openpose-res50-model.sh +++ b/scripts/download-openpose-res50-model.sh @@ -2,17 +2,23 @@ set -e -model_name="lopps_resnet50.onnx" +[ $(which gdown) ] || (echo "Downloading gdown via PIP" && python3 -m pip install gdown) + +model_name="lopps-resnet50-V2-HW=368x432.onnx" model_md5="38c0ad11c76d23f438e1bd1a32101409" +gdrive_file_id="1tb8jnXkoiscfr-ZVydAALg7dtUwAKdEd" cd $(dirname $0) mkdir -p ../data/models cd ../data/models -if [ ! -f "$model_name" -o "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then +if [ ! -f "$model_name" ] || [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then echo "Installing $model_name ..." - URL="https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/hyperpose/$model_name" - curl -vLOJ $URL + python3 -c "import gdown ; gdown.download('"https://drive.google.com/uc?id=$gdrive_file_id"', quiet=False)" fi -echo "$model_name installed!" +if [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Failed to install $model_name. The MD5 code doesn't match!" +else + echo "$model_name installed!" 
+fi \ No newline at end of file diff --git a/scripts/download-openpose-thin-model.sh b/scripts/download-openpose-thin-model.sh index df4e3b64..786d8099 100644 --- a/scripts/download-openpose-thin-model.sh +++ b/scripts/download-openpose-thin-model.sh @@ -2,17 +2,23 @@ set -e -model_name="openpose_thin.onnx" +[ $(which gdown) ] || (echo "Downloading gdown via PIP" && python3 -m pip install gdown) + +model_name="openpose-thin-V2-HW=368x432.onnx" model_md5="65e26d62fd71dc0047c4c319fa3d9096" +gdrive_file_id="1xqXNFPJgsSjgv-AWdqnobcpRmdIu42eh" cd $(dirname $0) mkdir -p ../data/models cd ../data/models -if [ ! -f "$model_name" -o "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then +if [ ! -f "$model_name" ] || [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then echo "Installing $model_name ..." - URL="https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/hyperpose/$model_name" - curl -vLOJ $URL + python3 -c "import gdown ; gdown.download('"https://drive.google.com/uc?id=$gdrive_file_id"', quiet=False)" fi -echo "$model_name installed!" +if [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Failed to install $model_name. The MD5 code doesn't match!" +else + echo "$model_name installed!" +fi \ No newline at end of file diff --git a/scripts/download-ppn-res50-model.sh b/scripts/download-ppn-res50-model.sh new file mode 100644 index 00000000..9fecc378 --- /dev/null +++ b/scripts/download-ppn-res50-model.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -e + +[ $(which gdown) ] || (echo "Downloading gdown via PIP" && python3 -m pip install gdown) + +model_name="ppn-resnet50-V2-HW=384x384.onnx" +model_md5="0d1df2e61c0f550185d562ec67a5f2ca" +gdrive_file_id="1qMSipZ5_QMyRuNQ7ux5isNxwr678ctwG" + +cd $(dirname $0) +mkdir -p ../data/models +cd ../data/models + +if [ ! -f "$model_name" ] || [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Installing $model_name ..." + python3 -c "import gdown ; gdown.download('"https://drive.google.com/uc?id=$gdrive_file_id"', quiet=False)" +fi + +if [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Failed to install $model_name. The MD5 code doesn't match!" +else + echo "$model_name installed!" +fi \ No newline at end of file diff --git a/scripts/download-tinyvgg-model.sh b/scripts/download-tinyvgg-model.sh index aaf11f2b..1629a29f 100644 --- a/scripts/download-tinyvgg-model.sh +++ b/scripts/download-tinyvgg-model.sh @@ -2,17 +2,23 @@ set -e -model_name="hao28-600000-256x384.uff" +[ $(which gdown) ] || (echo "Downloading gdown via PIP" && python3 -m pip install gdown) + +model_name="TinyVGG-V1-HW=256x384.uff" model_md5="6551931d16e55cc9370c5c13d91383c3" +gdrive_file_id="1KlKjNMaruJnNYEXQKqzHGqECBAmwB92T" cd $(dirname $0) mkdir -p ../data/models cd ../data/models -if [ ! -f "$model_name" -o "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then +if [ ! -f "$model_name" ] || [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then echo "Installing $model_name ..." - URL="https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/hyperpose/$model_name" - curl -vLOJ $URL + python3 -c "import gdown ; gdown.download('"https://drive.google.com/uc?id=$gdrive_file_id"', quiet=False)" fi -echo "$model_name installed!" +if [ "$(md5sum "$model_name" | cut -d ' ' -f 1)" != "$model_md5" ] ; then + echo "Failed to install $model_name. The MD5 code doesn't match!" 
+else + echo "$model_name installed!" +fi diff --git a/scripts/export-uff.sh b/scripts/export-uff.sh deleted file mode 100755 index bdd73c26..00000000 --- a/scripts/export-uff.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh -set -e - -cd $(dirname $0)/.. - -MODEL_DIR=${HOME}/Downloads - -DATA_FORMAT=channels_first # Must use channels_first - -height=256 -width=384 - -export_uff() { - local base_model=$1 - local npz_file=$2 - local uff_file=$3 - - ./export.py \ - --data-format=${DATA_FORMAT} \ - --base-model=${base_model} \ - --path-to-npz=${MODEL_DIR}/${npz_file} \ - --height=${height} \ - --width=${width} \ - --uff-filename=${MODEL_DIR}/${uff_file} - - echo "saved to ${MODEL_DIR}/${uff_file}" -} - -# export_uff vgg vgg450000_no_cpm.npz vgg.uff -# export_uff vggtiny new-models/hao18/pose350000.npz vggtiny.uff -export_uff hao28_experimental hao28/pose345000.npz hao28-${height}x${width}.uff -export_uff hao28_experimental pose600000.npz hao28-600000-${height}x${width}.uff - -# TODO: make mobilenet support NCHW -# export_uff mobilenet mbn280000.npz mobilenet.uff diff --git a/scripts/install-mpi.sh b/scripts/install-mpi.sh deleted file mode 100755 index 15b950e6..00000000 --- a/scripts/install-mpi.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh -set -e - -if [ $(uname) == "Darwin" ]; then - NPROC=$(sysctl -n hw.ncpu) -else - NPROC=$(nproc) -fi - -mkdir -p $HOME/openmpi_tmp && cd $HOME/openmpi_tmp - -MPI_MAJOR=3 -MPI_MINOR=1 - -VERSION=${MPI_MAJOR}.${MPI_MINOR}.1 -FILENAME=openmpi-${VERSION}.tar.bz2 -FOLDER=openmpi-${VERSION} -URL=https://download.open-mpi.org/release/open-mpi/v${MPI_MAJOR}.${MPI_MINOR}/${FILENAME} - -[ ! -f ${FILENAME} ] && curl -vLOJ $URL -tar -xf ${FILENAME} -cd ${FOLDER} - -# will take about 8 min or longer depends on your machine -./configure --prefix=$HOME/local/openmpi -make -j ${NPROC} all -make install - -rm -rf $HOME/openmpi_tmp - -echo 'Update the PATH with OpenMPI bin by running: PATH=$PATH:$HOME/local/openmpi/bin' -echo 'Update the PATH in ~/.bashrc if you want OpenMPI to be ready once the machine start' diff --git a/scripts/install-pafprocess.sh b/scripts/install-pafprocess.sh deleted file mode 100755 index cac6fb2a..00000000 --- a/scripts/install-pafprocess.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -set -e - -paf_install_path="$(dirname $0)/../openpose_plus/inference" -[ ! -d $paf_install_path ] && mkdir $paf_install_path -cd $paf_install_path - -[ ! 
-d pafprocess ] && svn export https://github.com/ildoonet/tf-pose-estimation/trunk/tf_pose/pafprocess -cd pafprocess - -swig -python -c++ pafprocess.i -python setup.py build_ext --inplace diff --git a/scripts/run-uff-cpp.sh b/scripts/run-uff-cpp.sh deleted file mode 100755 index e57123ce..00000000 --- a/scripts/run-uff-cpp.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh -set -e - -make -echo - -MODEL_DIR=$HOME/Downloads -MODEL_FILE=${MODEL_DIR}/hao28-600000-256x384.uff - -repeat=20 -gksize=13 - -run_batch_example() { - local BIN=$(pwd)/cmake-build/$(uname -s)/example-batch-detector - local IMAGES=$(echo $@ | tr ' ' ',') - local batch_size=4 - ${BIN} \ - --input_height=256 \ - --input_width=384 \ - --batch_size=${batch_size} \ - --use_f16 \ - --gauss_kernel_size=${gksize} \ - --repeat ${repeat} \ - --model_file=${MODEL_FILE} \ - --image_files=${IMAGES} -} - -run_stream_example() { - local BIN=$(pwd)/cmake-build/$(uname -s)/example-stream-detector - local IMAGES=$(echo $@ | tr ' ' ',') - local buffer_size=4 - ${BIN} \ - --input_height=256 \ - --input_width=384 \ - --buffer_size=${buffer_size} \ - --use_f16 \ - --gauss_kernel_size=${gksize} \ - --repeat ${repeat} \ - --model_file=${MODEL_FILE} \ - --image_files=${IMAGES} -} - -with_images() { - local D=$HOME/var/data/openpose - $1 \ - $D/examples/media/COCO_val2014_000000000192.png \ - $D/new-tests/cam0_27.png \ - $D/126/cam2_3938.png \ - $D/126/cam1_2386.png -} - -# with_images run_batch_example -with_images run_stream_example diff --git a/scripts/run-uff-py.sh b/scripts/run-uff-py.sh deleted file mode 100755 index 6a79d095..00000000 --- a/scripts/run-uff-py.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh -set -e - -MODEL_DIR=$HOME/Downloads -DATA_DIR=$HOME/var/data/openpose - -# cam0_27.png -# cam0_59.png -# cam2_21.png -# cam3_107.png -# cam3_146.png -# cam3_148.png -# cam3_52.png -# cam3_63.png - -test_vgg_model() { - local images=$(echo $@ | tr ' ' ',') - echo ${images} - ./uff-runner.py \ - --base-model=vgg \ - --path-to-npz=${MODEL_DIR}/vgg450000_no_cpm.npz \ - --images=${images} -} - -test_vggtiny_model() { - local images=$(echo $@ | tr ' ' ',') - echo ${images} - ./uff-runner.py \ - --base-model=vggtiny \ - --path-to-npz=${MODEL_DIR}/new-models/hao18/pose350000.npz \ - --images=${images} -} - -test_hao28_model() { - local images=$(echo $@ | tr ' ' ',') - echo ${images} - ./uff-runner.py \ - --base-model=hao28_experimental \ - --path-to-npz=${MODEL_DIR}/hao28/pose345000.npz \ - --images=${images} -} - -# test_vgg_model \ -# ${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \ -# ${DATA_DIR}/new-tests/cam0_27.png - -test_vggtiny_model \ - ${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \ - ${DATA_DIR}/new-tests/cam0_27.png - -# test_hao28_model \ -# ${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \ -# ${DATA_DIR}/new-tests/cam0_27.png diff --git a/src/fake/fake_paf.cpp b/src/fake/fake_paf.cpp index 5f45d203..94c8377c 100644 --- a/src/fake/fake_paf.cpp +++ b/src/fake/fake_paf.cpp @@ -13,7 +13,7 @@ namespace parser { struct paf::peak_finder_impl { }; - paf::paf(cv::Size resolution_size, float conf_thresh, float paf_thresh) + paf::paf(float conf_thresh, float paf_thresh, cv::Size resolution_size) : m_resolution_size(resolution_size) , m_conf_thresh(conf_thresh) , m_paf_thresh(paf_thresh) diff --git a/src/paf.cpp b/src/paf.cpp index 47382551..33760b62 100644 --- a/src/paf.cpp +++ b/src/paf.cpp @@ -276,18 +276,18 @@ namespace parser { std::unique_ptr> m_upsample_paf, m_upsample_conf; }; - paf::paf(cv::Size resolution_size, 
float conf_thresh, float paf_thresh) - : m_resolution_size(resolution_size) - , m_conf_thresh(conf_thresh) + paf::paf(float conf_thresh, float paf_thresh, cv::Size resolution_size) + : m_conf_thresh(conf_thresh) , m_paf_thresh(paf_thresh) + , m_resolution_size(resolution_size) , m_ttl(UNINITIALIZED_PTR) { } paf::paf(const paf& p) - : m_resolution_size(p.m_resolution_size) - , m_conf_thresh(p.m_conf_thresh) + : m_conf_thresh(p.m_conf_thresh) , m_paf_thresh(p.m_paf_thresh) + , m_resolution_size(p.m_resolution_size) , m_ttl(UNINITIALIZED_PTR) { } diff --git a/src/pose_proposal.cpp b/src/pose_proposal.cpp index 58210d38..aa491f23 100644 --- a/src/pose_proposal.cpp +++ b/src/pose_proposal.cpp @@ -20,7 +20,6 @@ namespace parser { // 6: edge_confidence N x 17 x 9 x 9 x 12 x 12 // -> Return human_t {x, y} \in [0, 1]] - inline const coco_pair_list_t COCOPAIR_STD = { { 1, 8 }, // 0 { 8, 9 }, // 1 @@ -41,6 +40,14 @@ namespace parser { { 15, 17 }, // 18 }; // See https://www.cnblogs.com/caffeaoto/p/7793994.html. + pose_proposal::pose_proposal(cv::Size net_resolution, float point_thresh, float limb_thresh, float mns_thresh) + : m_net_resolution(std::move(net_resolution)) + , m_point_thresh(point_thresh) + , m_limb_thresh(limb_thresh) + , m_nms_thresh(mns_thresh) + { + } + void pose_proposal::set_point_thresh(float thresh) { m_point_thresh = thresh; @@ -56,11 +63,6 @@ namespace parser { m_nms_thresh = thresh; } - void pose_proposal::set_max_person(int n_person) - { - m_max_person = n_person; - } - constexpr int MIN_REQUIRED_POINTS_FOR_A_MAN = 3; // 3 Connection to be a man; std::vector pose_proposal::process( @@ -107,33 +109,24 @@ namespace parser { auto nms = [this](key_point_bboxes boxes) { key_point_bboxes ret; - if (boxes.size() == 0) - return ret; - std::multimap idxs; - for (size_t i = 0; i < boxes.size(); ++i) - idxs.emplace(boxes[i].second.br().y, i); - - while (idxs.size() > 0) { - auto last = --std::end(idxs); - const auto& box = boxes[last->second]; - - idxs.erase(last); - - for (auto pos = idxs.begin(); pos != idxs.end();) { - const auto& box_ = boxes[pos->second]; - - float int_area = (box.second & box_.second).area(); - float union_area = box.second.area() + box_.second.area() - int_area; - float overlap = int_area / union_area; - - if (overlap > m_nms_thresh) - pos = idxs.erase(pos); - else - ++pos; - } + std::sort(boxes.begin(), boxes.end(), [](const std::pair& l, const std::pair& r) { + return l.first.conf < r.first.conf; + }); - ret.push_back(box); + const auto iou = [](const auto& l, const auto& r) { + float int_area = (l.second & r.second).area(); + float union_area = l.second.area() + r.second.area() - int_area; + float overlap = int_area / union_area; + return overlap; + }; + + while (!boxes.empty()) { + ret.emplace_back(boxes.back()); + boxes.pop_back(); + for (size_t i = 0; i < boxes.size(); i++) + if (iou(ret.back(), boxes[i]) >= m_nms_thresh) + boxes.erase(boxes.begin() + i); } return ret; @@ -148,20 +141,6 @@ namespace parser { std::vector key_points; key_points.reserve(n_key_points); - // TODO. 
Debug - cv::Mat debug = cv::Mat::zeros(m_net_resolution, CV_8UC(3)); - - int width = debug.size().width; - int height = debug.size().height; - - int stepSize = width / 12; - - for (int i = 0; i < height; i += stepSize) - cv::line(debug, cv::Point(0, i), cv::Point(width, i), cv::Scalar(0, 255, 255)); - - for (int i = 0; i < width; i += stepSize) - cv::line(debug, cv::Point(i, 0), cv::Point(i, height), cv::Scalar(255, 0, 255)); - for (size_t i = 0; i < n_key_points; ++i) { key_point_bboxes kp_list; @@ -172,22 +151,15 @@ namespace parser { if (m_point_thresh < conf_point.view()[feature_map_index]) kp_list.emplace_back( meta_info{ (int)j, conf_point.view()[feature_map_index] }, - cv::Rect(std::max(std::min(m_net_resolution.width, static_cast(x.view()[feature_map_index])), 0), - std::max(std::min(m_net_resolution.height, static_cast(y.view()[feature_map_index])), 0), + cv::Rect(std::max(std::min(m_net_resolution.width, static_cast(x.view()[feature_map_index] - w.view()[feature_map_index] / 2)), 0), + std::max(std::min(m_net_resolution.height, static_cast(y.view()[feature_map_index] - h.view()[feature_map_index] / 2)), 0), std::max(std::min(m_net_resolution.width, static_cast(w.view()[feature_map_index])), 0), std::max(std::min(m_net_resolution.height, static_cast(h.view()[feature_map_index])), 0))); } - auto nms_kp_list = nms(kp_list); + auto nms_kp_list = nms(std::move(kp_list)); - std::sort(nms_kp_list.begin(), nms_kp_list.end(), [](const std::pair& l, const std::pair& r) { - return l.first.conf > r.first.conf; - }); - - if (nms_kp_list.size() > m_max_person) - nms_kp_list.erase(std::next(nms_kp_list.begin(), m_max_person), nms_kp_list.end()); - - info("Key Point @ ", i, " got ", nms_kp_list.size(), " proposals after nms & thresh.\n"); + info("Key Point @ ", i, " got ", nms_kp_list.size(), " bounding boxes after thresh + NMS.\n"); key_points.push_back(std::move(nms_kp_list)); } @@ -196,28 +168,22 @@ namespace parser { size_t n_range = std::min(n_edges, COCOPAIR_STD.size()); const size_t n_neighbors = h_edge_neighbor * w_edge_neighbor; + for (size_t i = 0; i < n_range; ++i) { auto& from = key_points.at(COCOPAIR_STD[i].first); auto& to = key_points.at(COCOPAIR_STD[i].second); - // 17 x 9 x 9 x 12 x 12 - float best_conf = m_limb_thresh; - int best_to_id = -1; - - for (auto&& from_p : from) { - if (!from_p.first.has_root()) { - from_p.first.set_root(ret_poses.size()); - ret_poses.push_back(human_t{}); - ret_poses.back().parts[COCOPAIR_STD[i].first] = { - true, - (float)from_p.second.x / m_net_resolution.width, - (float)from_p.second.y / m_net_resolution.height, - from_p.first.conf - }; - ret_poses.back().score = 1.; - } // from_p must have root. + struct limb { + int from, to; + float conf; + }; - const auto& from_grid_index = from_p.first.grid_index; + std::vector limb_candidates{}; + + // 17 x 9 x 9 x 12 x 12 + for (size_t from_index = 0; from_index < from.size(); ++from_index) { + auto& from_p = from[from_index]; + const auto& from_grid_index = from_p.first.grid_index; // Location of start point in the feature map. 
for (size_t j = 0; j < n_neighbors; ++j) { const size_t edge_conf_index = i * (n_grids * n_neighbors) + j * n_grids + from_grid_index; @@ -231,76 +197,117 @@ namespace parser { const size_t aim_to_x = from_grid_x + aim_neighbor_x - w_edge_neighbor / 2; bool out_of_range = (aim_to_x < 0 || aim_to_x >= w_grid || aim_to_y < 0 || aim_to_y >= h_grid); - - if (!out_of_range && edge.view()[edge_conf_index] > best_conf) { - for (size_t k = 0; k < to.size(); ++k) { - auto&& p_to = to[k]; + auto possible_connection_conf = edge.view()[edge_conf_index]; + if (!out_of_range && possible_connection_conf > m_limb_thresh) { + for (size_t to_index = 0; to_index < to.size(); ++to_index) { + auto&& p_to = to[to_index]; size_t to_grid_y = p_to.first.grid_index / w_grid; size_t to_grid_x = p_to.first.grid_index - to_grid_y * w_grid; if (to_grid_x == aim_to_x && to_grid_y == aim_to_y) { // Match Point! - best_conf = edge.view()[edge_conf_index]; - best_to_id = k; - break; + limb_candidates.push_back({ (int)from_index, (int)to_index, possible_connection_conf }); } } } } + // if (best_to_id != -1) { + // ret_poses[from_p.first.root()].parts[COCOPAIR_STD[i].second] = { + // true, + // (float)(to[best_to_id].second.x + to[best_to_id].second.width / 2) / m_net_resolution.width, + // (float)(to[best_to_id].second.y + to[best_to_id].second.height / 2) / m_net_resolution.height, + // to[best_to_id].first.conf + // }; + // ret_poses[from_p.first.root()].score += 1.; + // } + } + + // All right. We now get all possible [from, to] pairs. Let's choose them by rank. + std::sort(limb_candidates.begin(), limb_candidates.end(), [](auto& l, auto& r) { + return l.conf < r.conf; + }); + + std::vector from_check(from.size(), false); + std::vector to_check(to.size(), false); + while (!limb_candidates.empty()) { + auto cur = limb_candidates.back(); + limb_candidates.pop_back(); + + if (from_check[cur.from] || to_check[cur.to]) // Point already taken. + continue; + + auto& from_val = from[cur.from]; + auto& to_val = to[cur.to]; + + size_t root_index = [&]() -> size_t { + if (from_val.first.has_root() == to_val.first.has_root()) { + ret_poses.emplace_back(); + return ret_poses.size() - 1; + } + + return from_val.first.has_root() ? 
from_val.first.root() : to_val.first.root(); + }(); + + if (!ret_poses[root_index].parts[COCOPAIR_STD[i].first].has_value) { + ret_poses[root_index].parts[COCOPAIR_STD[i].first] = { + true, + (float)(from_val.second.x + from_val.second.width / 2) / m_net_resolution.width, + (float)(from_val.second.y + from_val.second.height / 2) / m_net_resolution.height, + from_val.first.conf + }; + from_val.first.set_root(root_index); + ret_poses[root_index].score += 1.; + } - if (best_to_id != -1) { - ret_poses[from_p.first.root()].parts[COCOPAIR_STD[i].second] = { + if (!ret_poses[root_index].parts[COCOPAIR_STD[i].second].has_value) { + ret_poses[root_index].parts[COCOPAIR_STD[i].second] = { true, - (float)to[best_to_id].second.x / m_net_resolution.width, - (float)to[best_to_id].second.y / m_net_resolution.height, - to[best_to_id].first.conf + (float)(to_val.second.x + to_val.second.width / 2) / m_net_resolution.width, + (float)(to_val.second.y + to_val.second.height / 2) / m_net_resolution.height, + to_val.first.conf }; - ret_poses[from_p.first.root()].score += 1.; + to_val.first.set_root(root_index); + ret_poses[root_index].score += 1.; } } } - ret_poses.erase(std::remove_if(ret_poses.begin(), ret_poses.end(), [](const human_t& pose) { - return pose.score <= 1; - }), - ret_poses.end()); - info("Detected ", ret_poses.size(), " human parts originally\n"); - constexpr size_t hash_grid = 64; - constexpr size_t nan = std::numeric_limits::max(); - std::array, hash_grid> hash_table{}; // Avoid Stack OverFlow! - for (auto&& row : hash_table) - row.fill(nan); + constexpr size_t grid_size = 64; + std::array, grid_size>, grid_size> hash_table{}; - const auto query_table = [hash_grid, &hash_table](const body_part_t& part) -> uint16_t& { + const auto query_table = [grid_size, &hash_table](const body_part_t& part) -> std::vector& { assert(part.x >= 0); assert(part.y >= 0); - size_t x_ind = part.x * hash_grid; - size_t y_ind = part.y * hash_grid; - x_ind = (x_ind == hash_grid) ? hash_grid - 1 : x_ind; - y_ind = (y_ind == hash_grid) ? hash_grid - 1 : y_ind; + size_t x_ind = part.x * grid_size; + size_t y_ind = part.y * grid_size; + x_ind = (x_ind == grid_size) ? grid_size - 1 : x_ind; + y_ind = (y_ind == grid_size) ? grid_size - 1 : y_ind; return hash_table[x_ind][y_ind]; }; for (size_t i = 0; i < ret_poses.size(); ++i) { - auto& this_human = ret_poses[i]; // We are trying to find other parts for current human. - for (size_t j = 0; j < this_human.parts.size(); ++j) { - const auto& this_part = this_human.parts[j]; // Current part: Unique Or Belong to Others. + auto& cur_human = ret_poses[i]; // We are trying to find other parts for current human. + if (cur_human.score > n_key_points - 0.1) + continue; + + for (size_t j = 0; j < cur_human.parts.size(); ++j) { + const auto& this_part = cur_human.parts[j]; // Current part: Unique Or Belong to Others. if (this_part.has_value) { - const size_t root_index = query_table(this_part); - if (root_index >= ret_poses.size()) { // Unique. - query_table(this_part) = i; - } else { - if (root_index == i) // My root is the current person? Skip. - continue; + auto& maybes = query_table(this_part); - auto& aim = ret_poses[root_index]; // Get root person. 
+ bool remove_cur = false; + for (auto possible_id : maybes) { + auto& maybe_combine = ret_poses[possible_id]; + if (possible_id == i || maybe_combine.parts[j].y != this_part.y || maybe_combine.parts[j].x != this_part.x) + continue; - for (size_t u = 0; u < this_human.parts.size(); ++u) { - const auto& part = ret_poses[i].parts[u]; - if (part.has_value && !aim.parts.at(u).has_value) { - query_table(part) = root_index; - aim.parts[u] = part; - aim.score += 1.0; + remove_cur = true; + for (size_t u = 0; u < cur_human.parts.size(); ++u) { + const auto& cur_part = cur_human.parts[u]; + if (cur_part.has_value && !maybe_combine.parts[u].has_value) { + maybe_combine.parts[u] = cur_part; + maybe_combine.score += 1.0; + query_table(cur_part).push_back(i); } } @@ -308,6 +315,11 @@ namespace parser { --i; break; } + + if (remove_cur) + break; + + maybes.push_back(i); } } } diff --git a/src/tensorrt.cpp b/src/tensorrt.cpp index d2cf2ea3..e054a6cb 100644 --- a/src/tensorrt.cpp +++ b/src/tensorrt.cpp @@ -106,13 +106,13 @@ namespace dnn { struct tensorrt::cuda_dep { using cuda_buffer_t = ttl::cuda_tensor; // [batch_size, data_size] - std::unordered_map m_cuda_buffers; destroy_ptr m_engine; destroy_ptr m_context = nullptr; explicit cuda_dep(nvinfer1::ICudaEngine* ptr) - : m_engine(ptr), m_context(m_engine->createExecutionContext()) + : m_engine(ptr) + , m_context(m_engine->createExecutionContext()) { } };
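This patch reorders the `paf` parser constructor (thresholds first, resolution last) and drops the `max_person` parameter from `pose_proposal`, so existing callers need small updates. The sketch below shows caller code against the new signatures; it assumes the umbrella header `<hyperpose/hyperpose.hpp>` and uses the default model paths from the bundled examples, so treat it as an illustration rather than the exact example code.

```cpp
// Sketch only: constructing engines and parsers with the signatures in this patch.
// Header name and model paths follow the bundled examples; adjust to your setup.
#include <hyperpose/hyperpose.hpp>

int main()
{
    using namespace hyperpose;

    // PAF pipeline: the `paf` parser now takes its thresholds first; the
    // post-processing resolution defaults to 4x the feature-map size when unset.
    dnn::tensorrt paf_engine(
        dnn::uff{ "../data/models/TinyVGG-V1-HW=256x384.uff", "image", { "outputs/conf", "outputs/paf" } },
        /*input size=*/{ 384, 256 }, /*max batch size=*/8);
    parser::paf paf_parser{ /*conf_thresh=*/0.05, /*paf_thresh=*/0.05 };

    // PoseProposal pipeline: the parser no longer takes a `max_person` argument.
    dnn::tensorrt ppn_engine(
        dnn::onnx{ "../data/models/ppn-resnet50-V2-HW=384x384.onnx" },
        /*input size=*/{ 384, 384 }, /*max batch size=*/8);
    parser::pose_proposal ppn_parser{ ppn_engine.input_size() };
}
```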
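The rewritten key-point NMS in `src/pose_proposal.cpp` replaces the multimap bookkeeping with a greedy pass: boxes are sorted by confidence, the most confident remaining box is kept, and every remaining box whose IoU with it reaches the threshold is dropped. Below is a standalone sketch of that scheme (plain structs instead of the library's `cv::Rect` key-point boxes); removal uses the erase/remove idiom so no element is skipped while the vector shrinks.

```cpp
// Standalone sketch of the greedy, confidence-ordered NMS scheme.
#include <algorithm>
#include <vector>

struct box { float x, y, w, h, conf; };

static float iou(const box& a, const box& b)
{
    const float ix = std::max(0.f, std::min(a.x + a.w, b.x + b.w) - std::max(a.x, b.x));
    const float iy = std::max(0.f, std::min(a.y + a.h, b.y + b.h) - std::max(a.y, b.y));
    const float inter = ix * iy;
    return inter / (a.w * a.h + b.w * b.h - inter);
}

std::vector<box> nms(std::vector<box> boxes, float thresh)
{
    // Sort ascending by confidence so the best candidate sits at the back.
    std::sort(boxes.begin(), boxes.end(),
        [](const box& l, const box& r) { return l.conf < r.conf; });

    std::vector<box> kept;
    while (!boxes.empty()) {
        kept.push_back(boxes.back()); // keep the most confident remaining box
        boxes.pop_back();
        // Drop every remaining box that overlaps the kept one beyond the threshold.
        boxes.erase(std::remove_if(boxes.begin(), boxes.end(),
                        [&](const box& b) { return iou(kept.back(), b) >= thresh; }),
            boxes.end());
    }
    return kept;
}
```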
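Limb assembly in the new post-processing follows the same greedy idea: every (from, to) candidate whose edge confidence clears the limb threshold is collected, ranked by confidence, and accepted so that each key point is claimed by at most one limb of a given type. The sketch below isolates that matching step with plain index pairs instead of the parser's grid-indexed key points; it illustrates the scheme rather than reproducing the parser's exact bookkeeping.

```cpp
// Sketch of greedy limb matching: rank candidates by confidence and accept
// each one only if neither endpoint has already been claimed.
#include <algorithm>
#include <vector>

struct limb { int from, to; float conf; };

std::vector<limb> match_limbs(std::vector<limb> candidates, size_t n_from, size_t n_to)
{
    std::sort(candidates.begin(), candidates.end(),
        [](const limb& l, const limb& r) { return l.conf < r.conf; });

    std::vector<bool> from_taken(n_from, false), to_taken(n_to, false);
    std::vector<limb> accepted;
    while (!candidates.empty()) {
        const limb cur = candidates.back(); // highest remaining confidence
        candidates.pop_back();
        if (from_taken[cur.from] || to_taken[cur.to]) // endpoint already claimed
            continue;
        from_taken[cur.from] = true;
        to_taken[cur.to] = true;
        accepted.push_back(cur);
    }
    return accepted;
}
```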