From abeae6f01a7ecc34edc251c3f08b6dfd41d3dd02 Mon Sep 17 00:00:00 2001
From: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Date: Thu, 12 Dec 2024 20:53:52 +0800
Subject: [PATCH] [Feat][Deploy] Add ultrainfer and paddlex-hpi (#2625)

---
 .precommit/check_custom.py | 10 +- libs/paddlex-hpi/MANIFEST.in | 2 + libs/paddlex-hpi/README.md | 0 libs/paddlex-hpi/pyproject.toml | 18 + libs/paddlex-hpi/requirements.txt | 7 + libs/paddlex-hpi/scripts/build_wheel.sh | 3 + libs/paddlex-hpi/scripts/run_tests.sh | 3 + libs/paddlex-hpi/src/paddlex_hpi/__init__.py | 15 + libs/paddlex-hpi/src/paddlex_hpi/_config.py | 218 + .../src/paddlex_hpi/_model_info.py | 59 + .../src/paddlex_hpi/_utils/__init__.py | 13 + .../src/paddlex_hpi/_utils/compat.py | 20 + .../src/paddlex_hpi/_utils/misc.py | 25 + .../src/paddlex_hpi/_utils/typing.py | 24 + .../paddlex_hpi/model_info_collection.json | 4422 +++++++++++++++++ .../src/paddlex_hpi/models/__init__.py | 51 + .../paddlex_hpi/models/anomaly_detection.py | 56 + .../src/paddlex_hpi/models/base.py | 189 + .../paddlex_hpi/models/face_recognition.py | 23 + .../paddlex_hpi/models/formula_recognition.py | 56 + .../paddlex_hpi/models/general_recognition.py | 56 + .../models/image_classification.py | 91 + .../src/paddlex_hpi/models/image_unwarping.py | 56 + .../models/instance_segmentation.py | 105 + .../models/multilabel_classification.py | 80 + .../paddlex_hpi/models/object_detection.py | 98 + .../models/semantic_segmentation.py | 56 + .../paddlex_hpi/models/table_recognition.py | 68 + .../src/paddlex_hpi/models/text_detection.py | 167 + .../paddlex_hpi/models/text_recognition.py | 86 + .../src/paddlex_hpi/models/ts_ad.py | 58 + .../src/paddlex_hpi/models/ts_cls.py | 55 + .../src/paddlex_hpi/models/ts_fc.py | 58 + libs/paddlex-hpi/test_requirements.txt | 2 + libs/paddlex-hpi/tests/__init__.py | 13 + libs/paddlex-hpi/tests/models/__init__.py | 13 + libs/paddlex-hpi/tests/models/base.py | 117 + .../tests/models/test_anomaly_detection.py | 49 + .../tests/models/test_formula_recognition.py | 45 + .../tests/models/test_general_recognition.py | 49 + .../tests/models/test_image_classification.py | 53 + .../tests/models/test_image_unwarping.py | 51 + .../models/test_instance_segmentation.py | 54 + .../models/test_multilabel_classification.py | 53 + .../tests/models/test_object_detection.py | 53 + .../models/test_semantic_segmentation.py | 49 + .../tests/models/test_table_recognition.py | 59 + .../tests/models/test_text_detection.py | 47 + .../tests/models/test_text_recognition.py | 52 + libs/paddlex-hpi/tests/models/test_ts_ad.py | 49 + libs/paddlex-hpi/tests/models/test_ts_cls.py | 50 + libs/paddlex-hpi/tests/models/test_ts_fc.py | 51 + .../tests/testing_utils/__init__.py | 13 + libs/paddlex-hpi/tests/testing_utils/cv.py | 96 + .../tests/testing_utils/download.py | 107 + libs/paddlex-hpi/tests/testing_utils/misc.py | 19 + libs/ultrainfer/.gitignore | 54 + libs/ultrainfer/CMakeLists.txt | 735 +++ libs/ultrainfer/LICENSE | 201 + libs/ultrainfer/ThirdPartyNotices.txt | 1946 ++++++++ libs/ultrainfer/UltraInfer.cmake.in | 335 ++ libs/ultrainfer/UltraInferCSharp.cmake.in | 13 + libs/ultrainfer/VERSION_NUMBER | 1 + libs/ultrainfer/cmake/UltraInferConfig.cmake | 10 + libs/ultrainfer/cmake/ascend.cmake | 32 + libs/ultrainfer/cmake/build_paddle2onnx.cmake | 40 + libs/ultrainfer/cmake/build_tools.cmake | 87 + libs/ultrainfer/cmake/check.cmake | 45 + libs/ultrainfer/cmake/config_cpack.cmake | 38 + libs/ultrainfer/cmake/cuda.cmake | 283 ++ libs/ultrainfer/cmake/cvcuda.cmake | 41 + 
libs/ultrainfer/cmake/faiss.cmake | 122 + libs/ultrainfer/cmake/fast_tokenizer.cmake | 106 + libs/ultrainfer/cmake/flycv.cmake | 97 + libs/ultrainfer/cmake/gflags.cmake | 89 + libs/ultrainfer/cmake/glog.cmake | 68 + libs/ultrainfer/cmake/gtest.cmake | 84 + libs/ultrainfer/cmake/horizon.cmake | 24 + libs/ultrainfer/cmake/kunlunxin.cmake | 26 + libs/ultrainfer/cmake/onnxruntime.cmake | 129 + libs/ultrainfer/cmake/opencv.cmake | 90 + libs/ultrainfer/cmake/openvino.cmake | 112 + libs/ultrainfer/cmake/paddle2onnx.cmake | 90 + libs/ultrainfer/cmake/paddle_inference.cmake | 329 ++ libs/ultrainfer/cmake/paddlelite.cmake | 105 + libs/ultrainfer/cmake/poros.cmake | 95 + libs/ultrainfer/cmake/rknpu2.cmake | 19 + libs/ultrainfer/cmake/sophgo.cmake | 7 + libs/ultrainfer/cmake/summary.cmake | 84 + libs/ultrainfer/cmake/timvx.cmake | 38 + libs/ultrainfer/cmake/toolchain.cmake | 45 + libs/ultrainfer/cmake/tvm.cmake | 55 + libs/ultrainfer/cmake/utils.cmake | 223 + libs/ultrainfer/cpack/debian_postinst.in | 42 + libs/ultrainfer/cpack/debian_prerm.in | 12 + libs/ultrainfer/cpack/rpm_postinst.in | 35 + libs/ultrainfer/cpack/rpm_postrm.in | 8 + libs/ultrainfer/python/__init__.py | 13 + libs/ultrainfer/python/requirements.txt | 15 + libs/ultrainfer/python/scripts/__init__.py | 13 + libs/ultrainfer/python/scripts/build_gpu.sh | 12 + .../python/scripts/process_libraries.py.in | 207 + libs/ultrainfer/python/setup.py | 485 ++ libs/ultrainfer/python/ultrainfer/__init__.py | 186 + .../python/ultrainfer/c_lib_wrap.py.in | 190 + libs/ultrainfer/python/ultrainfer/download.py | 274 + libs/ultrainfer/python/ultrainfer/model.py | 88 + .../python/ultrainfer/pipeline/__init__.py | 16 + .../pipeline/pptinypose/__init__.py | 58 + .../python/ultrainfer/py_only/__init__.py | 16 + .../python/ultrainfer/py_only/base.py | 59 + .../python/ultrainfer/py_only/ts/__init__.py | 16 + .../python/ultrainfer/py_only/ts/model.py | 25 + .../ultrainfer/py_only/ts/processors.py | 582 +++ .../ultrainfer/py_only/vision/__init__.py | 16 + .../python/ultrainfer/py_only/vision/model.py | 26 + .../ultrainfer/py_only/vision/processors.py | 465 ++ libs/ultrainfer/python/ultrainfer/runtime.py | 706 +++ .../python/ultrainfer/text/__init__.py | 18 + .../python/ultrainfer/text/uie/__init__.py | 105 + .../python/ultrainfer/ts/__init__.py | 18 + .../ts/anomalydetection/__init__.py | 16 + .../ts/anomalydetection/ppts/__init__.py | 168 + .../ultrainfer/ts/classification/__init__.py | 16 + .../ts/classification/ppts/__init__.py | 128 + .../ultrainfer/ts/forecasting/__init__.py | 16 + .../ts/forecasting/ppts/__init__.py | 195 + .../python/ultrainfer/utils/__init__.py | 14 + .../ultrainfer/utils/example_resource.py | 26 + .../python/ultrainfer/utils/hub_config.py | 76 + .../python/ultrainfer/utils/hub_env.py | 57 + .../ultrainfer/utils/hub_model_server.py | 134 + .../python/ultrainfer/utils/misc.py | 20 + .../python/ultrainfer/vision/__init__.py | 41 + .../vision/classification/__init__.py | 36 + .../vision/classification/contrib/__init__.py | 15 + .../vision/classification/contrib/resnet.py | 104 + .../classification/contrib/yolov5cls.py | 140 + .../vision/classification/ppcls/__init__.py | 288 ++ .../vision/classification/ppshitu/__init__.py | 145 + .../ultrainfer/vision/common/__init__.py | 18 + .../ultrainfer/vision/common/manager.py | 69 + .../ultrainfer/vision/common/processors.py | 152 + .../ultrainfer/vision/detection/__init__.py | 30 + .../vision/detection/contrib/__init__.py | 15 + .../vision/detection/contrib/fastestdet.py | 157 + 
.../vision/detection/contrib/nanodet_plus.py | 135 + .../detection/contrib/rkyolo/__init__.py | 16 + .../detection/contrib/rkyolo/rkyolov5.py | 315 ++ .../vision/detection/contrib/scaled_yolov4.py | 146 + .../vision/detection/contrib/yolor.py | 145 + .../vision/detection/contrib/yolov5.py | 227 + .../vision/detection/contrib/yolov5lite.py | 191 + .../vision/detection/contrib/yolov5seg.py | 222 + .../vision/detection/contrib/yolov6.py | 145 + .../vision/detection/contrib/yolov7.py | 187 + .../detection/contrib/yolov7end2end_ort.py | 132 + .../detection/contrib/yolov7end2end_trt.py | 132 + .../vision/detection/contrib/yolov8.py | 222 + .../vision/detection/contrib/yolox.py | 130 + .../vision/detection/ppdet/__init__.py | 990 ++++ .../ultrainfer/vision/evaluation/__init__.py | 17 + .../ultrainfer/vision/evaluation/classify.py | 79 + .../ultrainfer/vision/evaluation/detection.py | 125 + .../vision/evaluation/segmentation.py | 105 + .../vision/evaluation/utils/__init__.py | 23 + .../vision/evaluation/utils/cityscapes.py | 78 + .../vision/evaluation/utils/coco.py | 176 + .../vision/evaluation/utils/coco_metrics.py | 90 + .../vision/evaluation/utils/coco_utils.py | 233 + .../vision/evaluation/utils/fd_logging.py | 61 + .../vision/evaluation/utils/json_results.py | 162 + .../vision/evaluation/utils/map_utils.py | 42 + .../vision/evaluation/utils/seg_metrics.py | 144 + .../vision/evaluation/utils/util.py | 34 + .../ultrainfer/vision/facealign/__init__.py | 18 + .../vision/facealign/contrib/__init__.py | 15 + .../facealign/contrib/face_landmark_1000.py | 76 + .../vision/facealign/contrib/pfld.py | 76 + .../vision/facealign/contrib/pipnet.py | 118 + .../ultrainfer/vision/facedet/__init__.py | 22 + .../vision/facedet/contrib/__init__.py | 15 + .../vision/facedet/contrib/blazeface.py | 146 + .../vision/facedet/contrib/centerface.py | 150 + .../vision/facedet/contrib/retinaface.py | 134 + .../vision/facedet/contrib/scrfd.py | 216 + .../vision/facedet/contrib/ultraface.py | 75 + .../vision/facedet/contrib/yolov5face.py | 147 + .../vision/facedet/contrib/yolov7face.py | 193 + .../ultrainfer/vision/faceid/__init__.py | 16 + .../vision/faceid/contrib/__init__.py | 17 + .../vision/faceid/contrib/adaface/__init__.py | 109 + .../faceid/contrib/insightface/__init__.py | 237 + .../ultrainfer/vision/generation/__init__.py | 16 + .../vision/generation/contrib/__init__.py | 15 + .../vision/generation/contrib/anemigan.py | 103 + .../ultrainfer/vision/headpose/__init__.py | 16 + .../vision/headpose/contrib/__init__.py | 15 + .../vision/headpose/contrib/fsanet.py | 76 + .../vision/keypointdetection/__init__.py | 16 + .../keypointdetection/pptinypose/__init__.py | 90 + .../ultrainfer/vision/matting/__init__.py | 18 + .../vision/matting/contrib/__init__.py | 15 + .../vision/matting/contrib/modnet.py | 125 + .../ultrainfer/vision/matting/contrib/rvm.py | 105 + .../vision/matting/ppmatting/__init__.py | 55 + .../python/ultrainfer/vision/ocr/__init__.py | 16 + .../ultrainfer/vision/ocr/ppocr/__init__.py | 1928 +++++++ .../vision/ocr/ppocr/utils/__init__.py | 13 + .../ppocr/utils/ser_vi_layoutxlm/__init__.py | 13 + .../ppocr/utils/ser_vi_layoutxlm/operators.py | 104 + .../utils/ser_vi_layoutxlm/transforms.py | 47 + .../ppocr/utils/ser_vi_layoutxlm/vqa_utils.py | 624 +++ .../ultrainfer/vision/perception/__init__.py | 19 + .../vision/perception/paddle3d/__init__.py | 15 + .../vision/perception/paddle3d/caddn.py | 108 + .../vision/perception/paddle3d/centerpoint.py | 92 + .../vision/perception/paddle3d/petr.py | 106 + 
.../vision/perception/paddle3d/smoke.py | 106 + .../vision/segmentation/__init__.py | 16 + .../vision/segmentation/ppseg/__init__.py | 321 ++ .../python/ultrainfer/vision/sr/__init__.py | 15 + .../ultrainfer/vision/sr/ppsr/__init__.py | 122 + .../ultrainfer/vision/tracking/__init__.py | 21 + .../vision/tracking/pptracking/__init__.py | 69 + .../python/ultrainfer/vision/utils.py | 290 ++ .../ultrainfer/vision/visualize/__init__.py | 229 + libs/ultrainfer/scripts/__init__.py | 13 + libs/ultrainfer/scripts/ascend_init.sh | 13 + libs/ultrainfer/scripts/build_bcloud_lib.py | 41 + libs/ultrainfer/scripts/clean_sdk.sh | 7 + libs/ultrainfer/scripts/copy_directory.py | 32 + libs/ultrainfer/scripts/linux/_build_cpp.sh | 67 + libs/ultrainfer/scripts/linux/_build_py.sh | 78 + .../linux/set_up_docker_and_build_cpp.sh | 72 + .../linux/set_up_docker_and_build_py.sh | 73 + libs/ultrainfer/scripts/patch_lib.sh | 15 + .../scripts/patch_paddle_inference.py | 52 + libs/ultrainfer/scripts/patch_paddle_lite.py | 44 + libs/ultrainfer/scripts/ultrainfer_init.bat | 167 + libs/ultrainfer/scripts/ultrainfer_init.sh | 61 + libs/ultrainfer/ultrainfer/CMakeLists.txt | 0 .../ultrainfer/benchmark/benchmark.h | 86 + libs/ultrainfer/ultrainfer/benchmark/option.h | 49 + .../ultrainfer/ultrainfer/benchmark/results.h | 28 + libs/ultrainfer/ultrainfer/benchmark/utils.cc | 908 ++++ libs/ultrainfer/ultrainfer/benchmark/utils.h | 204 + libs/ultrainfer/ultrainfer/core/allocate.cc | 45 + libs/ultrainfer/ultrainfer/core/allocate.h | 60 + libs/ultrainfer/ultrainfer/core/config.h.in | 86 + libs/ultrainfer/ultrainfer/core/fd_scalar.h | 121 + libs/ultrainfer/ultrainfer/core/fd_tensor.cc | 447 ++ libs/ultrainfer/ultrainfer/core/fd_tensor.h | 216 + libs/ultrainfer/ultrainfer/core/fd_type.cc | 137 + libs/ultrainfer/ultrainfer/core/fd_type.h | 61 + libs/ultrainfer/ultrainfer/core/float16.h | 651 +++ libs/ultrainfer/ultrainfer/function/cast.cc | 47 + libs/ultrainfer/ultrainfer/function/cast.h | 31 + libs/ultrainfer/ultrainfer/function/clip.cc | 59 + libs/ultrainfer/ultrainfer/function/clip.h | 33 + libs/ultrainfer/ultrainfer/function/concat.cc | 118 + libs/ultrainfer/ultrainfer/function/concat.h | 32 + .../ultrainfer/function/cuda_cast.cu | 46 + .../ultrainfer/function/cuda_cast.h | 29 + .../ultrainfer/ultrainfer/function/cumprod.cc | 78 + libs/ultrainfer/ultrainfer/function/cumprod.h | 31 + libs/ultrainfer/ultrainfer/function/eigen.cc | 33 + libs/ultrainfer/ultrainfer/function/eigen.h | 139 + .../ultrainfer/function/elementwise.cc | 110 + .../ultrainfer/function/elementwise.h | 105 + .../ultrainfer/function/elementwise_base.h | 265 + .../ultrainfer/function/elementwise_functor.h | 131 + libs/ultrainfer/ultrainfer/function/full.cc | 42 + libs/ultrainfer/ultrainfer/function/full.h | 44 + .../ultrainfer/function/functions.h | 36 + .../function/gather_scatter_along_axis.cc | 125 + .../function/gather_scatter_along_axis.h | 33 + .../ultrainfer/function/gaussian_random.cc | 46 + .../ultrainfer/function/gaussian_random.h | 36 + .../ultrainfer/function/isfinite.cc | 111 + .../ultrainfer/ultrainfer/function/isfinite.h | 47 + .../ultrainfer/function/linspace.cc | 52 + .../ultrainfer/ultrainfer/function/linspace.h | 33 + libs/ultrainfer/ultrainfer/function/math.cc | 84 + libs/ultrainfer/ultrainfer/function/math.h | 70 + .../ultrainfer/function/math_functor.h | 81 + libs/ultrainfer/ultrainfer/function/pad.cc | 119 + libs/ultrainfer/ultrainfer/function/pad.h | 32 + .../ultrainfer/function/quantile.cc | 130 + .../ultrainfer/ultrainfer/function/quantile.h 
| 34 + libs/ultrainfer/ultrainfer/function/reduce.cc | 414 ++ libs/ultrainfer/ultrainfer/function/reduce.h | 127 + .../ultrainfer/function/reduce_functor.h | 77 + libs/ultrainfer/ultrainfer/function/slice.cc | 182 + libs/ultrainfer/ultrainfer/function/slice.h | 44 + .../ultrainfer/ultrainfer/function/softmax.cc | 125 + libs/ultrainfer/ultrainfer/function/softmax.h | 29 + libs/ultrainfer/ultrainfer/function/sort.cc | 120 + libs/ultrainfer/ultrainfer/function/sort.h | 47 + libs/ultrainfer/ultrainfer/function/split.cc | 160 + libs/ultrainfer/ultrainfer/function/split.h | 36 + libs/ultrainfer/ultrainfer/function/tile.cc | 111 + libs/ultrainfer/ultrainfer/function/tile.h | 36 + .../ultrainfer/function/transpose.cc | 123 + .../ultrainfer/function/transpose.h | 33 + libs/ultrainfer/ultrainfer/pipeline.h | 21 + .../ultrainfer/pipeline/pipeline_pybind.cc | 22 + .../pipeline/pptinypose/pipeline.cc | 70 + .../ultrainfer/pipeline/pptinypose/pipeline.h | 70 + .../pptinypose/pptinyposepipeline_pybind.cc | 36 + .../ultrainfer/pybind/fastdeploy_model.cc | 42 + .../ultrainfer/ultrainfer/pybind/fd_tensor.cc | 293 ++ libs/ultrainfer/ultrainfer/pybind/main.cc.in | 181 + libs/ultrainfer/ultrainfer/pybind/main.h | 135 + libs/ultrainfer/ultrainfer/pybind/runtime.cc | 172 + libs/ultrainfer/ultrainfer/runtime.h | 23 + .../ultrainfer/runtime/backends/backend.h | 158 + .../common/cuda/adaptive_pool2d_kernel.cu | 99 + .../common/cuda/adaptive_pool2d_kernel.h | 35 + .../backends/horizon/horizon_backend.cc | 399 ++ .../backends/horizon/horizon_backend.h | 67 + .../backends/lite/configure_hardware.cc | 171 + .../runtime/backends/lite/lite_backend.cc | 298 ++ .../runtime/backends/lite/lite_backend.h | 76 + .../ultrainfer/runtime/backends/lite/option.h | 103 + .../runtime/backends/lite/option_pybind.cc | 64 + .../runtime/backends/openvino/option.h | 100 + .../backends/openvino/option_pybind.cc | 35 + .../runtime/backends/openvino/ov_backend.cc | 457 ++ .../runtime/backends/openvino/ov_backend.h | 72 + .../backends/ort/ops/adaptive_pool2d.cc | 125 + .../backends/ort/ops/adaptive_pool2d.h | 86 + .../backends/ort/ops/multiclass_nms.cc | 287 ++ .../runtime/backends/ort/ops/multiclass_nms.h | 80 + .../ultrainfer/runtime/backends/ort/option.h | 57 + .../runtime/backends/ort/option_pybind.cc | 37 + .../runtime/backends/ort/ort_backend.cc | 455 ++ .../runtime/backends/ort/ort_backend.h | 91 + .../ultrainfer/runtime/backends/ort/utils.cc | 80 + .../ultrainfer/runtime/backends/ort/utils.h | 39 + .../paddle/ops/centerpoint_postprocess_op.cc | 124 + .../paddle/ops/centerpoint_postprocess_op.cu | 295 ++ .../backends/paddle/ops/grid_sample_3d.cc | 100 + .../backends/paddle/ops/grid_sample_3d.cu | 658 +++ .../backends/paddle/ops/grid_sample_3d.h | 33 + .../runtime/backends/paddle/ops/iou3d_cpu.cc | 272 + .../runtime/backends/paddle/ops/iou3d_cpu.h | 35 + .../runtime/backends/paddle/ops/iou3d_nms.cc | 241 + .../runtime/backends/paddle/ops/iou3d_nms.h | 45 + .../backends/paddle/ops/iou3d_nms_api.cc | 56 + .../backends/paddle/ops/iou3d_nms_kernel.cu | 588 +++ .../backends/paddle/ops/voxelize_op.cc | 208 + .../backends/paddle/ops/voxelize_op.cu | 357 ++ .../runtime/backends/paddle/option.h | 169 + .../runtime/backends/paddle/option_pybind.cc | 72 + .../runtime/backends/paddle/paddle_backend.cc | 650 +++ .../runtime/backends/paddle/paddle_backend.h | 103 + .../runtime/backends/paddle/util.cc | 236 + .../runtime/backends/poros/common/compile.h | 170 + .../runtime/backends/poros/common/iengine.h | 82 + .../backends/poros/common/plugin_create.h 
| 69 + .../backends/poros/common/poros_module.h | 60 + .../runtime/backends/poros/option.h | 46 + .../runtime/backends/poros/option_pybind.cc | 37 + .../runtime/backends/poros/poros_backend.cc | 175 + .../runtime/backends/poros/poros_backend.h | 91 + .../runtime/backends/poros/utils.cc | 185 + .../runtime/backends/rknpu2/option.h | 48 + .../runtime/backends/rknpu2/rknpu2_backend.cc | 593 +++ .../runtime/backends/rknpu2/rknpu2_backend.h | 180 + .../backends/rknpu2/rknpu2_config_pybind.cc | 37 + .../runtime/backends/sophgo/option.h | 25 + .../runtime/backends/sophgo/sophgo_backend.cc | 304 ++ .../runtime/backends/sophgo/sophgo_backend.h | 71 + .../ultrainfer/runtime/backends/tvm/option.h | 21 + .../runtime/backends/tvm/tvm_backend.cc | 204 + .../runtime/backends/tvm/tvm_backend.h | 61 + .../ultrainfer/runtime/enum_variables.cc | 145 + .../ultrainfer/runtime/enum_variables.h | 148 + .../ultrainfer/runtime/option_pybind.cc | 83 + libs/ultrainfer/ultrainfer/runtime/runtime.cc | 431 ++ libs/ultrainfer/ultrainfer/runtime/runtime.h | 126 + .../ultrainfer/runtime/runtime_option.cc | 524 ++ .../ultrainfer/runtime/runtime_option.h | 282 ++ libs/ultrainfer/ultrainfer/text.h | 19 + .../ultrainfer/text/common/option.h | 26 + .../ultrainfer/text/common/result.cc | 18 + .../ultrainfer/text/common/result.h | 23 + .../text/postprocessor/postprocessor.cc | 31 + .../text/postprocessor/postprocessor.h | 34 + .../text/preprocessor/preprocessor.cc | 32 + .../text/preprocessor/preprocessor.h | 34 + libs/ultrainfer/ultrainfer/text/text_model.cc | 79 + libs/ultrainfer/ultrainfer/text/text_model.h | 50 + .../ultrainfer/ultrainfer/text/text_pybind.cc | 63 + libs/ultrainfer/ultrainfer/text/uie/model.cc | 797 +++ libs/ultrainfer/ultrainfer/text/uie/model.h | 210 + .../ultrainfer/text/uie/uie_pybind.cc | 89 + .../ultrainfer/ultrainfer/ultrainfer_model.cc | 517 ++ libs/ultrainfer/ultrainfer/ultrainfer_model.h | 189 + libs/ultrainfer/ultrainfer/utils/axis_utils.h | 52 + libs/ultrainfer/ultrainfer/utils/path.h | 74 + libs/ultrainfer/ultrainfer/utils/perf.h | 49 + libs/ultrainfer/ultrainfer/utils/unique_ptr.h | 55 + libs/ultrainfer/ultrainfer/utils/utils.cc | 68 + libs/ultrainfer/ultrainfer/utils/utils.h | 234 + libs/ultrainfer/ultrainfer/vision.h | 80 + .../classification/classification_pybind.cc | 34 + .../vision/classification/contrib/resnet.cc | 135 + .../vision/classification/contrib/resnet.h | 86 + .../classification/contrib/resnet_pybind.cc | 39 + .../contrib/yolov5cls/postprocessor.cc | 58 + .../contrib/yolov5cls/postprocessor.h | 55 + .../contrib/yolov5cls/preprocessor.cc | 91 + .../contrib/yolov5cls/preprocessor.h | 57 + .../contrib/yolov5cls/yolov5cls.cc | 83 + .../contrib/yolov5cls/yolov5cls.h | 76 + .../contrib/yolov5cls/yolov5cls_pybind.cc | 108 + .../vision/classification/ppcls/model.cc | 123 + .../vision/classification/ppcls/model.h | 128 + .../classification/ppcls/postprocessor.cc | 57 + .../classification/ppcls/postprocessor.h | 56 + .../classification/ppcls/ppcls_pybind.cc | 99 + .../classification/ppcls/preprocessor.cc | 156 + .../classification/ppcls/preprocessor.h | 73 + .../classification/ppshitu/ppshitu_pybind.cc | 101 + .../classification/ppshitu/ppshituv2_det.h | 25 + .../classification/ppshitu/ppshituv2_rec.cc | 121 + .../classification/ppshitu/ppshituv2_rec.h | 117 + .../ppshitu/ppshituv2_rec_postprocessor.cc | 58 + .../ppshitu/ppshituv2_rec_postprocessor.h | 50 + .../ppshitu/ppshituv2_rec_preprocessor.cc | 160 + .../ppshitu/ppshituv2_rec_preprocessor.h | 73 + 
.../common/image_decoder/image_decoder.cc | 112 + .../common/image_decoder/image_decoder.h | 49 + .../common/image_decoder/nvjpeg_decoder.cc | 364 ++ .../common/image_decoder/nvjpeg_decoder.h | 68 + .../vision/common/processors/base.cc | 177 + .../vision/common/processors/base.h | 93 + .../vision/common/processors/base_pybind.cc | 28 + .../vision/common/processors/cast.cc | 113 + .../vision/common/processors/cast.h | 59 + .../vision/common/processors/cast_pybind.cc | 22 + .../vision/common/processors/center_crop.cc | 102 + .../vision/common/processors/center_crop.h | 63 + .../common/processors/center_crop_pybind.cc | 22 + .../common/processors/color_space_convert.cc | 133 + .../common/processors/color_space_convert.h | 99 + .../vision/common/processors/convert.cc | 67 + .../vision/common/processors/convert.h | 52 + .../common/processors/convert_and_permute.cc | 96 + .../common/processors/convert_and_permute.h | 85 + .../vision/common/processors/crop.cc | 68 + .../vision/common/processors/crop.h | 61 + .../vision/common/processors/cvcuda_utils.cc | 127 + .../vision/common/processors/cvcuda_utils.h | 40 + .../vision/common/processors/hwc2chw.cc | 93 + .../vision/common/processors/hwc2chw.h | 54 + .../common/processors/hwc2chw_pybind.cc | 22 + .../common/processors/limit_by_stride.cc | 86 + .../common/processors/limit_by_stride.h | 54 + .../vision/common/processors/limit_short.cc | 93 + .../vision/common/processors/limit_short.h | 62 + .../vision/common/processors/manager.cc | 102 + .../vision/common/processors/manager.h | 104 + .../common/processors/manager_pybind.cc | 57 + .../vision/common/processors/mat.cc | 337 ++ .../ultrainfer/vision/common/processors/mat.h | 176 + .../vision/common/processors/mat_batch.cc | 92 + .../vision/common/processors/mat_batch.h | 83 + .../common/processors/mat_batch_pybind.cc | 30 + .../vision/common/processors/mat_pybind.cc | 29 + .../vision/common/processors/normalize.cc | 107 + .../vision/common/processors/normalize.cu | 117 + .../vision/common/processors/normalize.h | 90 + .../processors/normalize_and_permute.cc | 124 + .../processors/normalize_and_permute.cu | 134 + .../common/processors/normalize_and_permute.h | 107 + .../normalize_and_permute_pybind.cc | 25 + .../common/processors/normalize_pybind.cc | 24 + .../vision/common/processors/pad.cc | 152 + .../ultrainfer/vision/common/processors/pad.h | 89 + .../vision/common/processors/pad_pybind.cc | 23 + .../vision/common/processors/pad_to_size.cc | 272 + .../vision/common/processors/pad_to_size.h | 79 + .../common/processors/pad_to_size_pybind.cc | 23 + .../vision/common/processors/proc_lib.cc | 46 + .../vision/common/processors/proc_lib.h | 34 + .../common/processors/processors_pybind.cc | 48 + .../vision/common/processors/resize.cc | 171 + .../vision/common/processors/resize.h | 93 + .../common/processors/resize_by_short.cc | 188 + .../common/processors/resize_by_short.h | 74 + .../processors/resize_by_short_pybind.cc | 23 + .../vision/common/processors/resize_pybind.cc | 23 + .../vision/common/processors/stride_pad.cc | 186 + .../vision/common/processors/stride_pad.h | 65 + .../common/processors/stride_pad_pybind.cc | 22 + .../vision/common/processors/transform.cc | 169 + .../vision/common/processors/transform.h | 49 + .../vision/common/processors/utils.cc | 279 ++ .../vision/common/processors/utils.h | 55 + .../vision/common/processors/warp_affine.cc | 50 + .../vision/common/processors/warp_affine.h | 61 + .../ultrainfer/vision/common/result.cc | 944 ++++ .../ultrainfer/vision/common/result.h | 494 ++ 
.../contrib/fastestdet/fastestdet.cc | 82 + .../detection/contrib/fastestdet/fastestdet.h | 76 + .../contrib/fastestdet/fastestdet_pybind.cc | 111 + .../contrib/fastestdet/postprocessor.cc | 133 + .../contrib/fastestdet/postprocessor.h | 68 + .../contrib/fastestdet/preprocessor.cc | 84 + .../contrib/fastestdet/preprocessor.h | 57 + .../vision/detection/contrib/nanodet_plus.cc | 338 ++ .../vision/detection/contrib/nanodet_plus.h | 103 + .../detection/contrib/nanodet_plus_pybind.cc | 40 + .../vision/detection/contrib/rknpu2/model.h | 104 + .../detection/contrib/rknpu2/postprocessor.cc | 212 + .../detection/contrib/rknpu2/postprocessor.h | 115 + .../detection/contrib/rknpu2/preprocessor.cc | 109 + .../detection/contrib/rknpu2/preprocessor.h | 99 + .../vision/detection/contrib/rknpu2/rkyolo.cc | 83 + .../vision/detection/contrib/rknpu2/rkyolo.h | 65 + .../detection/contrib/rknpu2/rkyolo_pybind.cc | 163 + .../vision/detection/contrib/rknpu2/utils.cc | 72 + .../vision/detection/contrib/rknpu2/utils.h | 29 + .../vision/detection/contrib/scaledyolov4.cc | 254 + .../vision/detection/contrib/scaledyolov4.h | 101 + .../detection/contrib/scaledyolov4_pybind.cc | 42 + .../vision/detection/contrib/yolor.cc | 252 + .../vision/detection/contrib/yolor.h | 101 + .../vision/detection/contrib/yolor_pybind.cc | 38 + .../detection/contrib/yolov5/postprocessor.cc | 140 + .../detection/contrib/yolov5/postprocessor.h | 74 + .../detection/contrib/yolov5/preprocessor.cc | 119 + .../detection/contrib/yolov5/preprocessor.h | 107 + .../vision/detection/contrib/yolov5/yolov5.cc | 97 + .../vision/detection/contrib/yolov5/yolov5.h | 89 + .../detection/contrib/yolov5/yolov5_pybind.cc | 122 + .../vision/detection/contrib/yolov5lite.cc | 471 ++ .../vision/detection/contrib/yolov5lite.h | 156 + .../detection/contrib/yolov5lite_pybind.cc | 50 + .../contrib/yolov5seg/postprocessor.cc | 217 + .../contrib/yolov5seg/postprocessor.h | 78 + .../contrib/yolov5seg/preprocessor.cc | 119 + .../contrib/yolov5seg/preprocessor.h | 107 + .../detection/contrib/yolov5seg/yolov5seg.cc | 83 + .../detection/contrib/yolov5seg/yolov5seg.h | 76 + .../contrib/yolov5seg/yolov5seg_pybind.cc | 122 + .../vision/detection/contrib/yolov6.cc | 342 ++ .../vision/detection/contrib/yolov6.h | 125 + .../vision/detection/contrib/yolov6_pybind.cc | 42 + .../detection/contrib/yolov7/postprocessor.cc | 117 + .../detection/contrib/yolov7/postprocessor.h | 67 + .../detection/contrib/yolov7/preprocessor.cc | 119 + .../detection/contrib/yolov7/preprocessor.h | 94 + .../vision/detection/contrib/yolov7/yolov7.cc | 94 + .../vision/detection/contrib/yolov7/yolov7.h | 89 + .../detection/contrib/yolov7/yolov7_pybind.cc | 112 + .../detection/contrib/yolov7end2end_ort.cc | 249 + .../detection/contrib/yolov7end2end_ort.h | 92 + .../contrib/yolov7end2end_ort_pybind.cc | 42 + .../detection/contrib/yolov7end2end_trt.cc | 357 ++ .../detection/contrib/yolov7end2end_trt.h | 110 + .../contrib/yolov7end2end_trt_pybind.cc | 46 + .../detection/contrib/yolov8/postprocessor.cc | 143 + .../detection/contrib/yolov8/postprocessor.h | 74 + .../detection/contrib/yolov8/preprocessor.cc | 119 + .../detection/contrib/yolov8/preprocessor.h | 107 + .../vision/detection/contrib/yolov8/yolov8.cc | 82 + .../vision/detection/contrib/yolov8/yolov8.h | 76 + .../detection/contrib/yolov8/yolov8_pybind.cc | 122 + .../vision/detection/contrib/yolox.cc | 322 ++ .../vision/detection/contrib/yolox.h | 106 + .../vision/detection/contrib/yolox_pybind.cc | 38 + .../vision/detection/detection_pybind.cc | 54 + 
.../ultrainfer/vision/detection/ppdet/base.cc | 108 + .../ultrainfer/vision/detection/ppdet/base.h | 100 + .../ultrainfer/vision/detection/ppdet/model.h | 508 ++ .../vision/detection/ppdet/multiclass_nms.cc | 227 + .../vision/detection/ppdet/multiclass_nms.h | 77 + .../detection/ppdet/multiclass_nms_rotated.cc | 468 ++ .../detection/ppdet/multiclass_nms_rotated.h | 77 + .../vision/detection/ppdet/postprocessor.cc | 362 ++ .../vision/detection/ppdet/postprocessor.h | 117 + .../vision/detection/ppdet/ppdet_pybind.cc | 268 + .../vision/detection/ppdet/preprocessor.cc | 228 + .../vision/detection/ppdet/preprocessor.h | 71 + .../facealign/contrib/face_landmark_1000.cc | 134 + .../facealign/contrib/face_landmark_1000.h | 80 + .../contrib/face_landmark_1000_pybind.cc | 34 + .../vision/facealign/contrib/pfld.cc | 135 + .../vision/facealign/contrib/pfld.h | 67 + .../vision/facealign/contrib/pfld_pybind.cc | 31 + .../vision/facealign/contrib/pipnet.cc | 687 +++ .../vision/facealign/contrib/pipnet.h | 133 + .../vision/facealign/contrib/pipnet_pybind.cc | 39 + .../vision/facealign/facealign_pybind.cc | 29 + .../facedet/contrib/centerface/centerface.cc | 88 + .../facedet/contrib/centerface/centerface.h | 81 + .../contrib/centerface/centerface_pybind.cc | 106 + .../contrib/centerface/postprocessor.cc | 151 + .../contrib/centerface/postprocessor.h | 68 + .../contrib/centerface/preprocessor.cc | 81 + .../facedet/contrib/centerface/preprocessor.h | 59 + .../vision/facedet/contrib/retinaface.cc | 293 ++ .../vision/facedet/contrib/retinaface.h | 104 + .../facedet/contrib/retinaface_pybind.cc | 39 + .../vision/facedet/contrib/scrfd.cc | 375 ++ .../ultrainfer/vision/facedet/contrib/scrfd.h | 142 + .../vision/facedet/contrib/scrfd_pybind.cc | 48 + .../vision/facedet/contrib/ultraface.cc | 203 + .../vision/facedet/contrib/ultraface.h | 83 + .../facedet/contrib/ultraface_pybind.cc | 32 + .../vision/facedet/contrib/yolov5face.cc | 280 ++ .../vision/facedet/contrib/yolov5face.h | 102 + .../facedet/contrib/yolov5face_pybind.cc | 42 + .../contrib/yolov7face/postprocessor.cc | 135 + .../contrib/yolov7face/postprocessor.h | 76 + .../contrib/yolov7face/preprocessor.cc | 123 + .../facedet/contrib/yolov7face/preprocessor.h | 98 + .../facedet/contrib/yolov7face/yolov7face.cc | 89 + .../facedet/contrib/yolov7face/yolov7face.h | 81 + .../contrib/yolov7face/yolov7face_pybind.cc | 117 + .../vision/facedet/facedet_pybind.cc | 37 + .../facedet/ppdet/blazeface/blazeface.cc | 94 + .../facedet/ppdet/blazeface/blazeface.h | 84 + .../ppdet/blazeface/blazeface_pybind.cc | 102 + .../facedet/ppdet/blazeface/postprocessor.cc | 96 + .../facedet/ppdet/blazeface/postprocessor.h | 67 + .../facedet/ppdet/blazeface/preprocessor.cc | 209 + .../facedet/ppdet/blazeface/preprocessor.h | 70 + .../vision/faceid/contrib/adaface/adaface.cc | 83 + .../vision/faceid/contrib/adaface/adaface.h | 76 + .../faceid/contrib/adaface/adaface_pybind.cc | 103 + .../faceid/contrib/adaface/postprocessor.cc | 64 + .../faceid/contrib/adaface/postprocessor.h | 51 + .../faceid/contrib/adaface/preprocessor.cc | 76 + .../faceid/contrib/adaface/preprocessor.h | 80 + .../vision/faceid/contrib/insightface/base.cc | 84 + .../vision/faceid/contrib/insightface/base.h | 81 + .../contrib/insightface/insightface_pybind.cc | 138 + .../vision/faceid/contrib/insightface/model.h | 154 + .../contrib/insightface/postprocessor.cc | 67 + .../contrib/insightface/postprocessor.h | 52 + .../contrib/insightface/preprocessor.cc | 79 + .../faceid/contrib/insightface/preprocessor.h | 84 + 
.../ultrainfer/vision/faceid/faceid_pybind.cc | 25 + .../vision/generation/contrib/animegan.cc | 81 + .../vision/generation/contrib/animegan.h | 79 + .../generation/contrib/animegan_pybind.cc | 85 + .../generation/contrib/postprocessor.cc | 50 + .../vision/generation/contrib/postprocessor.h | 42 + .../vision/generation/contrib/preprocessor.cc | 67 + .../vision/generation/contrib/preprocessor.h | 42 + .../vision/generation/generation_pybind.cc | 26 + .../vision/headpose/contrib/fsanet.cc | 132 + .../vision/headpose/contrib/fsanet.h | 68 + .../vision/headpose/contrib/fsanet_pybind.cc | 31 + .../vision/headpose/headpose_pybind.cc | 25 + .../vision/keypointdet/keypointdet_pybind.cc | 26 + .../keypointdet/pptinypose/pptinypose.cc | 283 ++ .../keypointdet/pptinypose/pptinypose.h | 116 + .../pptinypose/pptinypose_pybind.cc | 50 + .../pptinypose/pptinypose_utils.cc | 125 + .../keypointdet/pptinypose/pptinypose_utils.h | 51 + .../vision/matting/contrib/modnet.cc | 155 + .../vision/matting/contrib/modnet.h | 87 + .../vision/matting/contrib/modnet_pybind.cc | 36 + .../ultrainfer/vision/matting/contrib/rvm.cc | 183 + .../ultrainfer/vision/matting/contrib/rvm.h | 101 + .../vision/matting/contrib/rvm_pybind.cc | 38 + .../vision/matting/matting_pybind.cc | 30 + .../vision/matting/ppmatting/ppmatting.cc | 234 + .../vision/matting/ppmatting/ppmatting.h | 75 + .../matting/ppmatting/ppmatting_pybind.cc | 29 + .../ultrainfer/vision/ocr/ocr_pybind.cc | 33 + .../ultrainfer/vision/ocr/ppocr/classifier.cc | 128 + .../ultrainfer/vision/ocr/ppocr/classifier.h | 123 + .../vision/ocr/ppocr/cls_postprocessor.cc | 84 + .../vision/ocr/ppocr/cls_postprocessor.h | 54 + .../vision/ocr/ppocr/cls_preprocessor.cc | 102 + .../vision/ocr/ppocr/cls_preprocessor.h | 86 + .../vision/ocr/ppocr/dbcurvedetector.cc | 124 + .../vision/ocr/ppocr/dbcurvedetector.h | 118 + .../ultrainfer/vision/ocr/ppocr/dbdetector.cc | 122 + .../ultrainfer/vision/ocr/ppocr/dbdetector.h | 115 + .../vision/ocr/ppocr/det_postprocessor.cc | 98 + .../vision/ocr/ppocr/det_postprocessor.h | 84 + .../ocr/ppocr/det_postprocessor_curve.cc | 103 + .../ocr/ppocr/det_postprocessor_curve.h | 89 + .../vision/ocr/ppocr/det_preprocessor.cc | 106 + .../vision/ocr/ppocr/det_preprocessor.h | 103 + .../vision/ocr/ppocr/ocrmodel_pybind.cc | 748 +++ .../vision/ocr/ppocr/ppocr_pybind.cc | 147 + .../ultrainfer/vision/ocr/ppocr/ppocr_v2.cc | 186 + .../ultrainfer/vision/ocr/ppocr/ppocr_v2.h | 112 + .../ultrainfer/vision/ocr/ppocr/ppocr_v3.h | 87 + .../ultrainfer/vision/ocr/ppocr/ppocr_v4.h | 87 + .../vision/ocr/ppocr/ppstructurev2_layout.h | 40 + .../vision/ocr/ppocr/ppstructurev2_table.cc | 233 + .../vision/ocr/ppocr/ppstructurev2_table.h | 101 + .../vision/ocr/ppocr/rec_postprocessor.cc | 150 + .../vision/ocr/ppocr/rec_postprocessor.h | 60 + .../vision/ocr/ppocr/rec_preprocessor.cc | 142 + .../vision/ocr/ppocr/rec_preprocessor.h | 101 + .../ultrainfer/vision/ocr/ppocr/recognizer.cc | 136 + .../ultrainfer/vision/ocr/ppocr/recognizer.h | 122 + .../vision/ocr/ppocr/structurev2_layout.cc | 102 + .../vision/ocr/ppocr/structurev2_layout.h | 101 + .../ppocr/structurev2_layout_postprocessor.cc | 174 + .../ppocr/structurev2_layout_postprocessor.h | 88 + .../ppocr/structurev2_layout_preprocessor.cc | 72 + .../ppocr/structurev2_layout_preprocessor.h | 90 + .../ocr/ppocr/structurev2_ser_vi_layoutxlm.cc | 72 + .../ocr/ppocr/structurev2_ser_vi_layoutxlm.h | 67 + .../vision/ocr/ppocr/structurev2_table.cc | 134 + .../vision/ocr/ppocr/structurev2_table.h | 126 + 
.../ppocr/structurev2_table_postprocessor.cc | 182 + .../ppocr/structurev2_table_postprocessor.h | 73 + .../ppocr/structurev2_table_preprocessor.cc | 106 + .../ppocr/structurev2_table_preprocessor.h | 74 + .../vision/ocr/ppocr/utils/clipper.cc | 4374 ++++++++++++++++ .../vision/ocr/ppocr/utils/clipper.h | 421 ++ .../ocr/ppocr/utils/get_rotate_crop_image.cc | 85 + .../vision/ocr/ppocr/utils/matcher.cc | 89 + .../ocr/ppocr/utils/ocr_postprocess_op.cc | 538 ++ .../ocr/ppocr/utils/ocr_postprocess_op.h | 107 + .../vision/ocr/ppocr/utils/ocr_utils.h | 49 + .../vision/ocr/ppocr/utils/softmax.cc | 51 + .../vision/ocr/ppocr/utils/sorted_boxes.cc | 64 + .../vision/ocr/ppocr/uvdoc_postprocessor.cc | 56 + .../vision/ocr/ppocr/uvdoc_postprocessor.h | 40 + .../vision/ocr/ppocr/uvdoc_preprocessor.cc | 45 + .../vision/ocr/ppocr/uvdoc_preprocessor.h | 60 + .../vision/ocr/ppocr/uvdocwarpper.cc | 101 + .../vision/ocr/ppocr/uvdocwarpper.h | 104 + .../vision/perception/paddle3d/caddn/caddn.cc | 86 + .../vision/perception/paddle3d/caddn/caddn.h | 81 + .../perception/paddle3d/caddn/caddn_pybind.cc | 96 + .../paddle3d/caddn/postprocessor.cc | 70 + .../perception/paddle3d/caddn/postprocessor.h | 48 + .../perception/paddle3d/caddn/preprocessor.cc | 112 + .../perception/paddle3d/caddn/preprocessor.h | 69 + .../paddle3d/centerpoint/centerpoint.cc | 92 + .../paddle3d/centerpoint/centerpoint.h | 81 + .../centerpoint/centerpoint_pybind.cc | 56 + .../paddle3d/centerpoint/postprocessor.cc | 71 + .../paddle3d/centerpoint/postprocessor.h | 47 + .../paddle3d/centerpoint/preprocessor.cc | 105 + .../paddle3d/centerpoint/preprocessor.h | 57 + .../vision/perception/paddle3d/petr/petr.cc | 92 + .../vision/perception/paddle3d/petr/petr.h | 77 + .../perception/paddle3d/petr/petr_pybind.cc | 92 + .../perception/paddle3d/petr/postprocessor.cc | 73 + .../perception/paddle3d/petr/postprocessor.h | 48 + .../perception/paddle3d/petr/preprocessor.cc | 114 + .../perception/paddle3d/petr/preprocessor.h | 88 + .../paddle3d/smoke/postprocessor.cc | 67 + .../perception/paddle3d/smoke/postprocessor.h | 48 + .../perception/paddle3d/smoke/preprocessor.cc | 161 + .../perception/paddle3d/smoke/preprocessor.h | 62 + .../vision/perception/paddle3d/smoke/smoke.cc | 82 + .../vision/perception/paddle3d/smoke/smoke.h | 77 + .../perception/paddle3d/smoke/smoke_pybind.cc | 92 + .../vision/perception/perception_pybind.cc | 32 + .../vision/segmentation/ppseg/model.cc | 103 + .../vision/segmentation/ppseg/model.h | 99 + .../segmentation/ppseg/postprocessor.cc | 291 ++ .../vision/segmentation/ppseg/postprocessor.h | 89 + .../vision/segmentation/ppseg/ppseg_pybind.cc | 130 + .../vision/segmentation/ppseg/preprocessor.cc | 180 + .../vision/segmentation/ppseg/preprocessor.h | 88 + .../segmentation/segmentation_pybind.cc | 26 + .../ultrainfer/vision/sr/ppsr/basicvsr.cc | 38 + .../ultrainfer/vision/sr/ppsr/basicvsr.h | 43 + .../ultrainfer/vision/sr/ppsr/edvr.cc | 73 + .../ultrainfer/vision/sr/ppsr/edvr.h | 46 + .../ultrainfer/vision/sr/ppsr/model.h | 18 + .../ultrainfer/vision/sr/ppsr/ppmsvsr.cc | 130 + .../ultrainfer/vision/sr/ppsr/ppmsvsr.h | 63 + .../ultrainfer/vision/sr/ppsr/ppsr_pybind.cc | 79 + .../ultrainfer/vision/sr/sr_pybind.cc | 25 + .../vision/tracking/pptracking/lapjv.cc | 389 ++ .../vision/tracking/pptracking/lapjv.h | 62 + .../tracking/pptracking/letter_box_resize.cc | 169 + .../tracking/pptracking/letter_box_resize.h | 52 + .../vision/tracking/pptracking/model.cc | 316 ++ .../vision/tracking/pptracking/model.h | 103 + 
.../tracking/pptracking/pptracking_pybind.cc | 37 + .../vision/tracking/pptracking/tracker.cc | 297 ++ .../vision/tracking/pptracking/tracker.h | 73 + .../vision/tracking/pptracking/trajectory.cc | 529 ++ .../vision/tracking/pptracking/trajectory.h | 213 + .../vision/tracking/tracking_pybind.cc | 25 + .../vision/utils/cosine_similarity.cc | 48 + .../ultrainfer/vision/utils/crop_image.cc | 61 + .../ultrainfer/vision/utils/cuda_utils.h | 42 + .../ultrainfer/vision/utils/dark_parse.cc | 81 + .../ultrainfer/vision/utils/face_align.cc | 152 + .../ultrainfer/vision/utils/l2_normalize.cc | 41 + .../ultrainfer/ultrainfer/vision/utils/nms.cc | 142 + .../ultrainfer/vision/utils/sort_det_res.cc | 187 + .../vision/utils/sort_face_det_res.cc | 69 + .../ultrainfer/vision/utils/utils.h | 124 + .../vision/utils/yolo_preprocess.cu | 153 + .../ultrainfer/vision/vision_pybind.cc | 292 ++ .../vision/visualize/classification.cc | 96 + .../ultrainfer/vision/visualize/detection.cc | 374 ++ .../vision/visualize/face_alignment.cc | 37 + .../vision/visualize/face_detection.cc | 137 + .../ultrainfer/vision/visualize/headpose.cc | 62 + .../ultrainfer/vision/visualize/keypoint.cc | 57 + .../ultrainfer/vision/visualize/matting.cc | 152 + .../ultrainfer/vision/visualize/mot.cc | 79 + .../ultrainfer/vision/visualize/ocr.cc | 122 + .../ultrainfer/vision/visualize/perception.cc | 195 + .../visualize/remove_small_connnected_area.cc | 112 + .../vision/visualize/segmentation.cc | 75 + .../vision/visualize/segmentation_arm.cc | 177 + .../vision/visualize/segmentation_arm.h | 27 + .../vision/visualize/swap_background.cc | 180 + .../vision/visualize/swap_background_arm.cc | 238 + .../vision/visualize/swap_background_arm.h | 32 + .../ultrainfer/vision/visualize/visualize.cc | 67 + .../ultrainfer/vision/visualize/visualize.h | 251 + .../vision/visualize/visualize_pybind.cc | 256 + 827 files changed, 106390 insertions(+), 3 deletions(-) create mode 100644 libs/paddlex-hpi/MANIFEST.in create mode 100644 libs/paddlex-hpi/README.md create mode 100644 libs/paddlex-hpi/pyproject.toml create mode 100644 libs/paddlex-hpi/requirements.txt create mode 100644 libs/paddlex-hpi/scripts/build_wheel.sh create mode 100644 libs/paddlex-hpi/scripts/run_tests.sh create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_config.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_model_info.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/base.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py create mode 100644 
libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py create mode 100644 libs/paddlex-hpi/test_requirements.txt create mode 100644 libs/paddlex-hpi/tests/__init__.py create mode 100644 libs/paddlex-hpi/tests/models/__init__.py create mode 100644 libs/paddlex-hpi/tests/models/base.py create mode 100644 libs/paddlex-hpi/tests/models/test_anomaly_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_formula_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_general_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_image_classification.py create mode 100644 libs/paddlex-hpi/tests/models/test_image_unwarping.py create mode 100644 libs/paddlex-hpi/tests/models/test_instance_segmentation.py create mode 100644 libs/paddlex-hpi/tests/models/test_multilabel_classification.py create mode 100644 libs/paddlex-hpi/tests/models/test_object_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_semantic_segmentation.py create mode 100644 libs/paddlex-hpi/tests/models/test_table_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_text_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_text_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_ad.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_cls.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_fc.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/__init__.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/cv.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/download.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/misc.py create mode 100644 libs/ultrainfer/.gitignore create mode 100755 libs/ultrainfer/CMakeLists.txt create mode 100755 libs/ultrainfer/LICENSE create mode 100755 libs/ultrainfer/ThirdPartyNotices.txt create mode 100755 libs/ultrainfer/UltraInfer.cmake.in create mode 100755 libs/ultrainfer/UltraInferCSharp.cmake.in create mode 100755 libs/ultrainfer/VERSION_NUMBER create mode 100755 libs/ultrainfer/cmake/UltraInferConfig.cmake create mode 100755 libs/ultrainfer/cmake/ascend.cmake create mode 100755 libs/ultrainfer/cmake/build_paddle2onnx.cmake create mode 100755 libs/ultrainfer/cmake/build_tools.cmake create mode 100755 libs/ultrainfer/cmake/check.cmake create mode 100755 libs/ultrainfer/cmake/config_cpack.cmake create mode 100755 libs/ultrainfer/cmake/cuda.cmake create mode 100755 libs/ultrainfer/cmake/cvcuda.cmake create mode 100755 libs/ultrainfer/cmake/faiss.cmake create mode 100755 libs/ultrainfer/cmake/fast_tokenizer.cmake create mode 100755 libs/ultrainfer/cmake/flycv.cmake create mode 100755 libs/ultrainfer/cmake/gflags.cmake create mode 100755 libs/ultrainfer/cmake/glog.cmake create mode 100755 libs/ultrainfer/cmake/gtest.cmake create mode 100755 libs/ultrainfer/cmake/horizon.cmake create mode 100755 libs/ultrainfer/cmake/kunlunxin.cmake create mode 100755 
libs/ultrainfer/cmake/onnxruntime.cmake create mode 100755 libs/ultrainfer/cmake/opencv.cmake create mode 100755 libs/ultrainfer/cmake/openvino.cmake create mode 100755 libs/ultrainfer/cmake/paddle2onnx.cmake create mode 100755 libs/ultrainfer/cmake/paddle_inference.cmake create mode 100755 libs/ultrainfer/cmake/paddlelite.cmake create mode 100755 libs/ultrainfer/cmake/poros.cmake create mode 100755 libs/ultrainfer/cmake/rknpu2.cmake create mode 100755 libs/ultrainfer/cmake/sophgo.cmake create mode 100755 libs/ultrainfer/cmake/summary.cmake create mode 100755 libs/ultrainfer/cmake/timvx.cmake create mode 100755 libs/ultrainfer/cmake/toolchain.cmake create mode 100755 libs/ultrainfer/cmake/tvm.cmake create mode 100755 libs/ultrainfer/cmake/utils.cmake create mode 100755 libs/ultrainfer/cpack/debian_postinst.in create mode 100755 libs/ultrainfer/cpack/debian_prerm.in create mode 100755 libs/ultrainfer/cpack/rpm_postinst.in create mode 100755 libs/ultrainfer/cpack/rpm_postrm.in create mode 100755 libs/ultrainfer/python/__init__.py create mode 100755 libs/ultrainfer/python/requirements.txt create mode 100755 libs/ultrainfer/python/scripts/__init__.py create mode 100755 libs/ultrainfer/python/scripts/build_gpu.sh create mode 100755 libs/ultrainfer/python/scripts/process_libraries.py.in create mode 100755 libs/ultrainfer/python/setup.py create mode 100755 libs/ultrainfer/python/ultrainfer/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in create mode 100755 libs/ultrainfer/python/ultrainfer/download.py create mode 100755 libs/ultrainfer/python/ultrainfer/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/pipeline/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/base.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/runtime.py create mode 100755 libs/ultrainfer/python/ultrainfer/text/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/text/uie/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/example_resource.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_config.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_env.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py create mode 
100755 libs/ultrainfer/python/ultrainfer/utils/misc.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/manager.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py create mode 100755 
libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py 
create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py create mode 100755 libs/ultrainfer/scripts/__init__.py create mode 100755 libs/ultrainfer/scripts/ascend_init.sh create mode 100755 libs/ultrainfer/scripts/build_bcloud_lib.py create mode 100755 libs/ultrainfer/scripts/clean_sdk.sh create mode 100755 libs/ultrainfer/scripts/copy_directory.py create mode 100755 libs/ultrainfer/scripts/linux/_build_cpp.sh create mode 100755 libs/ultrainfer/scripts/linux/_build_py.sh create mode 100755 libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh create mode 100755 libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh create mode 100755 libs/ultrainfer/scripts/patch_lib.sh create mode 100755 libs/ultrainfer/scripts/patch_paddle_inference.py create mode 100755 libs/ultrainfer/scripts/patch_paddle_lite.py create mode 100755 libs/ultrainfer/scripts/ultrainfer_init.bat create mode 100755 libs/ultrainfer/scripts/ultrainfer_init.sh create mode 100755 libs/ultrainfer/ultrainfer/CMakeLists.txt create mode 100755 libs/ultrainfer/ultrainfer/benchmark/benchmark.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/option.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/results.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/benchmark/utils.h create mode 100755 libs/ultrainfer/ultrainfer/core/allocate.cc create mode 100755 libs/ultrainfer/ultrainfer/core/allocate.h create mode 100755 libs/ultrainfer/ultrainfer/core/config.h.in create mode 100755 libs/ultrainfer/ultrainfer/core/fd_scalar.h create mode 100755 libs/ultrainfer/ultrainfer/core/fd_tensor.cc create mode 100755 libs/ultrainfer/ultrainfer/core/fd_tensor.h create mode 100755 libs/ultrainfer/ultrainfer/core/fd_type.cc create mode 100755 libs/ultrainfer/ultrainfer/core/fd_type.h create mode 100755 libs/ultrainfer/ultrainfer/core/float16.h create mode 100755 libs/ultrainfer/ultrainfer/function/cast.cc create mode 100755 libs/ultrainfer/ultrainfer/function/cast.h create mode 100755 libs/ultrainfer/ultrainfer/function/clip.cc create mode 100755 libs/ultrainfer/ultrainfer/function/clip.h create mode 100755 libs/ultrainfer/ultrainfer/function/concat.cc create mode 100755 libs/ultrainfer/ultrainfer/function/concat.h create mode 100755 
libs/ultrainfer/ultrainfer/function/cuda_cast.cu create mode 100755 libs/ultrainfer/ultrainfer/function/cuda_cast.h create mode 100755 libs/ultrainfer/ultrainfer/function/cumprod.cc create mode 100755 libs/ultrainfer/ultrainfer/function/cumprod.h create mode 100755 libs/ultrainfer/ultrainfer/function/eigen.cc create mode 100755 libs/ultrainfer/ultrainfer/function/eigen.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise.cc create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise_base.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/full.cc create mode 100755 libs/ultrainfer/ultrainfer/function/full.h create mode 100755 libs/ultrainfer/ultrainfer/function/functions.h create mode 100755 libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc create mode 100755 libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h create mode 100755 libs/ultrainfer/ultrainfer/function/gaussian_random.cc create mode 100755 libs/ultrainfer/ultrainfer/function/gaussian_random.h create mode 100755 libs/ultrainfer/ultrainfer/function/isfinite.cc create mode 100755 libs/ultrainfer/ultrainfer/function/isfinite.h create mode 100755 libs/ultrainfer/ultrainfer/function/linspace.cc create mode 100755 libs/ultrainfer/ultrainfer/function/linspace.h create mode 100755 libs/ultrainfer/ultrainfer/function/math.cc create mode 100755 libs/ultrainfer/ultrainfer/function/math.h create mode 100755 libs/ultrainfer/ultrainfer/function/math_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/pad.cc create mode 100755 libs/ultrainfer/ultrainfer/function/pad.h create mode 100755 libs/ultrainfer/ultrainfer/function/quantile.cc create mode 100755 libs/ultrainfer/ultrainfer/function/quantile.h create mode 100755 libs/ultrainfer/ultrainfer/function/reduce.cc create mode 100755 libs/ultrainfer/ultrainfer/function/reduce.h create mode 100755 libs/ultrainfer/ultrainfer/function/reduce_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/slice.cc create mode 100755 libs/ultrainfer/ultrainfer/function/slice.h create mode 100755 libs/ultrainfer/ultrainfer/function/softmax.cc create mode 100755 libs/ultrainfer/ultrainfer/function/softmax.h create mode 100755 libs/ultrainfer/ultrainfer/function/sort.cc create mode 100755 libs/ultrainfer/ultrainfer/function/sort.h create mode 100755 libs/ultrainfer/ultrainfer/function/split.cc create mode 100755 libs/ultrainfer/ultrainfer/function/split.h create mode 100755 libs/ultrainfer/ultrainfer/function/tile.cc create mode 100755 libs/ultrainfer/ultrainfer/function/tile.h create mode 100755 libs/ultrainfer/ultrainfer/function/transpose.cc create mode 100755 libs/ultrainfer/ultrainfer/function/transpose.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/main.cc.in create mode 100755 libs/ultrainfer/ultrainfer/pybind/main.h create mode 100755 
libs/ultrainfer/ultrainfer/pybind/runtime.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/enum_variables.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/enum_variables.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime_option.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime_option.h create mode 100755 libs/ultrainfer/ultrainfer/text.h create mode 100755 libs/ultrainfer/ultrainfer/text/common/option.h create mode 100755 libs/ultrainfer/ultrainfer/text/common/result.cc create mode 100755 libs/ultrainfer/ultrainfer/text/common/result.h create mode 100755 libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/text/text_model.cc create mode 100755 libs/ultrainfer/ultrainfer/text/text_model.h create mode 100755 libs/ultrainfer/ultrainfer/text/text_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/text/uie/model.cc create mode 100755 libs/ultrainfer/ultrainfer/text/uie/model.h create mode 100755 libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/ultrainfer_model.cc create mode 100755 libs/ultrainfer/ultrainfer/ultrainfer_model.h create mode 100755 libs/ultrainfer/ultrainfer/utils/axis_utils.h create mode 100755 libs/ultrainfer/ultrainfer/utils/path.h 
create mode 100755 libs/ultrainfer/ultrainfer/utils/perf.h create mode 100755 libs/ultrainfer/ultrainfer/utils/unique_ptr.h create mode 100755 libs/ultrainfer/ultrainfer/utils/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/utils/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast.h create mode 
100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/crop.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/transform.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/result.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/result.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h 
create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/face_align.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/nms.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu create mode 100755 
libs/ultrainfer/ultrainfer/vision/vision_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/classification.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/detection.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/matting.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/mot.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/perception.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc diff --git a/.precommit/check_custom.py b/.precommit/check_custom.py index db2cc50563..f336d5e98d 100644 --- a/.precommit/check_custom.py +++ b/.precommit/check_custom.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import re import sys @@ -40,9 +41,12 @@ def check(file_path): if not content.startswith(LICENSE_TEXT): print(f"License header missing in {file_path}") return False - if "import paddle" in content or "from paddle import " in content: - print(f"Please use `lazy_paddle` instead `paddle` when import in {file_path}") - return False + if "paddlex" in file_path.split(os.sep): + if "import paddle" in content or "from paddle import " in content: + print( + f"Please use `lazy_paddle` instead `paddle` when import in {file_path}" + ) + return False return True diff --git a/libs/paddlex-hpi/MANIFEST.in b/libs/paddlex-hpi/MANIFEST.in new file mode 100644 index 0000000000..4ee1e6eca8 --- /dev/null +++ b/libs/paddlex-hpi/MANIFEST.in @@ -0,0 +1,2 @@ +include src/paddlex_hpi/py.typed +include src/paddlex_hpi/model_info_collection.json diff --git a/libs/paddlex-hpi/README.md b/libs/paddlex-hpi/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/paddlex-hpi/pyproject.toml b/libs/paddlex-hpi/pyproject.toml new file mode 100644 index 0000000000..3822b24ffe --- /dev/null +++ b/libs/paddlex-hpi/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools >= 69"] +build-backend = "setuptools.build_meta" + +[project] +name = "paddlex-hpi" +version = "3.0.0.b2" +description = "" +readme = "README.md" +authors = [] +dynamic = ["dependencies", "optional-dependencies"] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} +optional-dependencies.test = {file = ["test_requirements.txt"]} diff --git a/libs/paddlex-hpi/requirements.txt b/libs/paddlex-hpi/requirements.txt new file mode 100644 index 0000000000..2ac37f06f0 --- /dev/null +++ b/libs/paddlex-hpi/requirements.txt @@ -0,0 +1,7 @@ +# ultrainfer +# paddlex +importlib-resources >= 6.4 +numpy >= 1.21 +pandas >= 1.3.3 +pydantic >= 2 +typing-extensions >= 4.11 diff --git a/libs/paddlex-hpi/scripts/build_wheel.sh b/libs/paddlex-hpi/scripts/build_wheel.sh new file mode 100644 index 0000000000..82f3001917 --- /dev/null +++ b/libs/paddlex-hpi/scripts/build_wheel.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +python -m pip wheel -w wheels/original --no-deps . diff --git a/libs/paddlex-hpi/scripts/run_tests.sh b/libs/paddlex-hpi/scripts/run_tests.sh new file mode 100644 index 0000000000..acfaaa8e06 --- /dev/null +++ b/libs/paddlex-hpi/scripts/run_tests.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +python -m pytest tests diff --git a/libs/paddlex-hpi/src/paddlex_hpi/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/__init__.py new file mode 100644 index 0000000000..a2aacd841a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__version__ = "3.0.0.beta2" diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_config.py b/libs/paddlex-hpi/src/paddlex_hpi/_config.py new file mode 100644 index 0000000000..fa129c4911 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_config.py @@ -0,0 +1,218 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, Type, Union + +import ultrainfer as ui +from paddlex.utils import logging +from pydantic import BaseModel, ConfigDict, Field, field_validator +from typing_extensions import Annotated, TypeAlias, TypedDict, assert_never + +from paddlex_hpi._model_info import get_model_info +from paddlex_hpi._utils.typing import Backend, DeviceType + + +class _BackendConfig(BaseModel): + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + raise NotImplementedError + + +class PaddleInferConfig(_BackendConfig): + cpu_num_threads: int = 8 + enable_mkldnn: bool = True + enable_trt: bool = False + trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None + trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None + enable_log_info: bool = False + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_paddle_infer_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + option.paddle_infer_option.enable_mkldnn = self.enable_mkldnn + option.paddle_infer_option.enable_trt = self.enable_trt + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.trt_dynamic_shapes is not None: + for name, shapes in self.trt_dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + if self.trt_dynamic_shape_input_data is not None: + for name, data in self.trt_dynamic_shape_input_data.items(): + option.trt_option.set_input_data(name, *data) + if self.enable_trt: + option.paddle_infer_option.collect_trt_shape = True + option.paddle_infer_option.collect_trt_shape_by_device = True + option.paddle_infer_option.enable_log_info = self.enable_log_info + + +class OpenVINOConfig(_BackendConfig): + cpu_num_threads: int = 8 + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_openvino_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + + +class ONNXRuntimeConfig(_BackendConfig): + cpu_num_threads: int = 8 + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_ort_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + + +class TensorRTConfig(_BackendConfig): + dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_trt_backend() + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.dynamic_shapes is not None: + for name, shapes in self.dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + + 
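Each of the backend configuration classes defined in `_config.py` follows the same pattern: a small pydantic model whose `update_ui_option` method maps its fields onto an `ultrainfer.RuntimeOption`. The lines below are a minimal sketch of how one of these configs is meant to be applied; the model directory path is a hypothetical placeholder, and only calls that appear in the module itself are used.

from pathlib import Path

import ultrainfer as ui
from paddlex_hpi._config import PaddleInferConfig

# Build a config with explicit CPU settings; unset fields keep their defaults.
config = PaddleInferConfig(cpu_num_threads=4, enable_mkldnn=True)

# Translate the config into runtime settings: this selects the Paddle Inference
# backend, sets the CPU thread count, and points the TensorRT serialization
# file into the (placeholder) model directory.
option = ui.RuntimeOption()
config.update_ui_option(option, Path("ResNet50_infer"))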
+class PaddleTensorRTConfig(_BackendConfig): + dynamic_shapes: Dict[str, List[List[int]]] + dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None + enable_log_info: bool = False + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.dynamic_shapes is not None: + option.paddle_infer_option.collect_trt_shape = True + # TODO: Support setting collect_trt_shape_by_device + for name, shapes in self.dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + if self.dynamic_shape_input_data is not None: + for name, data in self.dynamic_shape_input_data.items(): + option.trt_option.set_input_data(name, *data) + option.paddle_infer_option.enable_log_info = self.enable_log_info + + +# Should we use tagged unions? +BackendConfig: TypeAlias = Union[ + PaddleInferConfig, + OpenVINOConfig, + ONNXRuntimeConfig, + TensorRTConfig, +] + + +def get_backend_config_type(backend: Backend, /) -> Type[BackendConfig]: + backend_config_type: Type[BackendConfig] + if backend == "paddle_infer": + backend_config_type = PaddleInferConfig + elif backend == "openvino": + backend_config_type = OpenVINOConfig + elif backend == "onnx_runtime": + backend_config_type = ONNXRuntimeConfig + elif backend == "tensorrt": + backend_config_type = TensorRTConfig + else: + assert_never(backend) + return backend_config_type + + +# Can I create this dynamically and automatically? +class BackendConfigs(TypedDict, total=False): + paddle_infer: PaddleInferConfig + openvino: OpenVINOConfig + onnx_runtime: ONNXRuntimeConfig + tensorrt: TensorRTConfig + paddle_tensorrt: PaddleTensorRTConfig + + +class HPIConfig(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + selected_backends: Optional[Dict[DeviceType, Backend]] = None + # For backward compatilibity + backend_configs: Annotated[ + Optional[BackendConfigs], Field(validation_alias="backend_config") + ] = None + + def get_backend_and_config( + self, model_name: str, device_type: DeviceType + ) -> Tuple[Backend, BackendConfig]: + # Do we need an extensible selector? 
+ model_info = get_model_info(model_name, device_type) + if model_info: + backend_config_pairs = model_info["backend_config_pairs"] + else: + backend_config_pairs = [] + config_dict: Dict[str, Any] = {} + if self.selected_backends and device_type in self.selected_backends: + backend = self.selected_backends[device_type] + for pair in backend_config_pairs: + # Use the first one + if pair[0] == self.selected_backends[device_type]: + config_dict.update(pair[1]) + break + else: + if backend_config_pairs: + # Currently we select the first one + backend = backend_config_pairs[0][0] + config_dict.update(backend_config_pairs[0][1]) + else: + backend = "paddle_infer" + if self.backend_configs and backend in self.backend_configs: + config_dict.update( + self.backend_configs[backend].model_dump(exclude_unset=True) + ) + backend_config_type = get_backend_config_type(backend) + backend_config = backend_config_type.model_validate(config_dict) + return backend, backend_config + + # XXX: For backward compatilibity + @field_validator("selected_backends", mode="before") + @classmethod + def _hack_selected_backends(cls, data: Any) -> Any: + if isinstance(data, Mapping): + new_data = dict(data) + for device_type in new_data: + if new_data[device_type] == "paddle_tensorrt": + warnings.warn( + "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.", + FutureWarning, + ) + new_data[device_type] = "paddle_infer" + return new_data + + @field_validator("backend_configs", mode="before") + @classmethod + def _hack_backend_configs(cls, data: Any) -> Any: + if isinstance(data, Mapping): + new_data = dict(data) + if new_data and "paddle_tensorrt" in new_data: + warnings.warn( + "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.", + FutureWarning, + ) + if "paddle_infer" not in new_data: + new_data["paddle_infer"] = {} + pptrt_cfg = new_data["paddle_tensorrt"] + logging.warning("`paddle_infer.enable_trt` will be set to `True`.") + new_data["paddle_infer"]["enable_trt"] = True + new_data["paddle_infer"]["trt_dynamic_shapes"] = pptrt_cfg[ + "dynamic_shapes" + ] + if "dynamic_shape_input_data" in pptrt_cfg: + new_data["paddle_infer"]["trt_dynamic_shape_input_data"] = ( + pptrt_cfg["dynamic_shape_input_data"] + ) + logging.warning("`paddle_tensorrt.enable_log_info` will be ignored.") + return new_data diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py b/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py new file mode 100644 index 0000000000..516e07b7d9 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py @@ -0,0 +1,59 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
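The two validators above exist purely for backward compatibility: a deprecated `paddle_tensorrt` selection is rewritten into the equivalent `paddle_infer` settings before normal validation runs. A hedged sketch of that path (the model name and input name are illustrative; assumes paddlex-hpi from this patch is installed):

from paddlex_hpi._config import HPIConfig

legacy = {
    "selected_backends": {"gpu": "paddle_tensorrt"},  # deprecated backend name
    "backend_config": {                               # old field name, accepted via the alias
        "paddle_tensorrt": {"dynamic_shapes": {"x": [[1, 3, 224, 224]] * 3}},
    },
}

# A FutureWarning is emitted for each deprecated key during validation.
hpi_config = HPIConfig.model_validate(legacy)
backend, backend_config = hpi_config.get_backend_and_config(
    model_name="ResNet50", device_type="gpu"
)
# The deprecated selection has been folded into the Paddle Inference config
# with TensorRT turned on.
print(backend)                            # "paddle_infer"
print(backend_config.enable_trt)          # True
print(backend_config.trt_dynamic_shapes)  # {"x": [[1, 3, 224, 224], ...]}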
+ +import json +import platform +from functools import lru_cache +from typing import Optional + +from importlib_resources import files +from paddlex.utils import logging + +from paddlex_hpi._utils.typing import DeviceType + +_DB_PATH: str = "model_info_collection.json" + + +@lru_cache(1) +def _get_model_info_collection() -> dict: + with files("paddlex_hpi").joinpath(_DB_PATH).open("r", encoding="utf-8") as f: + _model_info_collection = json.load(f) + return _model_info_collection + + +def get_model_info(model_name: str, device_type: DeviceType) -> Optional[dict]: + # TODO: Typed model info and nearest referents + model_info_collection = _get_model_info_collection() + uname = platform.uname() + arch = uname.machine.lower() + if arch not in model_info_collection: + return None + logging.debug("Getting model information for arch: %s", arch) + model_info_collection = model_info_collection[arch] + os = uname.system.lower() + if os not in model_info_collection: + return None + logging.debug("Getting model information for OS: %s", os) + model_info_collection = model_info_collection[os] + if device_type == "cpu": + device = "cpu" + elif device_type == "gpu": + device = "gpu_cuda118_cudnn86" + else: + return None + logging.debug("Getting model information for device: %s", device) + model_info_collection = model_info_collection[device] + if model_name not in model_info_collection: + return None + return model_info_collection[model_name] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py new file mode 100644 index 0000000000..58476146f4 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py @@ -0,0 +1,20 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
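`get_model_info` narrows the bundled database by CPU architecture, operating system, and device before looking up the model name, and returns `None` whenever any level is missing. A small lookup sketch (assumes paddlex-hpi from this patch is installed; on platforms other than x86_64 Linux the call simply returns `None`):

from paddlex_hpi._model_info import get_model_info

info = get_model_info("PP-OCRv4_mobile_det", "cpu")
if info is None:
    print("No prior knowledge about this model on this platform.")
else:
    # An ordered list of (backend, config-overrides) pairs, most preferred first.
    for backend, overrides in info["backend_config_pairs"]:
        print(backend, overrides)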
+ +import os +from typing import Optional + + +def get_compat_version() -> Optional[str]: + return os.getenv("PXD_COMPAT_VERSION") diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py new file mode 100644 index 0000000000..c9c7fde505 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py @@ -0,0 +1,25 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from typing import Union + + +def parse_scale(s: Union[float, str]) -> float: + if isinstance(s, float): + return s + if re.fullmatch(r"1(?:\.|\.0)?\s*/\s*255(?:\.|\.0)?", s): + return 1 / 255 + else: + raise ValueError("Invalid scale") diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py new file mode 100644 index 0000000000..ac27f33775 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py @@ -0,0 +1,24 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
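`parse_scale` deliberately accepts only the textual spellings of 1/255 (plus plain floats); everything else is rejected. An illustrative sketch (assumes paddlex-hpi from this patch is installed):

from paddlex_hpi._utils.misc import parse_scale

print(parse_scale(0.5))            # floats pass through unchanged
print(parse_scale("1/255"))        # 0.00392156862745098
print(parse_scale("1.0 / 255.0"))  # the ".0" suffixes and spaces are tolerated

try:
    parse_scale("2/255")           # any other string raises
except ValueError:
    print("unsupported scale string")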
+ +from typing import Any, Dict, List, Literal, TypeVar + +from typing_extensions import TypeAlias + +Data: TypeAlias = Dict[str, Any] +BatchData: TypeAlias = List[Data] +DataT = TypeVar("DataT", Data, BatchData) + +DeviceType: TypeAlias = Literal["cpu", "gpu"] +Backend: TypeAlias = Literal["paddle_infer", "openvino", "onnx_runtime", "tensorrt"] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json b/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json new file mode 100644 index 0000000000..98623a89cc --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json @@ -0,0 +1,4422 @@ +{ + "x86_64": { + "linux": { + "cpu": { + "RT-DETR-H_layout_17cls":{ + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-largesize-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B0_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_448_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileFaceNet": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_LCNet_x2_5_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace-FPN-SSH": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_large": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_det": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "STFPM": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18": 
{ + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ResNet152_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ResNet200_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x0_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x1_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x2_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x2_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNetV2_small": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_large": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "MobileNetV3_large_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] 
+ }, + "MobileNetV3_large_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_tiny": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_small": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_base_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "ConvNeXt_base_384": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_large_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "ConvNeXt_large_384": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_25": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_5": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_75": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x1_0": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x2_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_tiny_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + 
"SwinTransformer_small_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_small": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_tiny": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B1": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B2": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B3": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B6": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "CLIP_vit_base_patch16_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_large_patch14_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-X": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-M": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-N": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-T": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-S": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-M": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-L": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-X": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + 
}, + "YOLOv3-DarkNet53": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-ResNet50_vd_DCN": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-MobileNetV3": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R18": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PicoDet-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Mask-RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Mask-RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Deeplabv3-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3-R101": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R101": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LiteSeg-T": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W48": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "OCRNet_HRNet-W18": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B0": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B1": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B2": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B3": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B5": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_server_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "PP-OCRv4_mobile_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_RepSVTR_rec": { + 
"backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_SVTRv2_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_mobile_det": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_layout_1x": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SLANet": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SLANet_plus": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + } + }, + "gpu_cuda118_cudnn86": { + "RT-DETR-H_layout_17cls": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "LaTeX_OCR_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-largesize-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-S": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B0_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_448_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileFaceNet": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_LCNet_x2_5_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + 
}, + "BlazeFace-FPN-SSH": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "STFPM": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18_vd": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34_vd": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet200_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"PP-LCNet_x0_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_35": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x2_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x2_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_base": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_large": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_35": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_35": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"MobileNetV3_small_x0_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_base_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_base_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_large_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_large_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "MobileNetV1_x0_25": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_75": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_25": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x2_0": 
{ + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_tiny_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_small_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_small": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNet_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_base": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B1": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B2": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "CLIP_vit_large_patch14_224": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + 
"enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-M": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-N": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-T": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-S": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-M": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-L": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-X": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOv3-DarkNet53": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOv3-ResNet50_vd_DCN": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-MobileNetV3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R18": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-S": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-L": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + 
] + ] + }, + "Mask-RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Mask-RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Deeplabv3-R50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3-R101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LiteSeg-T": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W48": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W18": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SegFormer-B0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B1": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B2": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"PP-OCRv4_mobile_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_RepSVTR_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_SVTRv2_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_mobile_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_layout_1x": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SLANet": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SLANet_plus": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + } + } + } + } +} diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py new file mode 100644 index 0000000000..a60b103a1b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex_hpi.models.anomaly_detection import UadPredictor +from paddlex_hpi.models.base import HPPredictor +from paddlex_hpi.models.face_recognition import FaceRecPredictor +from paddlex_hpi.models.formula_recognition import LaTeXOCRPredictor +from paddlex_hpi.models.general_recognition import ShiTuRecPredictor +from paddlex_hpi.models.image_classification import ClasPredictor +from paddlex_hpi.models.image_unwarping import WarpPredictor +from paddlex_hpi.models.instance_segmentation import InstanceSegPredictor +from paddlex_hpi.models.multilabel_classification import MLClasPredictor +from paddlex_hpi.models.object_detection import DetPredictor +from paddlex_hpi.models.semantic_segmentation import SegPredictor +from paddlex_hpi.models.table_recognition import TablePredictor +from paddlex_hpi.models.text_detection import TextDetPredictor +from paddlex_hpi.models.text_recognition import TextRecPredictor +from paddlex_hpi.models.ts_ad import TSAdPredictor +from paddlex_hpi.models.ts_cls import TSClsPredictor +from paddlex_hpi.models.ts_fc import TSFcPredictor + +__all__ = [ + "UadPredictor", + "HPPredictor", + "FaceRecPredictor", + "LaTeXOCRPredictor", + "ShiTuRecPredictor", + "ClasPredictor", + "WarpPredictor", + "InstanceSegPredictor", + "MLClasPredictor", + "DetPredictor", + "SegPredictor", + "TablePredictor", + "TextDetPredictor", + "TextRecPredictor", + "TSAdPredictor", + "TSClsPredictor", + "TSFcPredictor", +] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py new file mode 100644 index 0000000000..88acf7216b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
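All predictors are re-exported from `paddlex_hpi.models`, so callers only need this one import path. A minimal construction sketch (the model directory and device are placeholders; the constructor parameters come from the `HPPredictor` base class added later in this patch):

from paddlex_hpi.models import TextDetPredictor

# Point the predictor at an exported PaddleX text-detection model directory.
predictor = TextDetPredictor(
    model_dir="./PP-OCRv4_mobile_det",
    device="gpu:0",
)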
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import SegResult +from paddlex.modules.anomaly_detection.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class UadPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.segmentation.PyOnlyAnomalyDetectionModel: + model = ui.vision.segmentation.PyOnlyAnomalyDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + uad_result = self._create_uad_result(data, ui_result) + results.append({"result": uad_result}) + return results + + def _create_uad_result(self, data: Data, ui_result: Any) -> SegResult: + pred = np.array(ui_result.label_map, dtype=np.int32).reshape(ui_result.shape) + pred = pred[np.newaxis] + dic = { + "input_path": data["input_path"], + "pred": pred, + } + return SegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/base.py b/libs/paddlex-hpi/src/paddlex_hpi/models/base.py new file mode 100644 index 0000000000..638e564cbd --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/base.py @@ -0,0 +1,189 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
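`UadPredictor` wraps the py-only anomaly-detection model and repackages each label map as a PaddleX `SegResult` with a leading channel axis. A hedged usage sketch (the model directory and image path are placeholders; assumes the packages from this patch are installed):

from paddlex_hpi.models import UadPredictor

predictor = UadPredictor(model_dir="./STFPM", device="gpu:0")
# `apply` (defined on the base class below) reads images in batches and yields
# one list of {"result": SegResult} items per batch.
for batch in predictor.apply("sample.png"):
    for item in batch:
        seg_result = item["result"]
        # The label map is int32 with shape (1, H, W).
        print(seg_result["pred"].shape, seg_result["pred"].dtype)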
+ +import abc +from os import PathLike +from pathlib import Path +from typing import ( + Any, + Dict, + Final, + Generator, + List, + Optional, + Protocol, + TypedDict, + Union, +) + +import ultrainfer as ui +from ultrainfer.model import BaseUltraInferModel +from paddlex.inference.components import ReadImage, ReadTS +from paddlex.inference.models import BasePredictor +from paddlex.inference.utils.new_ir_blacklist import NEWIR_BLOCKLIST +from paddlex.utils import device as device_helper +from paddlex.utils import logging +from paddlex.utils.subclass_register import AutoRegisterABCMetaClass +from typing_extensions import assert_never + +from paddlex_hpi._config import HPIConfig +from paddlex_hpi._utils.typing import Backend, BatchData + +HPI_CONFIG_KEY: Final[str] = "Hpi" + + +class HPIParams(TypedDict, total=False): + serial_number: Optional[str] + update_license: bool + config: Dict[str, Any] + + +class HPPredictor(BasePredictor, metaclass=AutoRegisterABCMetaClass): + __is_base = True + + def __init__( + self, + model_dir: Union[str, PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__(model_dir=model_dir, config=config) + self._device = device or device_helper.get_default_device() + self._hpi_params = hpi_params or {} + self._hpi_config = self._get_hpi_config() + self._ui_model = self.build_ui_model() + + @property + def model_path(self) -> Path: + return self.model_dir / f"{self.MODEL_FILE_PREFIX}.pdmodel" + + @property + def params_path(self) -> Path: + return self.model_dir / f"{self.MODEL_FILE_PREFIX}.pdiparams" + + def set_predictor(self, **kwargs: Any) -> None: + if "device" in kwargs: + device = kwargs.pop("device") + if device is not None: + if device != self._device: + raise RuntimeError("Currently, changing devices is not supported.") + if kwargs: + raise TypeError(f"Unexpected arguments: {kwargs}") + + def build_ui_model(self) -> BaseUltraInferModel: + option = self._create_ui_option() + return self._build_ui_model(option) + + @abc.abstractmethod + def _build_ui_model(self, option: ui.RuntimeOption) -> BaseUltraInferModel: + raise NotImplementedError + + def _get_hpi_config(self) -> HPIConfig: + if HPI_CONFIG_KEY not in self.config: + logging.debug("Key %r not found in the config", HPI_CONFIG_KEY) + hpi_config = HPIConfig.model_validate( + { + **self.config.get(HPI_CONFIG_KEY, {}), + **self._hpi_params.get("config", {}), + } + ) + return hpi_config + + def _get_selected_backend(self) -> Backend: + device_type, _ = device_helper.parse_device(self._device) + backend = self._hpi_config.get_selected_backend(self.model_name, device_type) + return backend + + def _create_ui_option(self) -> ui.RuntimeOption: + option = ui.RuntimeOption() + # HACK: Disable new IR for models that are known to have issues with the + # new IR. 
+ if self.model_name in NEWIR_BLOCKLIST: + option.paddle_infer_option.enable_new_ir = False + device_type, device_ids = device_helper.parse_device(self._device) + if device_type == "cpu": + pass + elif device_type == "gpu": + if device_ids is None: + device_ids = [0] + if len(device_ids) > 1: + logging.warning( + "Multiple devices are specified (%s), but only the first one will be used.", + self._device, + ) + option.use_gpu(device_ids[0]) + else: + assert_never(device_type) + backend, backend_config = self._hpi_config.get_backend_and_config( + model_name=self.model_name, device_type=device_type + ) + logging.info("Backend: %s", backend) + logging.info("Backend config: %s", backend_config) + backend_config.update_ui_option(option, self.model_dir) + return option + + +class _DataReaderLike(Protocol): + batch_size: int + + def __call__(self, input_list: Any) -> Generator[BatchData, None, None]: ... + + +class HPPredictorWithDataReader(HPPredictor): + def __init__( + self, + model_dir: Union[str, PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._batch_size = 1 + self._data_reader = self._build_data_reader() + + def set_predictor(self, **kwargs: Any) -> None: + batch_size = kwargs.pop("batch_size", None) + super().set_predictor(**kwargs) + if batch_size is not None: + self._batch_size = batch_size + self._data_reader.batch_size = batch_size + logging.info("Batch size updated to %d", self._batch_size) + + def apply(self, input: Any) -> Generator[BatchData, None, None]: + for batch_data in self._data_reader(input): + yield self._predict(batch_data) + + @abc.abstractmethod + def _build_data_reader(self) -> _DataReaderLike: + raise NotImplementedError + + @abc.abstractmethod + def _predict(self, batch_data: BatchData) -> BatchData: + raise NotImplementedError + + +class CVPredictor(HPPredictorWithDataReader): + def _build_data_reader(self) -> _DataReaderLike: + return ReadImage(batch_size=self._batch_size, format="BGR") + + +class TSPredictor(HPPredictorWithDataReader): + def _build_data_reader(self) -> _DataReaderLike: + return ReadTS(batch_size=self._batch_size) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py new file mode 100644 index 0000000000..12c89c02f0 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py @@ -0,0 +1,23 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
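[Editor's note: to make the contract established by base.py explicit, here is a schematic CVPredictor subclass. The model name in `entities` and the `ui.vision.SomeModel` class are hypothetical placeholders, not real ultrainfer APIs; the concrete predictors added later in this patch follow this same pattern.]

import ultrainfer as ui
from paddlex_hpi._utils.typing import BatchData
from paddlex_hpi.models.base import CVPredictor


class MyPredictor(CVPredictor):
    # Model names handled by this predictor; used for auto-registration.
    entities = ["MyModel"]  # hypothetical

    def _build_ui_model(self, option: ui.RuntimeOption):
        # Build the underlying ultrainfer model with the runtime option
        # prepared by HPPredictor._create_ui_option().
        return ui.vision.SomeModel(  # hypothetical ultrainfer class
            str(self.model_path),
            str(self.params_path),
            runtime_option=option,
        )

    def _predict(self, batch_data: BatchData) -> BatchData:
        # Each item produced by the ReadImage reader carries the decoded
        # image under "img"; wrap each model output under the "result" key.
        imgs = [data["img"] for data in batch_data]
        ui_results = self._ui_model.batch_predict(imgs)
        return [{"result": r} for r in ui_results]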
+ +from typing import List + +from paddlex.modules.face_recognition.model_list import MODELS + +from paddlex_hpi.models.general_recognition import ShiTuRecPredictor + + +class FaceRecPredictor(ShiTuRecPredictor): + entities = MODELS diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py new file mode 100644 index 0000000000..4a15f0e7dc --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import FormulaRecResult +from paddlex.modules.formula_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class LaTeXOCRPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.ocr.PyOnlyFormulaRecognitionModel: + model = ui.vision.ocr.PyOnlyFormulaRecognitionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [ + np.ascontiguousarray(data["img"]).astype("float32") for data in batch_data + ] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + rec_result = self._create_rec_result(data, ui_result) + results.append({"result": rec_result}) + return results + + def _create_rec_result(self, data: Data, ui_result: Any) -> FormulaRecResult: + dic = { + "input_path": data["input_path"], + "rec_text": ui_result.rec_text, + } + return FormulaRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py new file mode 100644 index 0000000000..86f69df680 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import BaseResult +from paddlex.modules.general_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class ShiTuRecPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PPShiTuV2Recognizer: + model = ui.vision.classification.PPShiTuV2Recognizer( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [ + np.ascontiguousarray(data["img"]).astype("float32") for data in batch_data + ] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + clas_result = self._create_rec_result(data, ui_result) + results.append({"result": clas_result}) + return results + + def _create_rec_result(self, data: Data, ui_result: Any) -> BaseResult: + dic = { + "input_path": data["input_path"], + "feature": np.array(ui_result.feature, dtype="float32"), + } + return BaseResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py b/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py new file mode 100644 index 0000000000..20dd8ff056 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py @@ -0,0 +1,91 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TopkResult +from paddlex.modules.image_classification.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _ClasPPParams(BaseModel): + topk: int + label_list: Optional[List[str]] = None + + +class ClasPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + self._ui_model.postprocessor.topk = self._pp_params.topk + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PaddleClasModel: + model = ui.vision.classification.PaddleClasModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + clas_result = self._create_clas_result(data, ui_result) + results.append({"result": clas_result}) + return results + + def _get_pp_params(self) -> _ClasPPParams: + pp_config = self.config["PostProcess"] + if "Topk" not in pp_config: + raise RuntimeError("`Topk` config not found") + topk_config = pp_config["Topk"] + topk = topk_config["topk"] + label_list = topk_config.get("label_list", None) + return _ClasPPParams(topk=topk, label_list=label_list) + + def _create_clas_result(self, data: Data, ui_result: Any) -> TopkResult: + dic = { + "input_path": data["input_path"], + "class_ids": ui_result.label_ids, + "scores": np.around(ui_result.scores, decimals=5).tolist(), + } + if self._pp_params.label_list is not None: + dic["label_names"] = [ + self._pp_params.label_list[i] for i in ui_result.label_ids + ] + return TopkResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py b/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py new file mode 100644 index 0000000000..6559e1c0e6 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
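[Editor's note: for clarity, the `PostProcess` section that `_get_pp_params` in image_classification.py above expects looks roughly as follows; the values are made up, and the snippet simply replays the same lookups the predictor performs.]

# Hypothetical excerpt of an inference config, shaped as _get_pp_params expects.
config = {
    "PostProcess": {
        "Topk": {
            "topk": 5,
            "label_list": ["cat", "dog", "bird"],  # optional
        }
    }
}

pp_config = config["PostProcess"]
if "Topk" not in pp_config:
    raise RuntimeError("`Topk` config not found")
topk = pp_config["Topk"]["topk"]                  # -> 5
label_list = pp_config["Topk"].get("label_list")  # -> ["cat", "dog", "bird"]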
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import DocTrResult +from paddlex.modules.image_unwarping.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class WarpPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model(self, option: ui.RuntimeOption) -> ui.vision.ocr.UVDocWarpper: + model = ui.vision.ocr.UVDocWarpper( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + warp_result = self._create_warp_result(data, ui_result) + results.append({"result": warp_result}) + return results + + def _create_warp_result(self, data: Data, ui_result: Any) -> DocTrResult: + img = ui_result.numpy() + img = np.moveaxis(img[0], 0, 2) + img *= 255 + img = img[:, :, ::-1] + img = img.astype("uint8") + dic = { + "input_path": data["input_path"], + "doctr_img": img, + } + return DocTrResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py b/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py new file mode 100644 index 0000000000..7151eae36a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
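[Editor's note: the array gymnastics in `_create_warp_result` above can be hard to read in isolation. A self-contained numpy sketch, assuming the model output is a batch of CHW float images in [0, 1] (the real array comes from UVDocWarpper):]

import numpy as np

# Stand-in for ui_result.numpy(): one CHW float image in [0, 1].
raw = np.random.rand(1, 3, 4, 6).astype("float32")

img = np.moveaxis(raw[0], 0, 2)  # CHW -> HWC, shape (4, 6, 3)
img = img * 255                  # rescale to 0-255
img = img[:, :, ::-1]            # flip channel order (assumed RGB -> BGR)
img = img.astype("uint8")        # final dtype stored in DocTrResult
print(img.shape, img.dtype)      # (4, 6, 3) uint8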
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import InstanceSegResult +from paddlex.modules.instance_segmentation.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _InstanceSegPPParams(BaseModel): + threshold: float + label_list: List[str] + + +class InstanceSegPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.detection.PaddleDetectionModel: + model = ui.vision.detection.PaddleDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + instance_seg_result = self._create_instance_seg_result(data, ui_result) + results.append({"result": instance_seg_result}) + return results + + def _get_pp_params(self) -> _InstanceSegPPParams: + return _InstanceSegPPParams( + threshold=self.config["draw_threshold"], + label_list=self.config["label_list"], + ) + + def _create_instance_seg_result( + self, data: Data, ui_result: Any + ) -> InstanceSegResult: + inds = sorted( + range(len(ui_result.scores)), key=ui_result.scores.__getitem__, reverse=True + ) + inds = [i for i in inds if ui_result.scores[i] > self._pp_params.threshold] + inds = [i for i in inds if ui_result.label_ids[i] > -1] + ids = [ui_result.label_ids[i] for i in inds] + scores = [ui_result.scores[i] for i in inds] + boxes = [ui_result.boxes[i] for i in inds] + masks = [ui_result.masks[i] for i in inds] + masks = [ + np.array(mask.data, dtype=np.uint8).reshape(mask.shape) for mask in masks + ] + dic = { + "input_path": data["input_path"], + "boxes": [ + { + "cls_id": id_, + "label": self._pp_params.label_list[id_], + "score": score, + "coordinate": box, + } + for id_, score, box in zip(ids, scores, boxes) + ], + "masks": masks, + } + return InstanceSegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py b/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py new file mode 100644 index 0000000000..a8d9438ec5 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py @@ -0,0 +1,80 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import MLClassResult +from paddlex.modules.multilabel_classification.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class MLClasPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._label_list = self._get_label_list() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PyOnlyMultilabelClassificationModel: + model = ui.vision.classification.PyOnlyMultilabelClassificationModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ml_clas_result = self._create_ml_clas_result(data, ui_result) + results.append({"result": ml_clas_result}) + return results + + def _get_label_list(self) -> Optional[List[str]]: + pp_config = self.config["PostProcess"] + if "MultiLabelThreshOutput" not in pp_config: + raise RuntimeError("`MultiLabelThreshOutput` config not found") + label_list = pp_config["MultiLabelThreshOutput"].get("label_list", None) + return label_list + + def _create_ml_clas_result(self, data: Data, ui_result: Any) -> MLClassResult: + dic = { + "input_path": data["input_path"], + "class_ids": ui_result.label_ids, + "scores": np.around(ui_result.scores, decimals=5).tolist(), + } + if self._label_list is not None: + dic["label_names"] = [self._label_list[i] for i in ui_result.label_ids] + return MLClassResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py new file mode 100644 index 0000000000..9867135bec --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py @@ -0,0 +1,98 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import DetResult +from paddlex.modules.object_detection.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _DetPPParams(BaseModel): + threshold: float + label_list: List[str] + + +class DetPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.detection.PaddleDetectionModel: + model = ui.vision.detection.PaddleDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + det_result = self._create_det_result(data, ui_result) + results.append({"result": det_result}) + return results + + def _get_pp_params(self) -> _DetPPParams: + return _DetPPParams( + threshold=self.config["draw_threshold"], + label_list=self.config["label_list"], + ) + + def _create_det_result(self, data: Data, ui_result: Any) -> DetResult: + inds = sorted( + range(len(ui_result.scores)), key=ui_result.scores.__getitem__, reverse=True + ) + inds = [i for i in inds if ui_result.scores[i] > self._pp_params.threshold] + inds = [i for i in inds if ui_result.label_ids[i] > -1] + ids = [ui_result.label_ids[i] for i in inds] + scores = [ui_result.scores[i] for i in inds] + boxes = [ui_result.boxes[i] for i in inds] + dic = { + "input_path": data["input_path"], + "boxes": [ + { + "cls_id": id_, + "label": self._pp_params.label_list[id_], + "score": score, + "coordinate": box, + } + for id_, score, box in zip(ids, scores, boxes) + ], + } + return DetResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py b/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py new file mode 100644 index 0000000000..90d3da3ae5 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
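[Editor's note: the toy values below are made up; they simply replay the selection logic that `_create_det_result` in object_detection.py above (and the analogous code in instance_segmentation.py) applies to ultrainfer's raw detections.]

# Toy stand-ins for the fields of an ultrainfer detection result.
scores = [0.9, 0.2, 0.75]
label_ids = [3, 1, -1]
boxes = [[10, 10, 50, 50], [0, 0, 5, 5], [20, 20, 60, 60]]
label_list = ["bg", "cat", "dog", "person"]
threshold = 0.5

# Sort by score (descending), keep detections above the draw threshold,
# and drop padded entries whose label id is -1.
inds = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
inds = [i for i in inds if scores[i] > threshold]
inds = [i for i in inds if label_ids[i] > -1]

kept = [
    {
        "cls_id": label_ids[i],
        "label": label_list[label_ids[i]],
        "score": scores[i],
        "coordinate": boxes[i],
    }
    for i in inds
]
print(kept)  # only the score-0.9 "person" box survives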
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import SegResult +from paddlex.modules.semantic_segmentation.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class SegPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.segmentation.PaddleSegModel: + model = ui.vision.segmentation.PaddleSegModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + seg_result = self._create_seg_result(data, ui_result) + results.append({"result": seg_result}) + return results + + def _create_seg_result(self, data: Data, ui_result: Any) -> SegResult: + pred = np.array(ui_result.label_map, dtype=np.int32).reshape(ui_result.shape) + pred = pred[np.newaxis] + dic = { + "input_path": data["input_path"], + "pred": pred, + } + return SegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py new file mode 100644 index 0000000000..8fe33b544a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py @@ -0,0 +1,68 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import tempfile +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TableRecResult +from paddlex.modules.table_recognition.model_list import MODELS + +from paddlex_hpi._utils.compat import get_compat_version +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class TablePredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.ocr.StructureV2Table: + compat_version = get_compat_version() + if compat_version == "2.5" or self.model_name == "SLANet": + bbox_shape_type = "ori" + else: + bbox_shape_type = "pad" + with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f: + pp_config = self.config["PostProcess"] + for lab in pp_config["character_dict"]: + f.write(lab + "\n") + f.flush() + model = ui.vision.ocr.StructureV2Table( + str(self.model_path), + str(self.params_path), + table_char_dict_path=f.name, + box_shape=bbox_shape_type, + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + table_result = self._create_table_result(data, ui_result) + results.append({"result": table_result}) + return results + + def _create_table_result(self, data: Data, ui_result: Any) -> TableRecResult: + dic = { + "input_path": data["input_path"], + "bbox": ui_result.table_boxes, + "structure": ui_result.table_structure, + } + return TableRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py new file mode 100644 index 0000000000..7e9946415c --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py @@ -0,0 +1,167 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
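[Editor's note: the table predictor above, and the text recognizer later in this patch, hand their character dictionaries to ultrainfer through a temporary file. A minimal reproduction of that pattern, with a made-up label list:]

import tempfile

character_dict = ["<thead>", "<tbody>", "<tr>", "<td>"]  # made-up labels

with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f:
    # One label per line, flushed so the consumer can read the file by name
    # while it is still open (as the predictor does when building the model).
    for lab in character_dict:
        f.write(lab + "\n")
    f.flush()
    print(f.name)  # path passed to ultrainfer as table_char_dict_path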
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TextDetResult +from paddlex.modules.text_detection.model_list import CURVE_MODELS, MODELS + +from paddlex_hpi._utils.misc import parse_scale +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class TextDetPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + + # HACK + @property + def _is_curve_model(self) -> bool: + return self.model_name in CURVE_MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> Union[ui.vision.ocr.DBDetector, ui.vision.ocr.DBCURVEDetector]: + if self._is_curve_model: + model = ui.vision.ocr.DBCURVEDetector( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + else: + model = ui.vision.ocr.DBDetector( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + self._config_ui_preprocessor(model) + self._config_ui_postprocessor(model) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + text_det_result = self._create_text_det_result(data, ui_result) + results.append({"result": text_det_result}) + return results + + def _config_ui_preprocessor(self, model: ui.vision.ocr.DBDetector) -> None: + pp_config = self.config["PreProcess"] + preprocessor = model.preprocessor + for item in pp_config["transform_ops"]: + op_name = next(iter(item)) + op_config = item[op_name] + # XXX: Default values copied from + # `paddlex.inference.models.TextDetPredictor` + if op_name == "DecodeImage": + if op_config["channel_first"]: + raise RuntimeError( + "`DecodeImage.channel_first` must be set to False." 
+ ) + elif op_name == "DetResizeForTest": + preprocessor.max_side_len = op_config.get("resize_long", 960) + elif op_name == "NormalizeImage": + if "scale" in op_config and not ( + abs(parse_scale(op_config["scale"]) - 1 / 255) < 1e-9 + ): + raise RuntimeError("`NormalizeImage.scale` must be set to 1/255.") + if "channel_num" in op_config and op_config["channel_num"] != 3: + raise RuntimeError("`NormalizeImage.channel_num` must be set to 3.") + preprocessor.set_normalize( + op_config.get("mean", [0.485, 0.456, 0.406]), + op_config.get("std", [0.229, 0.224, 0.225]), + True, + ) + elif op_name == "ToCHWImage": + # Do nothing + pass + elif op_name == "DetLabelEncode": + pass + elif op_name == "KeepKeys": + pass + else: + raise RuntimeError(f"Unkown preprocessing operator: {op_name}") + + def _config_ui_postprocessor(self, model: ui.vision.ocr.DBDetector) -> None: + pp_config = self.config["PostProcess"] + # XXX: Default values copied from + # `paddlex.inference.models.TextDetPredictor` + changeable_params: Dict[str, Any] = { + "thresh": 0.3, + "box_thresh": 0.7, + "unclip_ratio": 2.0, + "score_mode": "fast", + "use_dilation": False, + } + unchangeable_params: Dict[str, Any] = { + "max_candidates": 1000, + "box_type": "quad", + } + if self._is_curve_model: + changeable_params["box_type"] = unchangeable_params.pop("box_type") + if "name" in pp_config and pp_config["name"] == "DBPostProcess": + for name in changeable_params: + if name in pp_config: + changeable_params[name] = pp_config[name] + for name, val in unchangeable_params.items(): + if name in pp_config and pp_config[name] != val: + raise RuntimeError( + f"`DBPostProcess.{name}` must be set to {repr(val)}." + ) + else: + raise RuntimeError("Invalid config") + postprocessor = model.postprocessor + postprocessor.det_db_thresh = changeable_params["thresh"] + postprocessor.det_db_box_thresh = changeable_params["box_thresh"] + postprocessor.det_db_unclip_ratio = changeable_params["unclip_ratio"] + postprocessor.use_dilation = changeable_params["use_dilation"] + postprocessor.det_db_score_mode = changeable_params["score_mode"] + if self._is_curve_model: + if changeable_params["box_type"] not in ("quad", "poly"): + raise RuntimeError("Invalid value of `DBPostProcess.box_type`.") + if changeable_params["box_type"] == "quad": + postprocessor.det_db_box_type = "bbox" + else: + postprocessor.det_db_box_type = "poly" + + def _create_text_det_result(self, data: Data, ui_result: Any) -> TextDetResult: + polys = [list(zip(*([iter(box)] * 2))) for box in ui_result.boxes] + # XXX: Currently, we cannot get scores from `ui_result`, so we + # temporarily use dummy scores here. + dummy_scores = [0.0 for _ in ui_result.boxes] + dic = { + "input_path": data["input_path"], + "dt_polys": polys, + "dt_scores": dummy_scores, + } + return TextDetResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py new file mode 100644 index 0000000000..636e2fba6b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py @@ -0,0 +1,86 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +from typing import List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TextRecResult +from paddlex.modules.text_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class TextRecPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model(self, option: ui.RuntimeOption) -> ui.vision.ocr.Recognizer: + with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f: + pp_config = self.config["PostProcess"] + for lab in pp_config["character_dict"]: + f.write(lab + "\n") + f.flush() + model = ui.vision.ocr.Recognizer( + str(self.model_path), + str(self.params_path), + label_path=f.name, + runtime_option=option, + ) + self._config_ui_preprocessor(model) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_result = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, text, score in zip(batch_data, ui_result.text, ui_result.rec_scores): + text_rec_result = self._create_text_rec_result(data, text, score) + results.append({"result": text_rec_result}) + return results + + def _config_ui_preprocessor(self, model: ui.vision.ocr.Recognizer) -> None: + pp_config = self.config["PreProcess"] + preprocessor = model.preprocessor + found_resize_op = False + for item in pp_config["transform_ops"]: + op_name = next(iter(item)) + op_config = item[op_name] + if op_name == "DecodeImage": + if op_config["channel_first"]: + raise RuntimeError( + "`DecodeImage.channel_first` must be set to False." + ) + elif op_name == "RecResizeImg": + preprocessor.rec_image_shape = op_config["image_shape"] + found_resize_op = True + elif op_name == "MultiLabelEncode": + pass + elif op_name == "KeepKeys": + pass + else: + raise RuntimeError(f"Unkown preprocessing operator: {op_name}") + if not found_resize_op: + raise RuntimeError("Could not find the config for `RecResizeImg`.") + + def _create_text_rec_result( + self, data: Data, text: str, score: float + ) -> TextRecResult: + dic = { + "input_path": data["input_path"], + "rec_text": text, + "rec_score": score, + } + return TextRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py new file mode 100644 index 0000000000..2d3bc03dc1 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSAdResult +from paddlex.modules.ts_anomaly_detection.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSAdPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.anomalydetection.PyOnlyAnomalyDetectionModel: + model = ui.ts.anomalydetection.PyOnlyAnomalyDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_ad_result = self._create_ts_ad_result(data, ui_result) + results.append({"result": ts_ad_result}) + return results + + def _create_ts_ad_result(self, data: Data, ui_result: Any) -> TSAdResult: + data_dict = { + ui_result.col_names[i]: ui_result.data[i] + for i in range(len(ui_result.col_names)) + } + anomaly = pd.DataFrame.from_dict(data_dict) + anomaly.index = ui_result.dates + anomaly.index.name = "timestamp" + dic = {"input_path": data["input_path"], "anomaly": anomaly} + return TSAdResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py new file mode 100644 index 0000000000..406ee67f1f --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py @@ -0,0 +1,55 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSClsResult +from paddlex.modules.ts_classification.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSClsPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.classification.PyOnlyClassificationModel: + model = ui.ts.classification.PyOnlyClassificationModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_cls_result = self._create_ts_cls_result(data, ui_result) + results.append({"result": ts_cls_result}) + return results + + def _create_ts_cls_result(self, data: Data, ui_result: Any) -> TSClsResult: + classification = pd.DataFrame.from_dict( + {"classid": [ui_result.class_id], "score": [ui_result.score]} + ) + classification.index.name = "sample" + dic = {"input_path": data["input_path"], "classification": classification} + return TSClsResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py new file mode 100644 index 0000000000..c1168539f8 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSFcResult +from paddlex.modules.ts_forecast.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSFcPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.forecasting.PyOnlyForecastingModel: + model = ui.ts.forecasting.PyOnlyForecastingModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_fc_result = self._create_ts_fc_result(data, ui_result) + results.append({"result": ts_fc_result}) + return results + + def _create_ts_fc_result(self, data: Data, ui_result: Any) -> TSFcResult: + data_dict = { + ui_result.col_names[i]: ui_result.data[i] + for i in range(len(ui_result.col_names)) + } + forecast = pd.DataFrame.from_dict(data_dict) + forecast.index = ui_result.dates + forecast.index.name = "date" + dic = {"input_path": data["input_path"], "forecast": forecast} + return TSFcResult(dic) diff --git a/libs/paddlex-hpi/test_requirements.txt b/libs/paddlex-hpi/test_requirements.txt new file mode 100644 index 0000000000..314e76a4f8 --- /dev/null +++ b/libs/paddlex-hpi/test_requirements.txt @@ -0,0 +1,2 @@ +pytest >= 8 +shapely >= 2 diff --git a/libs/paddlex-hpi/tests/__init__.py b/libs/paddlex-hpi/tests/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/tests/models/__init__.py b/libs/paddlex-hpi/tests/models/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
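[Editor's note: the three time-series predictors above all package their outputs as pandas DataFrames. A minimal sketch with made-up values, mirroring how `_create_ts_fc_result` assembles the forecast frame from column names, data, and dates:]

import pandas as pd

# Toy stand-ins for the fields of an ultrainfer forecasting result.
col_names = ["OT"]
data = [[4.2, 4.5, 4.1]]
dates = ["2021-01-01", "2021-01-02", "2021-01-03"]

forecast = pd.DataFrame.from_dict(
    {col_names[i]: data[i] for i in range(len(col_names))}
)
forecast.index = dates
forecast.index.name = "date"
print(forecast)  # a DataFrame indexed by "date" with one "OT" column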
diff --git a/libs/paddlex-hpi/tests/models/base.py b/libs/paddlex-hpi/tests/models/base.py new file mode 100644 index 0000000000..511791d6a0 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/base.py @@ -0,0 +1,117 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import shutil +import tempfile +from pathlib import Path +from types import GeneratorType + +import pytest +from tests.testing_utils.download import download, download_and_extract +from tests.testing_utils.misc import get_filename + +NUM_INPUT_FILES = 10 +DEVICES = ["cpu", "gpu:0"] +BATCH_SIZES = [1, 2, 4] + + +class BaseTestPredictor(object): + @property + def model_dir(self): + raise NotImplementedError + + @property + def model_url(self): + raise NotImplementedError + + @property + def input_data_url(self): + raise NotImplementedError + + @property + def expected_result_url(self): + raise NotImplementedError + + @property + def predictor_cls(self): + raise NotImplementedError + + @pytest.fixture(scope="class") + def data_dir(self): + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + @pytest.fixture(scope="class") + def model_path(self, data_dir): + download_and_extract(self.model_url, data_dir, "model") + yield data_dir / "model" + + @pytest.fixture(scope="class") + def input_data_path(self, data_dir): + input_data_path = (data_dir / get_filename(self.input_data_url)).with_stem( + "test" + ) + download(self.input_data_url, input_data_path) + yield input_data_path + + @pytest.fixture(scope="class") + def input_data_dir(self, data_dir, input_data_path): + input_data_dir = data_dir / "input_data" + input_data_dir.mkdir() + for i in range(NUM_INPUT_FILES): + shutil.copy( + input_data_path, + (input_data_dir / f"test_{i}").with_suffix(input_data_path.suffix), + ) + yield input_data_dir + + @pytest.fixture(scope="class") + def expected_result(self, data_dir): + expected_result_path = data_dir / "expected.json" + download(self.expected_result_url, expected_result_path) + with open(expected_result_path, "r", encoding="utf-8") as f: + expected_result = json.load(f) + yield expected_result + + @pytest.mark.parametrize("device", DEVICES) + def test___call__single_input_data( + self, model_path, input_data_path, device, expected_result + ): + predictor = self.predictor_cls(model_path, device=device) + output = predictor(str(input_data_path)) + self._check_output(output, expected_result, 1) + output = predictor([str(input_data_path), str(input_data_path)]) + self._check_output(output, expected_result, 2) + + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("batch_size", BATCH_SIZES) + def test___call__input_data_dir( + self, model_path, input_data_dir, device, batch_size, expected_result + ): + predictor = self.predictor_cls(model_path, device=device) + predictor.set_predictor(batch_size=batch_size) + output = predictor(str(input_data_dir)) + self._check_output(output, expected_result, NUM_INPUT_FILES) + + def _check_output(self, 
output, expected_result, expected_num_results): + assert isinstance(output, GeneratorType) + # Note that this exhausts the generator + output = list(output) + assert len(output) == expected_num_results + for result in output: + self._check_result(result, expected_result) + + def _check_result(self, result, expected_result): + raise NotImplementedError diff --git a/libs/paddlex-hpi/tests/models/test_anomaly_detection.py b/libs/paddlex-hpi/tests/models/test_anomaly_detection.py new file mode 100644 index 0000000000..87dd2b2527 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_anomaly_detection.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddlex.inference.results import SegResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import UadPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_result.json" + + +class TestUadPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return UadPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, SegResult) + assert set(result) == set(expected_result) + pred = result["pred"] + expected_pred = np.array(expected_result["pred"], dtype=np.int32) + assert pred.shape == expected_pred.shape + assert (pred != expected_pred).sum() / pred.size < 0.01 diff --git a/libs/paddlex-hpi/tests/models/test_formula_recognition.py b/libs/paddlex-hpi/tests/models/test_formula_recognition.py new file mode 100644 index 0000000000..d4cca67b21 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_formula_recognition.py @@ -0,0 +1,45 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
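[Editor's note: a word on the comparison style used throughout these tests: exact equality is avoided in favour of small tolerances, e.g. a bounded pixel-mismatch ratio for segmentation maps and np.allclose for scores. A self-contained illustration with dummy arrays:]

import numpy as np

# Segmentation-style check: tolerate up to 1% mismatching pixels.
pred = np.zeros((1, 20, 20), dtype=np.int32)
expected_pred = pred.copy()
expected_pred[0, 0, 0] = 1  # one differing pixel out of 400
assert (pred != expected_pred).sum() / pred.size < 0.01

# Score-style check: element-wise closeness with relative/absolute tolerances.
scores = np.array([0.9132, 0.0413])
expected_scores = np.array([0.913, 0.041])
assert np.allclose(scores, expected_scores, rtol=1e-2, atol=1e-3)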
+ +from paddlex.inference.results import FormulaRecResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import LaTeXOCRPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_result.json" + + +class TestLaTeXOCRPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return LaTeXOCRPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, FormulaRecResult) + assert set(result) == set(expected_result) + assert result["rec_text"] == expected_result["rec_text"] diff --git a/libs/paddlex-hpi/tests/models/test_general_recognition.py b/libs/paddlex-hpi/tests/models/test_general_recognition.py new file mode 100644 index 0000000000..4b7facbc2a --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_general_recognition.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlex.inference.results import BaseResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import ShiTuRecPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_result.json" + + +class TestShiTuRecPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return ShiTuRecPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, BaseResult) + assert set(result) == set(expected_result) + expected_result = expected_result["rec_feature"] + result = result["rec_feature"].tolist() + assert sum([abs(x - y) for x, y in zip(result, expected_result)]) < 0.001 * len( + result + ) diff --git a/libs/paddlex-hpi/tests/models/test_image_classification.py b/libs/paddlex-hpi/tests/models/test_image_classification.py new file mode 100644 index 0000000000..6189d15f5f --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_image_classification.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddlex.inference.results import TopkResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import ClasPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_result.json" + + +class TestClasPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return ClasPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TopkResult) + assert set(result) == set(expected_result) + assert result["class_ids"] == expected_result["class_ids"] + assert np.allclose( + np.array(result["scores"]), + np.array(expected_result["scores"]), + rtol=1e-2, + atol=1e-3, + ) + assert result["label_names"] == expected_result["label_names"] diff --git a/libs/paddlex-hpi/tests/models/test_image_unwarping.py b/libs/paddlex-hpi/tests/models/test_image_unwarping.py new file mode 100644 index 0000000000..d5fb0e9df9 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_image_unwarping.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import DocTrResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import WarpPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_result.json" + + +class TestWarpPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return WarpPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, DocTrResult) + assert set(result) == set(expected_result) + assert np.allclose( + result["doctr_img"], + np.array(expected_result["doctr_img"]), + rtol=1e-2, + atol=2, + ) diff --git a/libs/paddlex-hpi/tests/models/test_instance_segmentation.py b/libs/paddlex-hpi/tests/models/test_instance_segmentation.py new file mode 100644 index 0000000000..faa99bef0f --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_instance_segmentation.py @@ -0,0 +1,54 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import InstanceSegResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import InstanceSegPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_result.json" + + +class TestInstanceSegPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return InstanceSegPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, InstanceSegResult) + assert set(result) == set(expected_result) + # TODO: Check masks + compare_det_results( + [obj["coordinate"] for obj in result["boxes"]], + [obj["coordinate"] for obj in expected_result["boxes"]], + labels1=[obj["cls_id"] for obj in result["boxes"]], + labels2=[obj["cls_id"] for obj in expected_result["boxes"]], + scores1=[obj["score"] for obj in result["boxes"]], + scores2=[obj["score"] for obj in expected_result["boxes"]], + ) diff --git a/libs/paddlex-hpi/tests/models/test_multilabel_classification.py b/libs/paddlex-hpi/tests/models/test_multilabel_classification.py new file mode 100644 index 0000000000..729e95a62b --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_multilabel_classification.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import MLClassResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import MLClasPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_result.json" + + +class TestMLClasPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return MLClasPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, MLClassResult) + assert set(result) == set(expected_result) + assert result["class_ids"] == expected_result["class_ids"] + assert np.allclose( + np.array(result["scores"]), + np.array(expected_result["scores"]), + rtol=1e-2, + atol=1e-3, + ) + assert result["label_names"] == expected_result["label_names"] diff --git a/libs/paddlex-hpi/tests/models/test_object_detection.py b/libs/paddlex-hpi/tests/models/test_object_detection.py new file mode 100644 index 0000000000..4da6c8049d --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_object_detection.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import DetResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import DetPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_result.json" + + +class TestDetPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return DetPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, DetResult) + assert set(result) == set(expected_result) + compare_det_results( + [obj["coordinate"] for obj in result["boxes"]], + [obj["coordinate"] for obj in expected_result["boxes"]], + labels1=[obj["cls_id"] for obj in result["boxes"]], + labels2=[obj["cls_id"] for obj in expected_result["boxes"]], + scores1=[obj["score"] for obj in result["boxes"]], + scores2=[obj["score"] for obj in expected_result["boxes"]], + ) diff --git a/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py b/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py new file mode 100644 index 0000000000..1954dbf3c1 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import SegResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import SegPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_result.json" + + +class TestSegPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return SegPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, SegResult) + assert set(result) == set(expected_result) + pred = result["pred"] + expected_pred = np.array(expected_result["pred"], dtype=np.int32) + assert pred.shape == expected_pred.shape + assert (pred != expected_pred).sum() / pred.size < 0.01 diff --git a/libs/paddlex-hpi/tests/models/test_table_recognition.py b/libs/paddlex-hpi/tests/models/test_table_recognition.py new file mode 100644 index 0000000000..29b8be31ea --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_table_recognition.py @@ -0,0 +1,59 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import TableRecResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import TablePredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_result.json" + + +class TestTablePredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TablePredictor + + def _check_result(self, result, expected_result): + def _unflatten_poly(poly): + return [ + [poly[0], poly[1]], + [poly[2], poly[3]], + [poly[4], poly[5]], + [poly[6], poly[7]], + ] + + assert isinstance(result, TableRecResult) + assert set(result) == set(expected_result) + compare_det_results( + [_unflatten_poly(poly) for poly in result["bbox"]], + [_unflatten_poly(poly) for poly in expected_result["bbox"]], + labels1=result["structure"], + labels2=expected_result["structure"], + ) diff --git a/libs/paddlex-hpi/tests/models/test_text_detection.py b/libs/paddlex-hpi/tests/models/test_text_detection.py new file mode 100644 index 0000000000..9b014b3a38 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_text_detection.py @@ -0,0 +1,47 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import TextDetResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import TextDetPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_result.json" + + +class TestTextDetPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TextDetPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TextDetResult) + assert set(result) == set(expected_result) + compare_det_results(result["dt_polys"], expected_result["dt_polys"]) + # Currently no checks for scores diff --git a/libs/paddlex-hpi/tests/models/test_text_recognition.py b/libs/paddlex-hpi/tests/models/test_text_recognition.py new file mode 100644 index 0000000000..08eb79425a --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_text_recognition.py @@ -0,0 +1,52 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import TextRecResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TextRecPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_result.json" + + +class TestTextRecPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TextRecPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TextRecResult) + assert set(result) == set(expected_result) + assert result["rec_text"] == expected_result["rec_text"] + assert np.allclose( + np.array(result["rec_score"]), + np.array(expected_result["rec_score"]), + rtol=1e-2, + atol=1e-3, + ) diff --git a/libs/paddlex-hpi/tests/models/test_ts_ad.py b/libs/paddlex-hpi/tests/models/test_ts_ad.py new file mode 100644 index 0000000000..760c11ca10 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_ad.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from paddlex.inference.results import TSAdResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSAdPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_result.json" + + +class TestTSAdPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSAdPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSAdResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["anomaly"]) + result = result["anomaly"].to_dict(orient="records") + assert result == expected_result diff --git a/libs/paddlex-hpi/tests/models/test_ts_cls.py b/libs/paddlex-hpi/tests/models/test_ts_cls.py new file mode 100644 index 0000000000..3cd42a74f6 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_cls.py @@ -0,0 +1,50 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from paddlex.inference.results import TSClsResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSClsPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_result.json" + + +class TestTSClsPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSClsPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSClsResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["classification"]) + result = result["classification"].to_dict(orient="records") + assert result[0]["classid"] == expected_result[0]["classid"] + assert round(result[0]["score"], 3) == round(expected_result[0]["score"], 3) diff --git a/libs/paddlex-hpi/tests/models/test_ts_fc.py b/libs/paddlex-hpi/tests/models/test_ts_fc.py new file mode 100644 index 0000000000..8203a5cbe0 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_fc.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +from paddlex.inference.results import TSFcResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSFcPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_result.json" + + +class TestTSFcPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSFcPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSFcResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["forecast"]) + expected_result = [{"OT": round(i["OT"], 3)} for i in expected_result] + result = result["forecast"].to_dict(orient="records") + result = [{"OT": round(i["OT"], 3)} for i in result] + assert result == expected_result diff --git a/libs/paddlex-hpi/tests/testing_utils/__init__.py b/libs/paddlex-hpi/tests/testing_utils/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/tests/testing_utils/cv.py b/libs/paddlex-hpi/tests/testing_utils/cv.py new file mode 100644 index 0000000000..221ad34345 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/cv.py @@ -0,0 +1,96 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from shapely.geometry import Polygon + + +def compute_iou(box_or_poly1, box_or_poly2): + if isinstance(box_or_poly1[0], list): + poly1 = box_or_poly1 + poly2 = box_or_poly2 + + poly1 = Polygon(poly1) + poly2 = Polygon(poly2) + + inter_area = poly1.intersection(poly2).area + union_area = poly1.union(poly2).area + + iou = inter_area / (union_area + 1e-9) + + return iou + else: + box1 = box_or_poly1 + box2 = box_or_poly2 + + x11, y11, x12, y12 = box1 + x21, y21, x22, y22 = box2 + + xi1 = max(x11, x21) + yi1 = max(y11, y21) + xi2 = min(x12, x22) + yi2 = min(y12, y22) + + inter_area = max(0, xi2 - xi1 + 1) * max(0, yi2 - yi1 + 1) + box1_area = (x12 - x11 + 1) * (y12 - y11 + 1) + box2_area = (x22 - x21 + 1) * (y22 - y21 + 1) + union_area = box1_area + box2_area - inter_area + + iou = inter_area / (union_area + 1e-9) + + return iou + + +def compare_det_results( + boxes_or_polys1, + boxes_or_polys2, + *, + labels1=None, + labels2=None, + scores1=None, + scores2=None, + iou_tol=0.1, + score_tol=1e-3, +): + compare_labels = labels1 is not None + compare_scores = scores1 is not None + + assert len(boxes_or_polys1) == len(boxes_or_polys2) + if compare_labels: + assert len(labels1) == len(labels2) + if compare_scores: + assert len(scores1) == len(scores2) + + boxes_or_polys2 = boxes_or_polys2.copy() + if labels2 is not None: + labels2 = labels2.copy() + if scores2 is not None: + scores2 = scores2.copy() + for i, box_or_poly1 in enumerate(boxes_or_polys1): + j = 0 + max_iou = 0 + for k, box_or_poly2 in enumerate(boxes_or_polys2): + iou = compute_iou(box_or_poly1, box_or_poly2) + if iou > max_iou: + max_iou = iou + j = k + assert max_iou > 1 - iou_tol + if compare_labels: + assert labels1[i] == labels2[j] + if compare_scores: + assert abs(scores1[i] - scores2[j]) < score_tol + del boxes_or_polys2[j] + if compare_labels: + del labels2[j] + if compare_scores: + del scores2[j] diff --git a/libs/paddlex-hpi/tests/testing_utils/download.py b/libs/paddlex-hpi/tests/testing_utils/download.py new file mode 100644 index 0000000000..e95e46e698 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/download.py @@ -0,0 +1,107 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import shutil +import tarfile +import tempfile +import zipfile +from pathlib import Path +from urllib.request import urlopen + + +def _download(url, save_path): + with urlopen(url) as r: + with open(save_path, "wb") as file: + shutil.copyfileobj(r, file) + + +def _extract_zip_file(file_path, extd_dir): + with zipfile.ZipFile(file_path, "r") as f: + file_list = f.namelist() + for file in file_list: + f.extract(file, extd_dir) + + +def _extract_tar_file(file_path, extd_dir): + with tarfile.open(file_path, "r:*") as f: + file_list = f.getnames() + for file in file_list: + f.extract(file, extd_dir) + + +def _extract(file_path, extd_dir): + if zipfile.is_zipfile(file_path): + handler = _extract_zip_file + elif tarfile.is_tarfile(file_path): + handler = _extract_tar_file + else: + raise ValueError("Unsupported file format") + handler(file_path, extd_dir) + + +def _remove_if_exists(path): + if path.exists(): + if path.is_dir(): + shutil.rmtree(path) + else: + path.unlink() + + +def download(url, save_path, overwrite=False): + save_path.parent.mkdir(exist_ok=True) + if overwrite: + _remove_if_exists(save_path) + if not save_path.exists(): + _download(url, save_path) + + +def extract(file_path, extd_dir): + return _extract(file_path, extd_dir) + + +def download_and_extract(url, save_dir, dst_name, overwrite=False, no_interm_dir=True): + save_dir = Path(save_dir) + save_dir.mkdir(exist_ok=True) + dst_path = save_dir / dst_name + if overwrite: + _remove_if_exists(dst_path) + + if not dst_path.exists(): + with tempfile.TemporaryDirectory() as td: + td = Path(td) + arc_file_path = td / url.split("/")[-1] + extd_dir = arc_file_path.stem + _download(url, arc_file_path) + tmp_extd_dir = td / "extracted" + _extract(arc_file_path, tmp_extd_dir) + if no_interm_dir: + paths = list(tmp_extd_dir.iterdir()) + if len(paths) == 1: + sp = paths[0] + else: + sp = tmp_extd_dir / dst_name + if not sp.exists(): + raise FileNotFoundError + dp = save_dir / sp.name + if sp.is_dir(): + shutil.copytree(sp, dp) + else: + shutil.copyfile(sp, dp) + extd_file = dp + else: + shutil.copytree(tmp_extd_dir, extd_dir) + extd_file = extd_dir + + if not dst_path.exists() or not extd_file.samefile(dst_path): + shutil.move(extd_file, dst_path) diff --git a/libs/paddlex-hpi/tests/testing_utils/misc.py b/libs/paddlex-hpi/tests/testing_utils/misc.py new file mode 100644 index 0000000000..13d9790c69 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/misc.py @@ -0,0 +1,19 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from urllib.parse import urlparse + + +def get_filename(url): + return urlparse(url).path.split("/")[-1] diff --git a/libs/ultrainfer/.gitignore b/libs/ultrainfer/.gitignore new file mode 100644 index 0000000000..4742e191f5 --- /dev/null +++ b/libs/ultrainfer/.gitignore @@ -0,0 +1,54 @@ +build +cmake-build-debug +cmake-build-release +.vscode +UltraInfer.cmake +build-debug.sh +*dist +ultrainfer.egg-info +ultrainfer_python.egg-info +ultrainfer_gpu_python.egg-info +.setuptools-cmake-build +ultrainfer/version.py +ultrainfer/core/config.h +python/ultrainfer/c_lib_wrap.py +python/ultrainfer/LICENSE* +python/build_cpu.sh +python/ultrainfer/ThirdPartyNotices* +*.so* +python/ultrainfer/libs/third_libs +ultrainfer/core/config.h +ultrainfer/pybind/main.cc +python/ultrainfer/libs/lib* +python/ultrainfer/libs/third_libs +__pycache__ +python/scripts/process_libraries.py +.vs +.idea +.DS_Store +miniprogram_npm +node_modules +.DS_Store +dist +etc +lib +dist-ssr +coverage +*.local +yalc.* +.yalc +examples/vision/collect_quantize_cc.sh +examples/vision/tests_quantize +ultrainfer/LICENSE +ultrainfer/ThirdPartyNotices.txt +UltraInferCSharp.cmake +python/ultrainfer/code_version.py +*.pdmodel +*.pdiparams +*.pdiparams.info +log.txt +benchmark/paddlex/build +benchmark/cpp/build +!paddlex/paddlex3.0/serving/libs/**/*.so* +TensorRT* +third_party diff --git a/libs/ultrainfer/CMakeLists.txt b/libs/ultrainfer/CMakeLists.txt new file mode 100755 index 0000000000..0f1072a37d --- /dev/null +++ b/libs/ultrainfer/CMakeLists.txt @@ -0,0 +1,735 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +PROJECT(ultrainfer C CXX) +CMAKE_MINIMUM_REQUIRED(VERSION 3.10) + + +option(CSRCS_DIR_NAME "Name of source code directory") +option(LIBRARY_NAME "Name of build library name") +option(PY_LIBRARY_NAME "Name of build python library name") +if(NOT CSRCS_DIR_NAME) + set(CSRCS_DIR_NAME ".") +endif() +if(NOT LIBRARY_NAME) + set(LIBRARY_NAME "ultrainfer") +endif() +if(NOT PY_LIBRARY_NAME) + set(PY_LIBRARY_NAME "ultrainfer_main") +endif() + +include(ExternalProject) +set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) +set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/third_party) + +add_subdirectory(${CSRCS_DIR_NAME}/ultrainfer) +include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake) + +# Set C++11 as standard for the whole project +if(NOT MSVC) + if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) + endif() + set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3") + if(NEED_ABI0) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + else() + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) + endif() +endif(NOT MSVC) + +include(${PROJECT_SOURCE_DIR}/cmake/build_tools.cmake) +if(UNIX AND (NOT APPLE) AND (NOT WITH_TIMVX)) + download_patchelf() + set(PATCHELF_EXE ${THIRD_PARTY_PATH}/patchelf/bin/patchelf) +endif() + + +############################# Basic Options for UltraInfer ################################ +option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF) +option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-infernce-ipu" OFF) +option(WITH_OPENCL "Whether WITH_OPENCL=ON, will enable paddle-lite-gpu" OFF) +option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF) +option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF) +option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) +option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF) +option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF) +option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF) +option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF) +option(ENABLE_TVM_BACKEND "Whether to enable TVM backend." OFF) +option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF) +option(ENABLE_HORIZON_BACKEND "Whether to enable HORIZON backend." OFF) +option(ENABLE_VISION "Whether to enable vision models usage." OFF) +option(ENABLE_TEXT "Whether to enable text models usage." OFF) +option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF) +option(ENABLE_CVCUDA "Whether to enable NVIDIA CV-CUDA to boost image preprocess." OFF) +option(ENABLE_BENCHMARK "Whether to enable Benchmark mode." OFF) +option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) +option(WITH_DIRECTML "Whether to compile for onnxruntime DirectML deploy." OFF) +option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) +option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) +option(WITH_TESTING "Whether to compile with unittest." OFF) +option(WITH_CAPI "Whether to compile with c api." OFF) +option(WITH_CSHARPAPI "Whether to compile with c# api" OFF) + +option(BUILD_EXAMPLES "Whether to build ultrainfer with vision examples" OFF) +option(BUILD_PADDLE2ONNX "Whether to build paddle2onnx from sources" OFF) + +option(BUILD_FD_TRITON_BACKEND "Whether to compile as Triton Inference Server backend." 
OFF) + +######################### Paths to user's custom libraries directory ##################### +set(CUDA_DIRECTORY "" CACHE PATH "If build tensorrt backend, need to define path of cuda library.") +set(TRT_DIRECTORY "" CACHE PATH "If build tensorrt backend, need to define path of tensorrt library.") +set(ORT_DIRECTORY "" CACHE PATH "User can specify the installed onnxruntime directory.") +set(OPENCV_DIRECTORY "" CACHE PATH "User can specify the installed opencv directory.") +set(OPENVINO_DIRECTORY "" CACHE PATH "User can specify the installed openvino directory.") + +# Whether to build ultrainfer on device Nvidia Jetson +# Only support CPU Inference & GPU(TensorRT) Inference Now +option(BUILD_ON_JETSON "Whether to build ultrainfer on Nvidia Jetson" OFF) +if(BUILD_ON_JETSON) + set(WITH_GPU ON) + set(ENABLE_TRT_BACKEND ON) + set(ENABLE_ORT_BACKEND ON) +endif() + +# config GIT_URL with github mirrors to speed up dependent repos clone +option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL}) +if(NOT GIT_URL) + set(GIT_URL "https://github.com") +endif() + +# check build options +include(${PROJECT_SOURCE_DIR}/cmake/check.cmake) + +if(WIN32) + add_definitions(-DYAML_CPP_DLL) + set(YAML_BUILD_SHARED_LIBS ON) + set(YAML_CPP_INSTALL ON) + set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +endif() + +if(NOT CUDA_DIRECTORY) + set(CUDA_DIRECTORY "/usr/local/cuda") +endif() + +option(BUILD_ULTRAINFER_PYTHON "if build python lib for ultrainfer." OFF) + +set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}") +include_directories(${HEAD_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +if (WITH_TIMVX) + include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake) +endif() + +if (WITH_ASCEND) + include(${PROJECT_SOURCE_DIR}/cmake/ascend.cmake) +endif() + +if (WITH_KUNLUNXIN) + include(${PROJECT_SOURCE_DIR}/cmake/kunlunxin.cmake) +endif() + +if(WITH_IPU) + if(NOT ENABLE_PADDLE_BACKEND) + message("Will force to set ENABLE_PADDLE_BACKEND when build with GraphCore IPU.") + set(ENABLE_PADDLE_BACKEND ON) + endif() + add_definitions(-DWITH_IPU) +endif() + +# Check for macOS architecture +get_osx_architecture() + +##################################### Building: UltraInfer C++ SDK ####################################### +add_definitions(-DULTRAINFER_LIB) +# set CMAKE_BUILD_TYPE to Release +add_definitions(-DCMAKE_BUILD_TYPE=Release) +# configure files before glob sources. 
+configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/core/config.h) +configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/main.cc) +file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*.cc) +file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/ort/*.cc) +file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/*.cc) +file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/poros/*.cc) +file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/*.cpp) +file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/openvino/*.cc) +file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/rknpu2/*.cc) +file(GLOB_RECURSE DEPLOY_HORIZON_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/horizon/*.cc) +file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/sophgo/*.cc) +file(GLOB_RECURSE DEPLOY_TVM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tvm/*.cc) +file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/lite/*.cc) +file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pipeline/*.cc) +file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*.cc) +file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*.cc) +file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*_pybind.cc) +file(GLOB_RECURSE DEPLOY_PADDLE_CUSTOM_OP_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/ops/*.cc) +if(WITH_GPU) + file(GLOB_RECURSE DEPLOY_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*.cu) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/ops/*.cu) + list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*.cu) + list(APPEND DEPLOY_VISION_SRCS ${DEPLOY_VISION_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_TEXT_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*.cu) + list(APPEND DEPLOY_TEXT_SRCS ${DEPLOY_TEXT_CUDA_SRCS}) +endif() +list(REMOVE_ITEM DEPLOY_PADDLE_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} + ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} + ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} + ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} + ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS} + ${DEPLOY_SOPHGO_SRCS} + ${DEPLOY_HORIZON_SRCS} ${DEPLOY_TVM_SRCS} + ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) + + +set(DEPEND_LIBS "") + +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ULTRAINFER_VERSION) +string(STRIP "${ULTRAINFER_VERSION}" ULTRAINFER_VERSION) + 
+# Add eigen lib +download_eigen() +include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen) +if(WIN32) + add_definitions(-DEIGEN_STRONG_INLINE=inline) +endif() + +# sw(sunway) not support thread_local semantic +if(WITH_SW) + add_definitions(-DEIGEN_AVOID_THREAD_LOCAL) +endif() + +if(ENABLE_ORT_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_ORT_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake) + list(APPEND DEPEND_LIBS external_onnxruntime) +endif() + +if(ENABLE_LITE_BACKEND) + add_definitions(-DENABLE_LITE_BACKEND) + include(${PROJECT_SOURCE_DIR}/cmake/paddlelite.cmake) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_LITE_SRCS}) + list(APPEND DEPEND_LIBS external_paddle_lite) +endif() + +if(ENABLE_PADDLE_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_PADDLE_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake) + list(APPEND DEPEND_LIBS external_paddle_inference) + if(external_dnnl_FOUND) + list(APPEND DEPEND_LIBS external_dnnl external_omp) + endif() + if(external_ort_FOUND) + list(APPEND DEPEND_LIBS external_p2o external_ort) + endif() + if(PADDLEINFERENCE_API_CUSTOM_OP) + set_paddle_custom_ops_compatible_policy() + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) + if(WITH_GPU) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS}) + endif() + endif() +endif() + +if(ENABLE_OPENVINO_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_OPENVINO_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake) +endif() + +if(ENABLE_RKNPU2_BACKEND) + add_definitions(-DENABLE_RKNPU2_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake) + list(APPEND DEPEND_LIBS ${RKNN_RT_LIB}) +endif() + +if(ENABLE_HORIZON_BACKEND) + add_definitions(-DENABLE_HORIZON_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_HORIZON_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/horizon.cmake) + list(APPEND DEPEND_LIBS ${BPU_libs}) +endif() + +if(ENABLE_TVM_BACKEND) + set(CMAKE_CXX_STANDARD 17) + add_definitions(-DENABLE_TVM_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TVM_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/tvm.cmake) + list(APPEND DEPEND_LIBS ${TVM_RUNTIME_LIB}) +endif() + +if(ENABLE_SOPHGO_BACKEND) + add_definitions(-DENABLE_SOPHGO_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_SOPHGO_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/sophgo.cmake) + list(APPEND DEPEND_LIBS ${SOPHGO_RT_LIB}) +endif() + +if(ENABLE_POROS_BACKEND) + set(CMAKE_CXX_STANDARD 14) + add_definitions(-DENABLE_POROS_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_POROS_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/poros.cmake) + list(APPEND DEPEND_LIBS external_poros) + set(PYTHON_MINIMUM_VERSION 3.6) + set(PYTORCH_MINIMUM_VERSION 1.9) + set(TENSORRT_MINIMUM_VERSION 8.0) + # find python3 + find_package(Python3 ${PYTHON_MINIMUM_VERSION} REQUIRED COMPONENTS Interpreter Development) + message(STATUS "Found Python: ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}.${Python3_VERSION_PATCH}") + + if (NOT Python3_SITELIB) + message(FATAL_ERROR "site-packages not found. 
") + else () + message(STATUS "site-packages: ${Python3_SITELIB}") + endif () + include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/poros/common) + # find trt + if(NOT WITH_GPU) + message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF") + endif() + if(NOT TRT_DIRECTORY) + message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must define -DTRT_DIRECTORY, e.g -DTRT_DIRECTORY=/Downloads/TensorRT-8.4") + endif() + include_directories(${TRT_DIRECTORY}/include) + find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib) + find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib) + list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + endif() + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + endif() + find_package(Python COMPONENTS Interpreter Development REQUIRED) + message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") + execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) +endif() + +if(WITH_GPU) + add_definitions(-DWITH_GPU) + include_directories(${CUDA_DIRECTORY}/include) + if(WIN32) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + add_definitions(-DENABLE_NVJPEG) + else() + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + add_definitions(-DENABLE_NVJPEG) + endif() + endif() + list(APPEND DEPEND_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) + + # build CUDA source files in ultrainfer, CUDA source files include CUDA preprocessing, TRT plugins, etc. 
+ enable_language(CUDA) + message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: " + "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}") + include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake) +endif() + +if(WITH_OPENCL) + add_definitions(-DWITH_OPENCL) +endif() + +if(ENABLE_TRT_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + if(APPLE OR IOS) + message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios os, please set -DENABLE_TRT_BACKEND=OFF.") + endif() + if(NOT WITH_GPU) + message(FATAL_ERROR "While -DENABLE_TRT_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF") + endif() + if(NOT BUILD_ON_JETSON) + if(NOT TRT_DIRECTORY) + set(TRT_INC_DIR /usr/include/x86_64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/x86_64-linux-gnu/) + endif() + endif() + if(BUILD_ON_JETSON) + set(TRT_INC_DIR /usr/include/aarch64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/aarch64-linux-gnu/) + else() + set(TRT_INC_DIR /usr/include/x86_64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/x86_64-linux-gnu/) + if(TRT_DIRECTORY) + set(TRT_INC_DIR ${TRT_DIRECTORY}/include) + set(TRT_LIB_DIR ${TRT_DIRECTORY}/lib) + endif() + endif() + + add_definitions(-DENABLE_TRT_BACKEND) + include_directories(${TRT_INC_DIR}) + include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/common) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS}) + find_library(TRT_INFER_LIB nvinfer ${TRT_LIB_DIR} NO_DEFAULT_PATH) + find_library(TRT_ONNX_LIB nvonnxparser ${TRT_LIB_DIR} NO_DEFAULT_PATH) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_LIB_DIR} NO_DEFAULT_PATH) + list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + + if(NOT BUILD_ON_JETSON AND TRT_DIRECTORY) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + endif() + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + endif() + + if (NOT Python_EXECUTABLE) + find_package(Python COMPONENTS Interpreter Development REQUIRED) + endif() + + message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") + execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) + file(GLOB_RECURSE TRT_STATIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) + if(TRT_STATIC_LIBS) + file(REMOVE ${TRT_STATIC_LIBS}) + endif() + if(UNIX AND (NOT APPLE)) + execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib + COMMAND sh -c "xargs ${PATCHELF_EXE} --force-rpath --set-rpath '$ORIGIN'" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib + RESULT_VARIABLE result + OUTPUT_VARIABLE curr_out + ERROR_VARIABLE curr_out) + if(ret EQUAL "1") + message(FATAL_ERROR "Failed to patchelf tensorrt libraries.") + endif() + message(STATUS "result:${result} out:${curr_out}") + endif() + endif() +endif() + +if(ENABLE_VISION) + add_definitions(-DENABLE_VISION) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS}) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PIPELINE_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/opencv.cmake) + + if(ENABLE_FLYCV) + add_definitions(-DENABLE_FLYCV) + include(${PROJECT_SOURCE_DIR}/cmake/flycv.cmake) + list(APPEND DEPEND_LIBS ${FLYCV_LIBRARIES}) + endif() + + if(ENABLE_CVCUDA) + 
include(${PROJECT_SOURCE_DIR}/cmake/cvcuda.cmake) + add_definitions(-DENABLE_CVCUDA) + list(APPEND DEPEND_LIBS nvcv_types cvcuda) + endif() +endif() + +download_yaml_cpp() +add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp) +list(APPEND DEPEND_LIBS yaml-cpp) +include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include) + +if(ENABLE_TEXT) + add_definitions(-DENABLE_TEXT) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/fast_tokenizer.cmake) +endif() + +if(ENABLE_PADDLE2ONNX) + add_definitions(-DENABLE_PADDLE2ONNX) + if(BUILD_PADDLE2ONNX) + download_protobuf() + download_onnx() + download_optimizer() + include(${PROJECT_SOURCE_DIR}/cmake/build_paddle2onnx.cmake) + list(APPEND ALL_DEPLOY_SRCS ${PADDLE2ONNX_ALL_SRCS}) + list(APPEND DEPEND_LIBS p2o_paddle_proto onnx) + else() + include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake) + list(APPEND DEPEND_LIBS external_paddle2onnx) + endif() +endif(ENABLE_PADDLE2ONNX) + +if(WITH_CAPI) + include(${PROJECT_SOURCE_DIR}/c_api/CMakeLists.txt) + if(MSVC) + add_definitions(-DFD_CAPI) + endif() +endif() + +if(WITH_CSHARPAPI) + if(MSVC) + add_subdirectory(${PROJECT_SOURCE_DIR}/csharp) + endif() +endif() + +configure_file(${PROJECT_SOURCE_DIR}/UltraInfer.cmake.in ${PROJECT_SOURCE_DIR}/UltraInfer.cmake @ONLY) +configure_file(${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake.in ${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake @ONLY) +if(BUILD_FD_TRITON_BACKEND) + configure_file(${PROJECT_SOURCE_DIR}/python/ultrainfer/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/ultrainfer/c_lib_wrap.py) +else() + configure_file(${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/c_lib_wrap.py) +endif() +configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py) + +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS}) + +add_library(${LIBRARY_NAME} SHARED ${ALL_DEPLOY_SRCS}) + +redefine_file_macro(${LIBRARY_NAME}) + +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ULTRAINFER_VERSION) +string(STRIP "${ULTRAINFER_VERSION}" ULTRAINFER_VERSION) +if (APPLE) + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") +elseif(MSVC) +else() + if(WITH_GPU) + set_target_properties(${LIBRARY_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties(${LIBRARY_NAME} PROPERTIES INTERFACE_COMPILE_OPTIONS + "$<$>:-fvisibility=hidden>$<$>:-Xcompiler=-fvisibility=hidden>") + else() + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + endif() + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS_RELEASE -s) +endif() + +set_target_properties(${LIBRARY_NAME} PROPERTIES VERSION ${ULTRAINFER_VERSION}) +if(MSVC) + # disable warnings for dll export + target_compile_options(${LIBRARY_NAME} PRIVATE "$<$>:/wd4251>$<$>:-Xcompiler=/wd4251>") + file(GLOB FD_FILES_REQUIRE_BIGOBJ ${CSRCS_DIR_NAME}/ultrainfer/function/reduce.cc) + set_source_files_properties(${FD_FILES_REQUIRE_BIGOBJ} PROPERTIES COMPILE_FLAGS "/bigobj") +endif() + +target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS}) + +##################################### Examples #################################### +if(WIN32) + if("${CMAKE_GENERATOR}" STREQUAL "Ninja") + add_custom_target(copy_yaml_library ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_party/yaml-cpp 
${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/lib DEPENDS ${LIBRARY_NAME}) + else() + add_custom_target(copy_yaml_library ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_party/yaml-cpp/Release ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/lib DEPENDS ${LIBRARY_NAME}) + add_custom_target(copy_yaml_include ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/include DEPENDS ${LIBRARY_NAME}) + endif() +endif() + +# add examples after prepare include paths for third-parties +if(BUILD_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + add_definitions(-DBUILD_EXAMPLES) + if(NOT EXECUTABLE_OUTPUT_PATH STREQUAL ${CMAKE_CURRENT_BINARY_DIR}/bin) + set(EXECUTABLE_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/bin) + endif() + include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake) + add_subdirectory(examples) +endif() + +if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests) + add_definitions(-DWITH_TESTING) + include(${PROJECT_SOURCE_DIR}/cmake/gtest.cmake) + if(NOT BUILD_EXAMPLES) + include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake) + endif() + include(${PROJECT_SOURCE_DIR}/cmake/glog.cmake) + add_subdirectory(tests) +endif() + +include(${PROJECT_SOURCE_DIR}/cmake/summary.cmake) +ultrainfer_summary() + +################################ Installation: UltraInfer C++ SDK ############################### +if(WIN32) + install( + TARGETS ${LIBRARY_NAME} + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION lib + ) +else() + install( + TARGETS ${LIBRARY_NAME} + LIBRARY DESTINATION lib) +endif() + +install( + DIRECTORY ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + FILES_MATCHING + PATTERN "*.h" + PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/*/*.h" +) + +install( + DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install + DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs +) + +install( + FILES + ${PROJECT_SOURCE_DIR}/LICENSE + ${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt + ${PROJECT_SOURCE_DIR}/VERSION_NUMBER + ${PROJECT_SOURCE_DIR}/UltraInfer.cmake + ${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake + ${PROJECT_SOURCE_DIR}/cmake/UltraInferConfig.cmake + ${PROJECT_SOURCE_DIR}/cmake/utils.cmake + ${PROJECT_SOURCE_DIR}/cmake/summary.cmake + DESTINATION ${CMAKE_INSTALL_PREFIX} +) + +install( + FILES ${PROJECT_SOURCE_DIR}/cmake/gflags.cmake + DESTINATION ${CMAKE_INSTALL_PREFIX}/utils +) + +if(NOT WIN32) + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.sh + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +else() + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.bat + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +endif() + +if(WITH_ASCEND) + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ascend_init.sh + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +endif() + +if(WITH_CAPI) + install( + DIRECTORY ${PROJECT_SOURCE_DIR}/c_api/ultrainfer_capi + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + FILES_MATCHING + PATTERN "*.h" + PATTERN "*/types_internal.h" EXCLUDE + ) +endif() + +include(${PROJECT_SOURCE_DIR}/cmake/config_cpack.cmake) + +if(WIN32 AND BUILD_EXAMPLES) + get_windows_path(_tmp_install_dir ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install) + get_windows_path(_publish_exe_dir ${EXECUTABLE_OUTPUT_PATH}/Release) + list(GET CMAKE_CONFIGURATION_TYPES 0 _CONFIG_TYPE) + if((${CMAKE_BUILD_TYPE} MATCHES "Release") OR (${_CONFIG_TYPE} MATCHES "Release")) + install(TARGETS 
${LIBRARY_NAME} RUNTIME DESTINATION ${EXECUTABLE_OUTPUT_PATH}/Release) + add_custom_target( + copy_fd_third_dlls_examples ALL COMMAND + cmd /C ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.bat install ${_tmp_install_dir} ${_publish_exe_dir} noconfirm) + add_dependencies(copy_fd_third_dlls_examples ${LIBRARY_NAME} copy_yaml_library) + endif() +endif() + +############################### Building: UltraInfer Python Wheel ############################# +if(BUILD_ULTRAINFER_PYTHON) + add_definitions(-DBUILD_ULTRAINFER_PYTHON) + if("${PY_EXT_SUFFIX}" STREQUAL "") + if(MSVC) + set(PY_EXT_SUFFIX ".pyd") + else() + set(PY_EXT_SUFFIX ".so") + endif() + endif() + + # find_package Python has replaced PythonInterp and PythonLibs since cmake 3.12 + # Use the following command in the future; now this is only compatible with the latest pybind11 + # find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED) + find_package(PythonInterp ${PY_VERSION} REQUIRED) + find_package(PythonLibs ${PY_VERSION}) + if(CMAKE_SYSTEM_NAME STREQUAL "AIX") + set(CMAKE_NO_SYSTEM_FROM_IMPORTED 1) + endif() + + if(NOT ENABLE_VISION) + file(GLOB_RECURSE VISION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*_pybind.cc) + file(GLOB_RECURSE PIPELINE_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pipeline/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS} ${PIPELINE_PYBIND_SRCS}) + endif() + + if (NOT ENABLE_TEXT) + file(GLOB_RECURSE TEXT_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${TEXT_PYBIND_SRCS}) + endif() + + add_library(${PY_LIBRARY_NAME} MODULE ${DEPLOY_PYBIND_SRCS}) + redefine_file_macro(${PY_LIBRARY_NAME}) + set_target_properties(${PY_LIBRARY_NAME} PROPERTIES PREFIX "") + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(${PY_LIBRARY_NAME} PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + target_include_directories(${PY_LIBRARY_NAME} PRIVATE + $ + $ + ${PYTHON_INCLUDE_DIR}) + + download_pybind() + target_include_directories(${PY_LIBRARY_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include) + download_dlpack() + target_include_directories(${PY_LIBRARY_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/third_party/dlpack/include) + + if(APPLE) + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + endif() + + target_link_libraries(${PY_LIBRARY_NAME} PUBLIC ${LIBRARY_NAME}) + + if(MSVC) + target_link_libraries(${PY_LIBRARY_NAME} PRIVATE ${PYTHON_LIBRARIES}) + target_compile_options(${PY_LIBRARY_NAME} + PRIVATE /MP + /wd4244 # 'argument': conversion from 'google:: + # protobuf::uint64' to 'int', possible + # loss of data + /wd4267 # Conversion from 'size_t' to 'int', + # possible loss of data + /wd4996 # The second parameter is ignored. 
+ ${EXTRA_FLAGS}) + target_compile_options(${PY_LIBRARY_NAME} PRIVATE $<$>:/MT> $<$:/MTd>) + endif() + + file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs) + file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs) + + if(WIN32) + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/Release ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + elseif(APPLE) + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/*.so** ${CMAKE_CURRENT_BINARY_DIR}/*.dylib** ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + else() + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/*.so* ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + endif() + add_custom_target(copy_third_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/third_libs DEPENDS ${PY_LIBRARY_NAME}) +endif(BUILD_ULTRAINFER_PYTHON) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") + string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION) + message(FATAL_ERROR "[ERROR] UltraInfer require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.") + endif() +endif() diff --git a/libs/ultrainfer/LICENSE b/libs/ultrainfer/LICENSE new file mode 100755 index 0000000000..261eeb9e9f --- /dev/null +++ b/libs/ultrainfer/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
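(Aside, not part of the patch: a minimal sketch of how the switches referenced in the CMake code above might be exercised when configuring a build. The option names — ENABLE_PADDLE2ONNX, ENABLE_TEXT, WITH_GPU, BUILD_ULTRAINFER_PYTHON, PY_VERSION — are taken from the conditions shown in the CMakeLists.txt diff, but their defaults, any additional toolchain requirements, and whether the wheel is normally produced through python/setup.py rather than a direct cmake invocation are assumptions; treat the paths and values below as placeholders, not the project's documented build procedure.)

    # Hypothetical configure/build/install of the C++ SDK (paths and flag values are illustrative).
    cmake -S libs/ultrainfer -B build \
          -DENABLE_PADDLE2ONNX=ON \
          -DENABLE_TEXT=OFF \
          -DWITH_GPU=OFF \
          -DCMAKE_INSTALL_PREFIX=$(pwd)/ultrainfer_install
    cmake --build build -j 8
    cmake --install build

    # Hypothetical configure for the pybind11 module target guarded by BUILD_ULTRAINFER_PYTHON;
    # PY_VERSION is forwarded to find_package(PythonInterp/PythonLibs) as in the code above.
    cmake -S libs/ultrainfer -B build_py \
          -DBUILD_ULTRAINFER_PYTHON=ON \
          -DPY_VERSION=3.10
    cmake --build build_py -j 8

On Windows the copy_fd_libraries / copy_fd_third_dlls_examples targets shown above only stage DLLs for Release configurations, so a Release build type would be assumed for the sketch there.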
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/libs/ultrainfer/ThirdPartyNotices.txt b/libs/ultrainfer/ThirdPartyNotices.txt new file mode 100755 index 0000000000..35f6dddac8 --- /dev/null +++ b/libs/ultrainfer/ThirdPartyNotices.txt @@ -0,0 +1,1946 @@ +This project depends on some open source projects, list as below + +-------- +1. https://github.com/protocolbuffers/protobuf + +Copyright 2008 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. 
nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------- +2. https://github.com/onnx/onnx + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +3. https://github.com/microsoft/onnxruntime + +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------- +4. https://github.com/pybind/pybind11 + +Copyright (c) 2016 Wenzel Jakob , All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. + +-------- +5. https://github.com/onnx/onnx-tensorrt + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +6. https://github.com/opencv/opencv + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +7. https://github.com/jbeder/yaml-cpp + +Copyright (c) 2008-2015 Jesse Beder. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +--------- +8. https://github.com/oneapi-src/oneDNN/ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2021 Intel Corporation + Copyright 2018 YANDEX LLC + Copyright 2019-2021 FUJITSU LIMITED + Copyright 2020 Arm Limited and affiliates + Copyright 2020 Codeplay Software Limited + Copyright 2021 Alanna Tempest + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. + +-------- +9. https://github.com/openvinotoolkit/openvino + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +10. 
https://gitlab.com/libeigen/eigen + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +11. https://github.com/PaddlePaddle/PaddleNLP + +Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +12. https://github.com/openssl/openssl + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + +-------- +13. https://github.com/dmlc/dlpack + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/libs/ultrainfer/UltraInfer.cmake.in b/libs/ultrainfer/UltraInfer.cmake.in new file mode 100755 index 0000000000..7c05344958 --- /dev/null +++ b/libs/ultrainfer/UltraInfer.cmake.in @@ -0,0 +1,335 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.8) + +# UltraInfer basic infos +set(ULTRAINFER_VERSION @ULTRAINFER_VERSION@) +set(LIBRARY_NAME @LIBRARY_NAME@) + +# If compile with GLIBC_CXX_ABI=0 +set(NEED_ABI0 @NEED_ABI0@) + +# Hardware and Language API +set(WITH_GPU @WITH_GPU@) +set(WITH_IPU @WITH_IPU@) +set(WITH_OPENCL @WITH_OPENCL@) +set(WITH_ASCEND @WITH_ASCEND@) +set(WITH_DIRECTML @WITH_DIRECTML@) +set(WITH_TIMVX @WITH_TIMVX@) +set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@) +set(WITH_CAPI @WITH_CAPI@) +set(WITH_CSHARPAPI @WITH_CSHARPAPI@) +set(WITH_TESTING @WITH_TESTING@) +set(BUILD_ON_JETSON @BUILD_ON_JETSON@) +set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@") + +# Inference backend and UltraInfer Moudle +set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@) +set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@) +set(ENABLE_TVM_BACKEND @ENABLE_TVM_BACKEND@) +set(ENABLE_HORIZON_BACKEND @ENABLE_HORIZON_BACKEND@) +set(ENABLE_SOPHGO_BACKEND @ENABLE_SOPHGO_BACKEND@) +set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@) +set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@) +set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@) +set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@) +set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@) +set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@) +set(BUILD_PADDLE2ONNX @BUILD_PADDLE2ONNX@) + +set(ENABLE_VISION @ENABLE_VISION@) +set(ENABLE_FLYCV @ENABLE_FLYCV@) +set(ENABLE_CVCUDA @ENABLE_CVCUDA@) +set(ENABLE_TEXT @ENABLE_TEXT@) +set(ENABLE_BENCHMARK @ENABLE_BENCHMARK@) + +# Version infos and custom settings for third libs +set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) +set(POROS_VERSION @POROS_VERSION@) +set(OPENVINO_VERSION @OPENVINO_VERSION@) +set(OPENCV_FILENAME @OPENCV_FILENAME@) +set(OPENVINO_FILENAME @OPENVINO_FILENAME@) +set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@) +set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@") +set(ORT_DIRECTORY "@ORT_DIRECTORY@") +set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@") + +set(ULTRAINFER_LIBS "") +set(ULTRAINFER_INCS "") +list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/include) + +# Note(zhoushunjie): include some useful utils function +include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake) + +# Set C++11 as standard for the whole project +if(NOT MSVC) + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_FLAGS "-Wno-format") + if(NEED_ABI0) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + else() + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) + endif() +endif(NOT MSVC) + +# Still need omp while using UltraInfer static lib. +# This is due to the use of openmp for Paddle Lite's +# static library. 
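+
+# Illustrative usage sketch (not executed here; assumptions noted inline): a
+# downstream project would normally consume the variables assembled in this
+# file through the find_package entry point provided by UltraInferConfig.cmake,
+# roughly as follows. The target name "demo" and the source file main.cc are
+# placeholders for illustration only.
+#
+#   cmake_minimum_required(VERSION 3.8)
+#   project(demo)
+#   find_package(UltraInfer REQUIRED)             # resolved via -DUltraInfer_DIR=/path/to/install
+#   include_directories(${UltraInfer_INCLUDE_DIRS})
+#   add_executable(demo main.cc)
+#   target_link_libraries(demo ${UltraInfer_LIBS})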
+ +find_library(FDLIB ${LIBRARY_NAME} ${CMAKE_CURRENT_LIST_DIR}/lib NO_DEFAULT_PATH) +list(APPEND ULTRAINFER_LIBS ${FDLIB}) + +if(ENABLE_ORT_BACKEND) + if (ORT_DIRECTORY) + set(ORT_LIB_PATH ${ORT_DIRECTORY}/lib) + else() + set(ORT_LIB_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib) + endif() + message(STATUS "The path of ONNXRuntime is ${ORT_LIB_PATH}.") + find_library(ORT_LIB onnxruntime ${ORT_LIB_PATH} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${ORT_LIB}) +endif() + +if(ENABLE_TVM_BACKEND) + if(APPLE) + set(TVM_RUNTIME_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tvm/lib/libtvm_runtime.dylib) + else() + set(TVM_RUNTIME_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tvm/lib/libtvm_runtime.so) + endif() + list(APPEND ULTRAINFER_LIBS ${TVM_RUNTIME_LIB}) +endif() + +if(ENABLE_PADDLE_BACKEND) + string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ "${PADDLEINFERENCE_VERSION}") + set(PADDLEINFERENCE_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(PADDLEINFERENCE_VERSION_MINOR "${CMAKE_MATCH_2}") + set(PADDLEINFERENCE_VERSION_PATCH "${CMAKE_MATCH_3}") + find_library(PADDLE_LIB paddle_inference ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/paddle/lib NO_DEFAULT_PATH) + if(WIN32) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/mkldnn.lib") + else() + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/onednn/lib/dnnl.lib") + endif() + set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5md.lib") + elseif(APPLE) + message(STATUS "No third parties libs(mkldnn and omp) need to link into paddle_inference on MacOS OSX.") + else() + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0") + else() + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so") + endif() + list(APPEND ULTRAINFER_LIBS ${PADDLE_LIB}) + if(EXISTS "${DNNL_LIB}") + list(APPEND ULTRAINFER_LIBS ${DNNL_LIB} ${IOMP_LIB}) + endif() +endif() + +if(ENABLE_OPENVINO_BACKEND) + if (OPENVINO_DIRECTORY) + set(OPENVINO_DIR ${OPENVINO_DIRECTORY}) + else() + set(OPENVINO_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${OPENVINO_FILENAME}/runtime) + endif() + get_openvino_libs(${OPENVINO_DIR}) + message(STATUS "OPENVINO_LIBS = ${OPENVINO_LIBS}") + list(APPEND ULTRAINFER_LIBS ${OPENVINO_LIBS}) +endif() + +if(ENABLE_RKNPU2_BACKEND) + if(RKNN2_TARGET_SOC STREQUAL "RK356X") + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) + elseif (RKNN2_TARGET_SOC STREQUAL "RK3588") + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) + else () + message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588") + endif() + message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.") + list(APPEND ULTRAINFER_LIBS ${RKNPU2_LIB}) +endif() + +if(ENABLE_HORIZON_BACKEND) + set(DNN_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/dnn) + set(APPSDK_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/appsdk/appuser/) + + set(DNN_LIB_PATH ${DNN_PATH}/lib) + set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu) + set(BPU_libs dnn cnn_intf 
hbrt_bernoulli_aarch64) + + link_directories(${DNN_LIB_PATH} + ${APPSDK_PATH}/lib/hbbpu + ${APPSDK_PATH}/lib) + + list(APPEND ULTRAINFER_LIBS ${BPU_libs}) + +endif() +if(ENABLE_LITE_BACKEND) + set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME}) + # Linux/Mac/Win/... + find_library(LITE_LIB paddle_full_api_shared ${LITE_DIR}/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${LITE_LIB}) +endif() + +if(ENABLE_POROS_BACKEND) + find_library(POROS_LIB poros ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/lib NO_DEFAULT_PATH) + find_library(TORCH_LIB torch ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/torch/lib NO_DEFAULT_PATH) + set(TORCH_INCLUDE "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/torch/include") + list(APPEND ULTRAINFER_LIBS ${POROS_LIB} ${TORCH_LIB}) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include ${TORCH_INCLUDE}) +endif() + +if(WITH_GPU) + if(NOT CUDA_DIRECTORY) + set(CUDA_DIRECTORY "/usr/local/cuda") + endif() + if(WIN32) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + else() + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + endif() + endif() + if(NOT CUDA_LIB) + message(FATAL_ERROR "[UltraInfer] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda") + endif() + list(APPEND ULTRAINFER_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) + list(APPEND ULTRAINFER_INCS ${CUDA_DIRECTORY}/include) + + if(ENABLE_TRT_BACKEND) + if(BUILD_ON_JETSON) + find_library(TRT_INFER_LIB nvinfer /usr/lib/aarch64-linux-gnu/) + find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/aarch64-linux-gnu/) + find_library(TRT_PLUGIN_LIB nvinfer_plugin /usr/lib/aarch64-linux-gnu/) + else() + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/) + find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + else() + find_library(TRT_INFER_LIB nvinfer /usr/lib/x86_64-linux-gnu/) + find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/x86_64-linux-gnu/) + find_library(TRT_PLUGIN_LIB nvinfer_plugin /usr/lib/x86_64-linux-gnu/) + endif() + endif() + list(APPEND ULTRAINFER_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + endif() +endif() + +if(ENABLE_VISION) + if(OPENCV_DIRECTORY) + set(OpenCV_DIR ${OPENCV_DIRECTORY}) + else() + set(OpenCV_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${OPENCV_FILENAME}) + if(WIN32) + set(OpenCV_DIR ${OpenCV_DIR}/build) + endif() + endif() + message(STATUS "The path of OpenCV is ${OpenCV_DIR}.") + + # Win/Linux/Mac + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_INCS ${OpenCV_INCLUDE_DIRS}) + list(APPEND ULTRAINFER_LIBS ${OpenCV_LIBS}) + + if(ENABLE_FLYCV) + include_directories(${CMAKE_CURRENT_LIST_DIR}/third_libs/install/flycv/include) + set(FLYCV_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/flycv/lib) + + find_library(FLYCV_LIB flycv_shared ${FLYCV_LIB_DIR} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${FLYCV_LIB}) + endif() + + if(ENABLE_CVCUDA) + find_library(CVCUDA_LIB cvcuda ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) + 
find_library(NVCV_TYPES_LIB nvcv_types ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${CVCUDA_LIB} ${NVCV_TYPES_LIB}) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/include NO_DEFAULT_PATH) + add_definitions(-DENABLE_CVCUDA) + endif() + +endif() + +if (ENABLE_TEXT) + # Add dependency libs later: Linux/Mac/Win/... + find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${FAST_TOKENIZER_LIB}) + + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/include) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include) +endif() + +if(ENABLE_PADDLE2ONNX) + if(NOT BUILD_PADDLE2ONNX) + find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${PADDLE2ONNX_LIB}) + endif() +endif() + +if(WITH_KUNLUNXIN) + list(APPEND ULTRAINFER_LIBS -lpthread -lrt -ldl) +endif() + +remove_duplicate_libraries(ULTRAINFER_LIBS) + +include(${CMAKE_CURRENT_LIST_DIR}/summary.cmake) +ultrainfer_summary() +message(STATUS " DEPENDENCY_LIBS : ${ULTRAINFER_LIBS}") + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") + string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION) + message(FATAL_ERROR "[ERROR] UltraInfer require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.") + endif() +endif() + +function(install_ultrainfer_libraries DESTINATION_DIR) + set(DYN_LIB_SUFFIX "*.so*") + if(WIN32) + set(DYN_LIB_SUFFIX "*.dll") + elseif(APPLE) + set(DYN_LIB_SUFFIX "*.dylib*") + endif() + if(UltraInfer_DIR) + set(DYN_SEARCH_DIR ${UltraInfer_DIR}) + elseif(ULTRAINFER_INSTALL_DIR) + set(DYN_SEARCH_DIR ${ULTRAINFER_INSTALL_DIR}) + else() + message(FATAL_ERROR "Please set UltraInfer_DIR/ULTRAINFER_INSTALL_DIR before call install_ultrainfer_libraries.") + endif() + file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX}) + file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX}) + + if(ENABLE_VISION) + # OpenCV + file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/${DYN_LIB_SUFFIX}) + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS}) + + if(WIN32) + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX}) + file(INSTALL ${OPENCV_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + else() # linux/mac + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX}) + file(INSTALL ${OPENCV_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + endif() + + # FlyCV + if(ENABLE_FLYCV) + file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX}) + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS}) + endif() + endif() + + if(ENABLE_OPENVINO_BACKEND) + # need plugins.xml for openvino backend + set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin) + file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml) + file(INSTALL ${OPENVINO_PLUGIN_XML} DESTINATION ${DESTINATION_DIR}) + endif() + + # Install other libraries + file(INSTALL ${ALL_NEED_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + file(INSTALL ${ALL_DEPS_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) +endfunction() diff --git a/libs/ultrainfer/UltraInferCSharp.cmake.in 
b/libs/ultrainfer/UltraInferCSharp.cmake.in new file mode 100755 index 0000000000..f247a66fe4 --- /dev/null +++ b/libs/ultrainfer/UltraInferCSharp.cmake.in @@ -0,0 +1,13 @@ +list(APPEND ULTRAINFER_DOTNET_REFERENCES + "Microsoft.CSharp" + "System" + "System.Core" + "System.Data" + "System.Deployment" + "System.Drawing" + "System.Net.Http" + "System.Xml" + "System.Reflection" + "${CMAKE_CURRENT_LIST_DIR}/csharp_lib/ultrainfer_csharp.dll") + +set(ULTRAINFER_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115;OpenCvSharp4.runtime.win_4.7.0.20230115") diff --git a/libs/ultrainfer/VERSION_NUMBER b/libs/ultrainfer/VERSION_NUMBER new file mode 100755 index 0000000000..77d6f4ca23 --- /dev/null +++ b/libs/ultrainfer/VERSION_NUMBER @@ -0,0 +1 @@ +0.0.0 diff --git a/libs/ultrainfer/cmake/UltraInferConfig.cmake b/libs/ultrainfer/cmake/UltraInferConfig.cmake new file mode 100755 index 0000000000..02b6e0de36 --- /dev/null +++ b/libs/ultrainfer/cmake/UltraInferConfig.cmake @@ -0,0 +1,10 @@ +# This file will define the following variables for find_package method: +# - UltraInfer_LIBS : The list of libraries to link against. +# - UltraInfer_INCLUDE_DIRS : The UltraInfer include directories. +# - UltraInfer_Found : The status of UltraInfer + +include(${CMAKE_CURRENT_LIST_DIR}/UltraInfer.cmake) +# setup UltraInfer cmake variables +set(UltraInfer_LIBS ${ULTRAINFER_LIBS}) +set(UltraInfer_INCLUDE_DIRS ${ULTRAINFER_INCS}) +set(UltraInfer_FOUND TRUE) diff --git a/libs/ultrainfer/cmake/ascend.cmake b/libs/ultrainfer/cmake/ascend.cmake new file mode 100755 index 0000000000..253563d853 --- /dev/null +++ b/libs/ultrainfer/cmake/ascend.cmake @@ -0,0 +1,32 @@ +if(NOT ${ENABLE_LITE_BACKEND}) + set(ENABLE_LITE_BACKEND ON) +endif() + +if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + if (NOT BUILD_ULTRAINFER_PYTHON) + message(STATUS "Build UltraInfer Ascend C++ library on X86 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + else () + message(STATUS "Build UltraInfer Ascend Python library on X86 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + endif() +endif() + + +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + if (NOT BUILD_ULTRAINFER_PYTHON) + message(STATUS "Build UltraInfer Ascend C++ library on aarch64 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + else () + message(STATUS "Build UltraInfer Ascend Python library on aarch64 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + endif() +endif() diff --git a/libs/ultrainfer/cmake/build_paddle2onnx.cmake b/libs/ultrainfer/cmake/build_paddle2onnx.cmake new file mode 100755 index 0000000000..edacfafbc4 --- /dev/null +++ b/libs/ultrainfer/cmake/build_paddle2onnx.cmake @@ -0,0 +1,40 @@ +add_definitions(-DMAX_ONNX_OPSET_VERSION=16) +add_definitions(-DPADDLE2ONNX_LIB) + +# Third dependency: onnx +if(NOT TARGET onnx_proto) + if(NOT ONNX_NAMESPACE) + set(ONNX_NAMESPACE 
"paddle2onnx") + endif() + add_definitions("-DONNX_NAMESPACE=${ONNX_NAMESPACE}") + + set(MSVC_STATIC_CRT ON) + if(ONNX_CUSTOM_PROTOC_PATH) + if(WIN32) + if(MSVC_STATIC_CRT) + # MT + set(ONNX_USE_MSVC_STATIC_RUNTIME ON) + else() + # MD + set(ONNX_USE_MSVC_STATIC_RUNTIME OFF) + endif() + set(ONNX_CUSTOM_PROTOC_PATH "${ONNX_CUSTOM_PROTOC_PATH};$ENV{PATH}") + else() + set(ONNX_CUSTOM_PROTOC_PATH "${ONNX_CUSTOM_PROTOC_PATH}:$ENV{PATH}") + endif() + set(ENV{PATH} ${ONNX_CUSTOM_PROTOC_PATH}) + endif() + + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/onnx) +endif() + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/third_party/onnx) + +include_directories(${PROJECT_SOURCE_DIR}/third_party/optimizer) +add_subdirectory(${PROJECT_SOURCE_DIR}/paddle2onnx/proto) + +file(GLOB_RECURSE PADDLE2ONNX_ALL_SRCS ${PROJECT_SOURCE_DIR}/paddle2onnx/*.cc ${PROJECT_SOURCE_DIR}/third_party/optimizer/onnxoptimizer/*.cc) +list(REMOVE_ITEM PADDLE2ONNX_ALL_SRCS ${PROJECT_SOURCE_DIR}/paddle2onnx/cpp2py_export.cc ${PROJECT_SOURCE_DIR}/third_party/optimizer/onnxoptimizer/cpp2py_export.cc) diff --git a/libs/ultrainfer/cmake/build_tools.cmake b/libs/ultrainfer/cmake/build_tools.cmake new file mode 100755 index 0000000000..c091f4916e --- /dev/null +++ b/libs/ultrainfer/cmake/build_tools.cmake @@ -0,0 +1,87 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +function(download_patchelf) + if(UNIX AND (NOT APPLE)) + set(PATCHELF_EXE "patchelf") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/patchelf-0.15.0-aarch64.tar.gz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/patchelf-0.15.0-aarch64.tar.gz ${THIRD_PARTY_PATH}/patchelf) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/patchelf-0.15.0-x86_64.tar.gz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/patchelf-0.15.0-x86_64.tar.gz ${THIRD_PARTY_PATH}/patchelf) + endif() + endif() +endfunction() + +function(download_protobuf) + if(WIN32) + if(NOT CMAKE_CL_64) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-win-x86-3.16.0.zip) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-win-x64-3.16.0.zip) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-win-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}\\protobuf\\bin;${ORIGIN_ENV_PATH}") + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-osx-arm64-3.16.0.tgz) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-osx-x86_64-3.16.0.tgz) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-osx-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}/protobuf/bin/:${ORIGIN_ENV_PATH}") + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-linux-aarch64-3.16.0.tgz) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-linux-x64-3.16.0.tgz) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-linux-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}/protobuf/bin/:${ORIGIN_ENV_PATH}") + endif() +endfunction() + +function(download_eigen) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/eigen-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/eigen-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/eigen) +endfunction() + +function(download_yaml_cpp) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/yaml-cpp-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/yaml-cpp-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/yaml-cpp) +endfunction() + +function(download_pybind) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/pybind11-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/pybind11-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/pybind11) +endfunction() + +function(download_dlpack) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/dlpack-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/dlpack-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/dlpack) +endfunction() + +function(download_onnx) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/onnx-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/onnx-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/onnx) +endfunction() + +function(download_optimizer) + set(PATCHELF_URL 
https://bj.bcebos.com/fastdeploy/third_party/optimizer-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/optimizer-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/optimizer) +endfunction() diff --git a/libs/ultrainfer/cmake/check.cmake b/libs/ultrainfer/cmake/check.cmake new file mode 100755 index 0000000000..0bfc4546f7 --- /dev/null +++ b/libs/ultrainfer/cmake/check.cmake @@ -0,0 +1,45 @@ +# Check for 32bit system +if(WIN32) + if(NOT CMAKE_CL_64) + message("***********************Compile on non 64-bit system now**********************") + add_definitions(-DNON_64_PLATFORM) + if(WITH_GPU) + message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.") + endif() + if(ENABLE_PADDLE_BACKEND) + message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.") + endif() + if(ENABLE_POROS_BACKEND) + message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.") + endif() + endif() +endif() + +if(IOS) + if(ENABLE_ORT_BACKEND) + message(FATAL_ERROR "Not support ONNXRuntime backend for IOS now. Please set ENABLE_ORT_BACKEND=OFF.") + endif() + if(ENABLE_PADDLE_BACKEND) + message(FATAL_ERROR "Not support Paddle backend for IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.") + endif() + if(ENABLE_OPENVINO_BACKEND) + message(FATAL_ERROR "Not support OpenVINO backend for IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.") + endif() + if(ENABLE_TRT_BACKEND) + message(FATAL_ERROR "Not support TensorRT backend for Andorid/IOS now. Please set ENABLE_TRT_BACKEND=OFF.") + endif() +endif() + +if(WITH_GPU) + if(APPLE) + message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.") + elseif(IOS) + message(FATAL_ERROR "Cannot enable GPU while compling in IOS.") + endif() +endif() + +if(WITH_OPENCL) + if(NOT ENABLE_LITE_BACKEND) + message(FATAL_ERROR "Cannot enable OpenCL while compling unless in Paddle Lite backend is enbaled.") + endif() +endif() diff --git a/libs/ultrainfer/cmake/config_cpack.cmake b/libs/ultrainfer/cmake/config_cpack.cmake new file mode 100755 index 0000000000..f0f5e8c8b9 --- /dev/null +++ b/libs/ultrainfer/cmake/config_cpack.cmake @@ -0,0 +1,38 @@ +if(NOT UNIX) + return() +endif() + +set(PACKAGE_SYS_VERSION "linux") +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-aarch64") +else() + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-x64") +endif() +if(WITH_GPU) + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-gpu") +endif() + +# set(CPACK_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION ON) +set(CPACK_VERBATIM_VARIABLES TRUE) +set(CPACK_GENERATOR DEB RPM) +set(CPACK_THREADS 0) +set(CPACK_PACKAGE_CONTACT "ultrainfer@baidu.com") +set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") +set(CPACK_PACKAGE_VERSION "${ULTRAINFER_VERSION}") +set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PACKAGE_SYS_VERSION}-${ULTRAINFER_VERSION}") +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") + +set(CPACK_DEBIAN_PACKAGE_CONTROL_STRICT_PERMISSION TRUE) +configure_file(cpack/debian_postinst.in cpack/postinst @ONLY) +configure_file(cpack/debian_prerm.in cpack/prerm @ONLY) +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA + "${CMAKE_CURRENT_BINARY_DIR}/cpack/postinst" + "${CMAKE_CURRENT_BINARY_DIR}/cpack/prerm") + +set(CPACK_RPM_PACKAGE_AUTOREQ FALSE) +configure_file(cpack/rpm_postinst.in cpack/rpm_postinst @ONLY) +configure_file(cpack/rpm_postrm.in cpack/rpm_postrm @ONLY) +set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/cpack/rpm_postinst") 
+set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/cpack/rpm_postrm") + +include(CPack) diff --git a/libs/ultrainfer/cmake/cuda.cmake b/libs/ultrainfer/cmake/cuda.cmake new file mode 100755 index 0000000000..70811c6122 --- /dev/null +++ b/libs/ultrainfer/cmake/cuda.cmake @@ -0,0 +1,283 @@ +if(NOT WITH_GPU) + return() +endif() + +# This is to eliminate the CMP0104 warnings from cmake 3.18+. +# Instead of setting CUDA_ARCHITECTURES, we will set CMAKE_CUDA_FLAGS. +set(CMAKE_CUDA_ARCHITECTURES OFF) + +if(BUILD_ON_JETSON) + set(fd_known_gpu_archs "53 62 72") + set(fd_known_gpu_archs10 "53 62 72") +else() + message("Using New Release Strategy - All Arches Packge") + set(fd_known_gpu_archs "35 50 52 60 61 70 75 80 86") + set(fd_known_gpu_archs10 "35 50 52 60 61 70 75") + set(fd_known_gpu_archs11 "50 60 61 70 75 80") +endif() + +###################################################################################### +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# detect_installed_gpus(out_variable) +function(detect_installed_gpus out_variable) + if(NOT CUDA_gpu_detect_output) + set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) + + file( + WRITE ${cufile} + "" + "#include \"stdio.h\"\n" + "#include \"cuda.h\"\n" + "#include \"cuda_runtime.h\"\n" + "int main() {\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device) {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + + execute_process( + COMMAND "${CMAKE_CUDA_COMPILER}" "--run" "${cufile}" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" + RESULT_VARIABLE nvcc_res + OUTPUT_VARIABLE nvcc_out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(nvcc_res EQUAL 0) + # only keep the last line of nvcc_out + string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}") + string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}") + list(GET nvcc_out -1 nvcc_out) + string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}") + set(CUDA_gpu_detect_output + ${nvcc_out} + CACHE INTERNAL + "Returned GPU architetures from detect_installed_gpus tool" + FORCE) + endif() + endif() + + if(NOT CUDA_gpu_detect_output) + message( + STATUS + "Automatic GPU detection failed. 
Building for all known architectures.") + set(${out_variable} + ${fd_known_gpu_archs} + PARENT_SCOPE) + else() + set(${out_variable} + ${CUDA_gpu_detect_output} + PARENT_SCOPE) + endif() +endfunction() + +######################################################################## +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# select_nvcc_arch_flags(out_variable) +function(select_nvcc_arch_flags out_variable) + # List of arch names + set(archs_names + "Kepler" + "Maxwell" + "Pascal" + "Volta" + "Turing" + "Ampere" + "All" + "Manual") + set(archs_name_default "All") + list(APPEND archs_names "Auto") + + # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) + set(CUDA_ARCH_NAME + ${archs_name_default} + CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names}) + mark_as_advanced(CUDA_ARCH_NAME) + + # verify CUDA_ARCH_NAME value + if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};") + string(REPLACE ";" ", " archs_names "${archs_names}") + message( + FATAL_ERROR "Only ${archs_names} architectures names are supported.") + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Manual") + set(CUDA_ARCH_BIN + ${fd_known_gpu_archs} + CACHE + STRING + "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" + ) + set(CUDA_ARCH_PTX + "" + CACHE + STRING + "Specify 'virtual' PTX architectures to build PTX intermediate code for" + ) + mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) + else() + unset(CUDA_ARCH_BIN CACHE) + unset(CUDA_ARCH_PTX CACHE) + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Kepler") + set(cuda_arch_bin "30 35") + elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "53") + else() + set(cuda_arch_bin "50") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "62") + else() + set(cuda_arch_bin "60 61") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "72") + else() + set(cuda_arch_bin "70") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") + set(cuda_arch_bin "75") + elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0 + set(cuda_arch_bin "80") + elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+ + set(cuda_arch_bin "80 86") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "All") + set(cuda_arch_bin ${fd_known_gpu_archs}) + elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") + message( + STATUS + "WARNING: This is just a warning for publishing release. + You are building GPU version without supporting different architectures. + So the wheel package may fail on other GPU architectures. + You can add -DCUDA_ARCH_NAME=All in cmake command + to get a full wheel package to resolve this warning. + While, this version will still work on local GPU architecture.") + detect_installed_gpus(cuda_arch_bin) + else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") + set(cuda_arch_bin ${CUDA_ARCH_BIN}) + endif() + + if(NEW_RELEASE_JIT) + set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}") + set(cuda_arch_bin "") + endif() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." 
"" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + + list(REMOVE_DUPLICATES cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_ptx) + + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN + string(APPEND nvcc_flags + " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}") + string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}") + else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN + string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}") + string(APPEND nvcc_archs_readable " sm_${arch}") + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + string(APPEND nvcc_flags + " -gencode arch=compute_${arch},code=compute_${arch}") + string(APPEND nvcc_archs_readable " compute_${arch}") + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} + ${nvcc_flags} + PARENT_SCOPE) + set(${out_variable}_readable + ${nvcc_archs_readable} + PARENT_SCOPE) +endfunction() + +message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION}) +if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x + set(fd_known_gpu_archs ${fd_known_gpu_archs10}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1 + set(fd_known_gpu_archs ${fd_known_gpu_archs11}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+ + set(fd_known_gpu_archs "${fd_known_gpu_archs11} 86") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +endif() + +# setting nvcc arch flags +select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}") +message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}") + +# Set C++14 support +set(CUDA_PROPAGATE_HOST_FLAGS OFF) +# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. +# So, don't set these flags here. 
+if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 11) +else() + message(WARNING "Detected custom CMAKE_CUDA_STANDARD is using: ${CMAKE_CUDA_STANDARD}") +endif() + +# (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflic with -w +# So replace /W[1-4] with /W0 +if(WIN32) + string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") +endif() +# in cuda9, suppress cuda warning on eigen +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w") +# Set :expt-relaxed-constexpr to suppress Eigen warnings +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +# Set :expt-extended-lambda to enable HOSTDEVICE annotation on lambdas +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") + +if(WIN32) + set(CMAKE_CUDA_FLAGS + "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj") + if(MSVC_STATIC_CRT) + foreach(flag_var + CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE + CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "-MD") + string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}") + endif() + endforeach() + endif() +endif() + +mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) +mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) diff --git a/libs/ultrainfer/cmake/cvcuda.cmake b/libs/ultrainfer/cmake/cvcuda.cmake new file mode 100755 index 0000000000..5d65ba538f --- /dev/null +++ b/libs/ultrainfer/cmake/cvcuda.cmake @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
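+
+# Illustrative configure sketch: CV-CUDA is only usable for Linux x86_64 GPU
+# builds (the checks below enforce this), so a typical configuration enabling it
+# might look roughly like:
+#
+#   cmake .. -DWITH_GPU=ON -DENABLE_VISION=ON -DENABLE_CVCUDA=ON
+#
+# ENABLE_VISION is included in this example because UltraInfer.cmake.in only
+# links the cvcuda/nvcv_types libraries when vision support is enabled.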
+ +if(NOT WITH_GPU) + message(FATAL_ERROR "ENABLE_CVCUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.") +endif() + +if(APPLE OR IOS OR WIN32) + message(FATAL_ERROR "Cannot enable CV-CUDA in mac/ios/windows os, please set -DENABLE_CVCUDA=OFF.") +endif() + +if(NOT (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")) + message(FATAL_ERROR "CV-CUDA only support x86_64.") +endif() + +set(CVCUDA_LIB_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_LIB_FILENAME nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_FILENAME nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) + +download_and_decompress(${CVCUDA_LIB_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_LIB_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) +download_and_decompress(${CVCUDA_DEV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_DEV_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) + +execute_process(COMMAND rm -rf ${THIRD_PARTY_PATH}/install/cvcuda) +execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install/cvcuda) +execute_process(COMMAND cp -r ${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/lib/x86_64-linux-gnu/ ${THIRD_PARTY_PATH}/install/cvcuda/lib) +execute_process(COMMAND cp -r ${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/include/ ${THIRD_PARTY_PATH}/install/cvcuda/include) + +link_directories(${THIRD_PARTY_PATH}/install/cvcuda/lib) +include_directories(${THIRD_PARTY_PATH}/install/cvcuda/include) diff --git a/libs/ultrainfer/cmake/faiss.cmake b/libs/ultrainfer/cmake/faiss.cmake new file mode 100755 index 0000000000..654a908e8e --- /dev/null +++ b/libs/ultrainfer/cmake/faiss.cmake @@ -0,0 +1,122 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FAISS_PROJECT external_faiss_download) +set(FAISS_FILENAME faiss) +set(FAISS_PREFIX_DIR ${THIRD_LIBS_PATH}/${FAISS_FILENAME}) +set(FAISS_SOURCE_DIR ${THIRD_LIBS_PATH}/${FAISS_FILENAME}/src/${FAISS_PROJECT}) +set(FAISS_INSTALL_DIR ${THIRD_LIBS_PATH}/install/${FAISS_FILENAME}) +set(FAISS_INC_DIR ${FAISS_INSTALL_DIR}/include CACHE PATH "faiss include directory." FORCE) +set(FAISS_LIB_DIR ${FAISS_INSTALL_DIR}/lib CACHE PATH "faiss lib directory." 
FORCE) + +if(NOT WITH_FAISS_STATIC) + message(FATAL_ERROR "Not support WITH_FAISS_STATIC=OFF now!") +endif() + +set(FAISS_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test") + +set(FAISS_VERSION 1.7.3) +# URL +if(NOT FAISS_URL) + if(WIN32) + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-win-x64-${FAISS_VERSION}.zip") + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-osx-arm64-${FAISS_VERSION}.tgz") + else() + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-osx-x64-${FAISS_VERSION}.tgz") + endif() + else() # Linux + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Not support for Linux aarch64 now!") + else() + if(WITH_FAISS_GPU) + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-linux-x64-gpu-${FAISS_VERSION}.tgz") + else() + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-linux-x64-${FAISS_VERSION}.tgz") + endif() + endif() + endif() +endif() + +# FAISS Headers +include_directories(${FAISS_INC_DIR}) + +# FAISS Libs paths +if(WIN32) + set(FAISS_LIB "${FAISS_LIB_DIR}/faiss.lib") +elseif(APPLE) + set(FAISS_LIB "${FAISS_LIB_DIR}/libfaiss.a") +else() # Linux + set(FAISS_LIB "${FAISS_LIB_DIR}/libfaiss.a") +endif() + +# Download FAISS +ExternalProject_Add( + ${FAISS_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FAISS_URL} + PREFIX ${FAISS_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${FAISS_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${FAISS_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${FAISS_SOURCE_DIR}/lib/ ${FAISS_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${FAISS_SOURCE_DIR}/include ${FAISS_INC_DIR} + BUILD_BYPRODUCTS ${FAISS_LIB}) + +set(FAISS_LIBRARIES) + +add_library(external_faiss STATIC IMPORTED GLOBAL) +set_property(TARGET external_faiss PROPERTY IMPORTED_LOCATION ${FAISS_LIB}) +add_dependencies(external_faiss ${FAISS_PROJECT}) + +list(APPEND FAISS_LIBRARIES external_faiss) + +# Add BLAS/LAPACK/OpenBLAS (needed by FAISS) +if(WIN32) + add_library(external_blas STATIC IMPORTED GLOBAL) + set_property(TARGET external_blas PROPERTY IMPORTED_LOCATION ${FAISS_LIB_DIR}/BLAS.lib) + add_dependencies(external_blas ${FAISS_PROJECT}) + list(APPEND FAISS_LIBRARIES external_blas) + + add_library(external_lapack STATIC IMPORTED GLOBAL) + set_property(TARGET external_lapack PROPERTY IMPORTED_LOCATION ${FAISS_LIB_DIR}/LAPACK.lib) + add_dependencies(external_lapack ${FAISS_PROJECT}) + list(APPEND FAISS_LIBRARIES external_lapack) +elseif(APPLE) + find_package(BLAS REQUIRED) + list(APPEND FAISS_LIBRARIES ${BLAS_LIBRARIES}) + + find_package(LAPACK REQUIRED) + list(APPEND FAISS_LIBRARIES ${LAPACK_LIBRARIES}) +else() # Linux + find_package(BLAS REQUIRED) + list(APPEND FAISS_LIBRARIES ${BLAS_LIBRARIES}) + + find_package(LAPACK REQUIRED) + list(APPEND FAISS_LIBRARIES ${LAPACK_LIBRARIES}) +endif() + +# Add OpenMP (REQUIRED), OpenMP must be avaliable. +find_package(OpenMP REQUIRED) +list(APPEND FAISS_LIBRARIES OpenMP::OpenMP_CXX) + +set(FAISS_INCLUDE_DIRS ${FAISS_INC_DIR}) +set(FAISS_LIBS ${FAISS_LIBRARIES}) +set(FAISS_FOUND TRUE) diff --git a/libs/ultrainfer/cmake/fast_tokenizer.cmake b/libs/ultrainfer/cmake/fast_tokenizer.cmake new file mode 100755 index 0000000000..4803a1db02 --- /dev/null +++ b/libs/ultrainfer/cmake/fast_tokenizer.cmake @@ -0,0 +1,106 @@ + + +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FASTTOKENIZER_PROJECT "extern_fast_tokenizer") +set(FASTTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/fast_tokenizer) +set(FASTTOKENIZER_SOURCE_DIR + ${THIRD_PARTY_PATH}/fast_tokenizer/src/${FASTTOKENIZER_PROJECT}) +set(FASTTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/fast_tokenizer) +set(FASTTOKENIZER_INC_DIR + "${FASTTOKENIZER_INSTALL_DIR}/include" + "${FASTTOKENIZER_INSTALL_DIR}/third_party/include" + CACHE PATH "fast_tokenizer include directory." FORCE) +set(FASTTOKENIZER_LIB_DIR + "${FASTTOKENIZER_INSTALL_DIR}/lib/" + CACHE PATH "fast_tokenizer lib directory." FORCE) + +set(FASTTOKENIZER_THIRD_LIB_DIR + "${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/" + CACHE PATH "fast_tokenizer lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FASTTOKENIZER_LIB_DIR}") + +include_directories(${FASTTOKENIZER_INC_DIR}) + +# Set lib path +if(WIN32) + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib" + CACHE FILEPATH "fast_tokenizer compile library." FORCE) + set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib") + set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib") +elseif(APPLE) + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib" + CACHE FILEPATH "fast_tokenizer compile library." FORCE) +else() + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so" + CACHE FILEPATH "fast_tokenizer compile library." 
FORCE) +endif(WIN32) +message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}") + +set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/") +set(FASTTOKENIZER_VERSION "1.0.2") + +# Set download url +if(WIN32) + set(FASTTOKENIZER_FILE "fast_tokenizer-win-x64-${FASTTOKENIZER_VERSION}.zip") + if(NOT CMAKE_CL_64) + set(FASTTOKENIZER_FILE "fast_tokenizer-win-x86-${FASTTOKENIZER_VERSION}.zip") + endif() +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FASTTOKENIZER_FILE "fast_tokenizer-osx-arm64-${FASTTOKENIZER_VERSION}.tgz") + else() + set(FASTTOKENIZER_FILE "fast_tokenizer-osx-x86_64-${FASTTOKENIZER_VERSION}.tgz") + endif() +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz") + else() + set(FASTTOKENIZER_FILE "fast_tokenizer-linux-x64-${FASTTOKENIZER_VERSION}.tgz") + endif() +endif() +set(FASTTOKENIZER_URL "${FASTTOKENIZER_URL_BASE}${FASTTOKENIZER_FILE}") + +ExternalProject_Add( + ${FASTTOKENIZER_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FASTTOKENIZER_URL} + PREFIX ${FASTTOKENIZER_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR} + BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB}) + +add_library(fast_tokenizer STATIC IMPORTED GLOBAL) +set_property(TARGET fast_tokenizer PROPERTY IMPORTED_LOCATION ${FASTTOKENIZER_COMPILE_LIB}) +add_dependencies(fast_tokenizer ${FASTTOKENIZER_PROJECT}) +list(APPEND DEPEND_LIBS fast_tokenizer) + +if (WIN32) + add_library(icudt STATIC IMPORTED GLOBAL) + set_property(TARGET icudt PROPERTY IMPORTED_LOCATION ${ICUDT_LIB}) + add_dependencies(icudt ${FASTTOKENIZER_PROJECT}) + list(APPEND DEPEND_LIBS icudt) + + add_library(icuuc STATIC IMPORTED GLOBAL) + set_property(TARGET icuuc PROPERTY IMPORTED_LOCATION ${ICUUC_LIB}) + add_dependencies(icuuc ${FASTTOKENIZER_PROJECT}) + list(APPEND DEPEND_LIBS icuuc) +endif() diff --git a/libs/ultrainfer/cmake/flycv.cmake b/libs/ultrainfer/cmake/flycv.cmake new file mode 100755 index 0000000000..9bce185368 --- /dev/null +++ b/libs/ultrainfer/cmake/flycv.cmake @@ -0,0 +1,97 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FLYCV_PROJECT "extern_flycv") +set(FLYCV_PREFIX_DIR ${THIRD_PARTY_PATH}/flycv) +set(FLYCV_SOURCE_DIR + ${THIRD_PARTY_PATH}/flycv/src/${FLYCV_PROJECT}) +set(FLYCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/flycv) +set(FLYCV_INC_DIR + "${FLYCV_INSTALL_DIR}/include" + CACHE PATH "flycv include directory." FORCE) +set(FLYCV_LIB_DIR + "${FLYCV_INSTALL_DIR}/lib/" + CACHE PATH "flycv lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FLYCV_LIB_DIR}") + +include_directories(${FLYCV_INC_DIR}) + +if(WIN32) + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/flycv.lib" + CACHE FILEPATH "flycv compile library." 
FORCE) +elseif(APPLE) + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/libflycv.dylib" + CACHE FILEPATH "flycv compile library." FORCE) +else() + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/libflycv_shared.so" + CACHE FILEPATH "flycv compile library." FORCE) +endif() + +set(FLYCV_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(FLYCV_VERSION "1.0.0") + +if(WIN32) + message(FATAL_ERROR "FlyCV is not supported on Windows now.") + set(FLYCV_FILE "flycv-win-x64-${FLYCV_VERSION}.zip") +elseif(APPLE) + message(FATAL_ERROR "FlyCV is not supported on Mac OSX now.") + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FLYCV_FILE "flycv-osx-arm64-${FLYCV_VERSION}.tgz") + else() + set(FLYCV_FILE "flycv-osx-x86_64-${FLYCV_VERSION}.tgz") + endif() +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz") + else() + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + set(FLYCV_FILE "flycv-linux-armhf-${FLYCV_VERSION}.tgz") + else() + # set(FLYCV_FILE "flycv-linux-x64-${FLYCV_VERSION}.tgz") + set(FLYCV_FILE "flycv-linux-x64-1.1.0-dev.tgz") + endif() + endif() +endif() +set(FLYCV_URL "${FLYCV_URL_BASE}${FLYCV_FILE}") + +ExternalProject_Add( + ${FLYCV_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FLYCV_URL} + PREFIX ${FLYCV_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${FLYCV_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${FLYCV_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${FLYCV_SOURCE_DIR}/lib/ + ${FLYCV_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory + ${FLYCV_SOURCE_DIR}/include ${FLYCV_INC_DIR} + BUILD_BYPRODUCTS ${FLYCV_COMPILE_LIB}) + +add_library(external_flycv STATIC IMPORTED GLOBAL) +set_property(TARGET external_flycv PROPERTY IMPORTED_LOCATION + ${FLYCV_COMPILE_LIB}) +add_dependencies(external_flycv ${FLYCV_PROJECT}) + +set(FLYCV_LIBRARIES external_flycv) diff --git a/libs/ultrainfer/cmake/gflags.cmake b/libs/ultrainfer/cmake/gflags.cmake new file mode 100755 index 0000000000..dc2cac4dce --- /dev/null +++ b/libs/ultrainfer/cmake/gflags.cmake @@ -0,0 +1,89 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +if(THIRD_PARTY_PATH) + SET(GFLAGS_PREFIX_DIR ${THIRD_PARTY_PATH}/gflags) + SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) +else() + # For example cmake + SET(GFLAGS_PREFIX_DIR ${ULTRAINFER_INSTALL_DIR}/installed_ultrainfer/cmake) + SET(GFLAGS_INSTALL_DIR ${ULTRAINFER_INSTALL_DIR}/installed_ultrainfer/cmake/gflags) +endif() +SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) +set(GFLAGS_SOURCE_FILE ${GFLAGS_PREFIX_DIR}/src/gflags.tgz CACHE PATH "gflags source file." 
FORCE) + +set(GFLAGS_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") +set(GFLAGS_URL ${GFLAGS_URL_PREFIX}/gflags.tgz) +set(GFLAGS_CACHE_FILE ${CMAKE_CURRENT_LIST_DIR}/gflags.tgz) +if(EXISTS ${GFLAGS_CACHE_FILE}) + set(GFLAGS_URL ${GFLAGS_CACHE_FILE} CACHE PATH "gflags cache file." FORCE) + set(GFLAGS_SOURCE_FILE ${GFLAGS_CACHE_FILE} CACHE PATH "gflags source file." FORCE) +endif() + +IF(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ELSE(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(BUILD_COMMAND $(MAKE) --silent) + set(INSTALL_COMMAND $(MAKE) install) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) + +ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${GFLAGS_URL} + PREFIX ${GFLAGS_PREFIX_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${BUILD_COMMAND} + INSTALL_COMMAND ${INSTALL_COMMAND} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} +) +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) + +if(UNIX AND (NOT APPLE)) + list(APPEND GFLAGS_LIBRARIES pthread) +endif() + +# On Windows (including MinGW), the Shlwapi library is used by gflags if available. +if (WIN32) + include(CheckIncludeFileCXX) + check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI) + if (HAVE_SHLWAPI) + set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) + list(APPEND GFLAGS_LIBRARIES shlwapi.lib) + endif(HAVE_SHLWAPI) +endif (WIN32) diff --git a/libs/ultrainfer/cmake/glog.cmake b/libs/ultrainfer/cmake/glog.cmake new file mode 100755 index 0000000000..2a55b7a37b --- /dev/null +++ b/libs/ultrainfer/cmake/glog.cmake @@ -0,0 +1,68 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(GLOG_PREFIX_DIR ${THIRD_PARTY_PATH}/glog) +SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) +SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." 
FORCE) +SET(GLOG_REPOSITORY ${GIT_URL}/google/glog.git) +SET(GLOG_TAG v0.4.0) + +IF(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/glog.lib" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4530") + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") +ELSE(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) + +ExternalProject_Add( + extern_glog + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GLOG_REPOSITORY} + GIT_TAG ${GLOG_TAG} + DEPENDS gflags + PREFIX ${GLOG_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=OFF + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GLOG_LIBRARIES} +) + +ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) +ADD_DEPENDENCIES(glog extern_glog gflags) +LINK_LIBRARIES(glog) diff --git a/libs/ultrainfer/cmake/gtest.cmake b/libs/ultrainfer/cmake/gtest.cmake new file mode 100755 index 0000000000..4294850716 --- /dev/null +++ b/libs/ultrainfer/cmake/gtest.cmake @@ -0,0 +1,84 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(WITH_TESTING) + +INCLUDE(GNUInstallDirs) +INCLUDE(ExternalProject) + +SET(GTEST_PREFIX_DIR ${THIRD_PARTY_PATH}/gtest) +SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest) +SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) +set(GTEST_REPOSITORY ${GIT_URL}/google/googletest.git) +set(GTEST_TAG release-1.8.1) + +INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR}) + +IF(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib" CACHE FILEPATH "gtest main libraries." 
FORCE) + string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_CXX_FLAGS "${GTEST_CMAKE_CXX_FLAGS}") +ELSE(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) + set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +ENDIF(WIN32) + +ExternalProject_Add( + extern_gtest + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GTEST_REPOSITORY} + GIT_TAG ${GTEST_TAG} + PREFIX ${GTEST_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${GTEST_CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_GMOCK=ON + -Dgtest_disable_pthreads=ON + -Dgtest_force_shared_crt=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GTEST_LIBRARIES} + BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES} +) + +ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) +ADD_DEPENDENCIES(gtest extern_gtest) + +ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) +ADD_DEPENDENCIES(gtest_main extern_gtest) + +ENDIF() diff --git a/libs/ultrainfer/cmake/horizon.cmake b/libs/ultrainfer/cmake/horizon.cmake new file mode 100755 index 0000000000..51070a75c6 --- /dev/null +++ b/libs/ultrainfer/cmake/horizon.cmake @@ -0,0 +1,24 @@ +# get Horizon_URL +set(HORIZON_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + +set(HORIZON_VERSION "2.5.2") +set(HORIZON_FILE "horizon_runtime-xj3-aarch64-${HORIZON_VERSION}.tgz") +set(HORIZON_URL "${HORIZON_URL_BASE}${HORIZON_FILE}") + +# download_and_decompress +download_and_decompress(${HORIZON_URL} ${CMAKE_CURRENT_BINARY_DIR}/${HORIZON_FILE} ${THIRD_PARTY_PATH}/install) +# set path +set(HORIZON_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/) + +set(DNN_PATH ${HORIZON_RUNTIME_PATH}/dnn/) +set(APPSDK_PATH ${HORIZON_RUNTIME_PATH}/appsdk/appuser/) + +set(DNN_LIB_PATH ${DNN_PATH}/lib) +set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu) +set(BPU_libs dnn cnn_intf hbrt_bernoulli_aarch64) + +include_directories(${DNN_PATH}/include + ${APPSDK_PATH}/include) +link_directories(${DNN_LIB_PATH} + ${APPSDK_PATH}/lib/hbbpu + ${APPSDK_PATH}/lib) diff --git a/libs/ultrainfer/cmake/kunlunxin.cmake b/libs/ultrainfer/cmake/kunlunxin.cmake new file mode 100755 index 0000000000..3194c76212 --- /dev/null +++ b/libs/ultrainfer/cmake/kunlunxin.cmake @@ -0,0 +1,26 @@ +if(NOT ENABLE_PADDLE_BACKEND) + if(NOT ENABLE_LITE_BACKEND) + message(WARNING "Will force to set ENABLE_LITE_BACKEND=ON if ENABLE_PADDLE_BACKEND=OFF when build with KunlunXin.") + 
set(ENABLE_LITE_BACKEND ON) + endif() +else() + if(ENABLE_LITE_BACKEND) + message(WARNING "Will force to set ENABLE_LITE_BACKEND=OFF if ENABLE_PADDLE_BACKEND=ON when build with KunlunXin.") + set(ENABLE_LITE_BACKEND OFF) + endif() +endif() + +option(WITH_LITE_XPU_LOG "" ON) +if(NOT ENABLE_PADDLE_BACKEND) + if(NOT PADDLELITE_URL) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-aarch64-xpu-v213.tgz") + else () + if (WITH_LITE_XPU_LOG) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230410.tgz") + else() + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-without-log-20230303.tgz") + endif() + endif() + endif() +endif() diff --git a/libs/ultrainfer/cmake/onnxruntime.cmake b/libs/ultrainfer/cmake/onnxruntime.cmake new file mode 100755 index 0000000000..8672c8c7ef --- /dev/null +++ b/libs/ultrainfer/cmake/onnxruntime.cmake @@ -0,0 +1,129 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +set(ONNXRUNTIME_PROJECT "extern_onnxruntime") +set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime) +set(ONNXRUNTIME_SOURCE_DIR + ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT}) +set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime) + +if (ORT_DIRECTORY) + message(STATUS "Use the onnxruntime lib specified by user. The ONNXRuntime path: ${ORT_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" ORT_DIRECTORY ${ORT_DIRECTORY}) + set(ONNXRUNTIME_INC_DIR + "${ORT_DIRECTORY}/include" + CACHE PATH "onnxruntime include directory." FORCE) + + set(ONNXRUNTIME_LIB_DIR + "${ORT_DIRECTORY}/lib" + CACHE PATH "onnxruntime lib directory." FORCE) +else() + message(STATUS "Use the default onnxruntime lib. The ONNXRuntime path: ${ONNXRUNTIME_INSTALL_DIR}") + set(ONNXRUNTIME_INC_DIR + "${ONNXRUNTIME_INSTALL_DIR}/include" + CACHE PATH "onnxruntime include directory." FORCE) + set(ONNXRUNTIME_LIB_DIR + "${ONNXRUNTIME_INSTALL_DIR}/lib" + CACHE PATH "onnxruntime lib directory." 
FORCE) +endif() +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") + +set(ONNXRUNTIME_VERSION "1.12.0") +set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/") + +if(WIN32) + if(WITH_GPU) + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip") + elseif(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x64.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip") + endif() + if(NOT CMAKE_CL_64) + if(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x86.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x86-${ONNXRUNTIME_VERSION}.zip") + endif() + endif() +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-x86_64-${ONNXRUNTIME_VERSION}.tgz") + endif() +else() + if(WITH_GPU) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message("Cannot compile with onnxruntime-gpu while in linux-aarch64 platform, fallback to onnxruntime-cpu") + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}.tgz") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + # cross-compling while the host is x64 but the target is aarch64. + if ((CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "arm")) + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz") + endif() + endif() + endif() +endif() +set(ONNXRUNTIME_URL "${ONNXRUNTIME_URL_PREFIX}${ONNXRUNTIME_FILENAME}") + +include_directories(${ONNXRUNTIME_INC_DIR} +)# For ONNXRUNTIME code to include internal headers. + +if(WIN32) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/onnxruntime.lib" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +elseif(APPLE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +else() + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/libonnxruntime.so" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +endif() + +if (NOT ORT_DIRECTORY) + ExternalProject_Add( + ${ONNXRUNTIME_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${ONNXRUNTIME_URL} + PREFIX ${ONNXRUNTIME_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${ONNXRUNTIME_SOURCE_DIR}/lib/ ${ONNXRUNTIME_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include + ${ONNXRUNTIME_INC_DIR} + BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}) +endif() + +add_library(external_onnxruntime STATIC IMPORTED GLOBAL) +set_property(TARGET external_onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB}) +add_dependencies(external_onnxruntime ${ONNXRUNTIME_PROJECT}) diff --git a/libs/ultrainfer/cmake/opencv.cmake b/libs/ultrainfer/cmake/opencv.cmake new file mode 100755 index 0000000000..948931a69c --- /dev/null +++ b/libs/ultrainfer/cmake/opencv.cmake @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(COMPRESSED_SUFFIX ".tgz") + +if(WIN32) + if(NOT CMAKE_CL_64) + set(OPENCV_FILENAME "opencv-win-x86-3.4.16") + else() + set(OPENCV_FILENAME "opencv-win-x64-3.4.16") + endif() + set(COMPRESSED_SUFFIX ".zip") +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(OPENCV_FILENAME "opencv-osx-arm64-3.4.16") + else() + set(OPENCV_FILENAME "opencv-osx-x86_64-3.4.16") + endif() +elseif(IOS) + message(FATAL_ERROR "Not support cross compiling for IOS now!") +# Linux +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14") + endif() +endif() + +if(NOT OPENCV_FILENAME) + set(OPENCV_FILENAME "opencv-linux-x64-3.4.16") +endif() + +set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/) +if(WIN32) + if(NOT CMAKE_CL_64) + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") + else() + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") + endif() +else() # TODO: use ultrainfer/third_libs instead. + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") +endif() +if(NOT OPENCV_URL) + set(OPENCV_URL ${OPENCV_URL_PREFIX}/${OPENCV_FILENAME}${COMPRESSED_SUFFIX}) +endif() + + +if(BUILD_ON_JETSON) + if(EXISTS /usr/lib/aarch64-linux-gnu/cmake/opencv4/) + set(OPENCV_DIRECTORY /usr/lib/aarch64-linux-gnu/cmake/opencv4/) + endif() +endif() + +if(OPENCV_DIRECTORY) + message(STATUS "Use the opencv lib specified by user. The OpenCV path: ${OPENCV_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" OPENCV_DIRECTORY ${OPENCV_DIRECTORY}) + # Win/Linux/Mac + set(OpenCV_DIR ${OPENCV_DIRECTORY}) + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR}) + include_directories(${OpenCV_INCLUDE_DIRS}) + list(APPEND DEPEND_LIBS ${OpenCV_LIBS}) +else() + message(STATUS "Use the default OpenCV lib from: ${OPENCV_URL}") + # Win/Linux/Mac + download_and_decompress(${OPENCV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${OPENCV_FILENAME}${COMPRESSED_SUFFIX} ${THIRD_PARTY_PATH}/install/) + if(EXISTS ${THIRD_PARTY_PATH}/install/opencv) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/opencv) + endif() + file(RENAME ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}/ ${THIRD_PARTY_PATH}/install/opencv) + set(OPENCV_FILENAME opencv) + if(NOT OpenCV_DIR) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}) + endif() + if (WIN32) + set(OpenCV_DIR ${OpenCV_DIR}/build) + endif() + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR} NO_DEFAULT_PATH) + include_directories(${OpenCV_INCLUDE_DIRS}) + list(APPEND DEPEND_LIBS opencv_core opencv_video opencv_highgui opencv_imgproc opencv_imgcodecs opencv_calib3d opencv_features2d opencv_flann) +endif() diff --git a/libs/ultrainfer/cmake/openvino.cmake b/libs/ultrainfer/cmake/openvino.cmake new file mode 100755 index 0000000000..c97af80bf1 --- /dev/null +++ b/libs/ultrainfer/cmake/openvino.cmake @@ -0,0 +1,112 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +if (OPENVINO_DIRECTORY) + message(STATUS "Use the openvino lib specified by user. The OpenVINO path: ${OPENVINO_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" OPENVINO_DIRECTORY ${OPENVINO_DIRECTORY}) + get_openvino_libs(${OPENVINO_DIRECTORY}/runtime) + include_directories(${OPENVINO_DIRECTORY}/runtime/include ${OPENVINO_DIRECTORY}/runtime/include/ie) + set(OPENVINO_LIB_DIR + "${OPENVINO_DIRECTORY}/runtime/lib/intel64" + CACHE PATH "openvino lib directory." FORCE) + set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENVINO_LIB_DIR}") + +else() + set(OPENVINO_PROJECT "extern_openvino") + + set(OPENVINO_VERSION "2022.2.0.dev20220829") + set(OPENVINO_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs/") + + set(COMPRESSED_SUFFIX ".tgz") + if(WIN32) + set(OPENVINO_FILENAME "w_openvino_toolkit_windows_${OPENVINO_VERSION}") + set(COMPRESSED_SUFFIX ".zip") + if(NOT CMAKE_CL_64) + message(FATAL_ERROR "UltraInfer cannot ENABLE_OPENVINO_BACKEND in win32 now.") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + message("Cannot compile with openvino while in osx arm64 platform right now") + else() + set(OPENVINO_FILENAME "m_openvino_toolkit_osx_${OPENVINO_VERSION}") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message("Cannot compile with openvino while in linux-aarch64 platform") + else() + set(OPENVINO_VERSION "dev.2023.03.2") + if(NEED_ABI0) + set(OPENVINO_FILENAME "openvino-linux-x64-20230302-abi0") + else() + set(OPENVINO_FILENAME "openvino-linux-x64-20230302") + endif() + endif() + endif() + set(OPENVINO_URL "${OPENVINO_URL_PREFIX}${OPENVINO_FILENAME}${COMPRESSED_SUFFIX}") + + download_and_decompress(${OPENVINO_URL} + ${CMAKE_CURRENT_BINARY_DIR}/${OPENVINO_FILENAME}${COMPRESSED_SUFFIX} + ${THIRD_PARTY_PATH}/install) + + if(EXISTS ${THIRD_PARTY_PATH}/install/openvino) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/openvino) + endif() + + file(RENAME ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME} ${THIRD_PARTY_PATH}/install/openvino) + set(OPENVINO_FILENAME openvino) + + set(OPENVINO_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/runtime) + set(OPENVINO_INSTALL_INC_DIR + "${OPENVINO_INSTALL_DIR}/include" + "${OPENVINO_INSTALL_DIR}/include/ie" + CACHE PATH "openvino install include directory." FORCE) + + set(OPENVINO_LIB_DIR + "${OPENVINO_INSTALL_DIR}/lib/" + "${OPENVINO_INSTALL_DIR}/3rdparty/tbb/lib/" + CACHE PATH "openvino lib directory." FORCE) + set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENVINO_LIB_DIR}") + + # For OPENVINO code to include internal headers. 
+ include_directories(${OPENVINO_INSTALL_INC_DIR}) + + if(WIN32) + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/Release/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + + file(GLOB_RECURSE OPENVINO_BIN_FILES ${OPENVINO_INSTALL_DIR}/bin/intel64/Release/*) + file(COPY ${OPENVINO_BIN_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/bin/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/bin/intel64) + elseif(APPLE) + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/Release/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + else() + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + endif() + + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/docs) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/install_dependencies) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/samples) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/setupvars.sh) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/tools) + get_openvino_libs(${OPENVINO_INSTALL_DIR}) +endif() +message("OPENVINO_LIBS = ${OPENVINO_LIBS}") +list(APPEND DEPEND_LIBS ${OPENVINO_LIBS}) diff --git a/libs/ultrainfer/cmake/paddle2onnx.cmake b/libs/ultrainfer/cmake/paddle2onnx.cmake new file mode 100755 index 0000000000..3e5d2f4293 --- /dev/null +++ b/libs/ultrainfer/cmake/paddle2onnx.cmake @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(PADDLE2ONNX_PROJECT "extern_paddle2onnx") +set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx) +set(PADDLE2ONNX_SOURCE_DIR + ${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT}) +set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx) +set(PADDLE2ONNX_INC_DIR + "${PADDLE2ONNX_INSTALL_DIR}/include" + CACHE PATH "paddle2onnx include directory." FORCE) +set(PADDLE2ONNX_LIB_DIR + "${PADDLE2ONNX_INSTALL_DIR}/lib/" + CACHE PATH "paddle2onnx lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${PADDLE2ONNX_LIB_DIR}") + +include_directories(BEFORE ${PADDLE2ONNX_INC_DIR}) +if(WIN32) + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib" + CACHE FILEPATH "paddle2onnx compile library." FORCE) +elseif(APPLE) + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib" + CACHE FILEPATH "paddle2onnx compile library." FORCE) +else() + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so" + CACHE FILEPATH "paddle2onnx compile library." 
FORCE) +endif(WIN32) + +if (NOT PADDLE2ONNX_URL) + # Use default paddle2onnx url if custom url is not setting + set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + set(PADDLE2ONNX_VERSION "1.0.8rc") + if(WIN32) + set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip") + if(NOT CMAKE_CL_64) + set(PADDLE2ONNX_FILE "paddle2onnx-win-x86-${PADDLE2ONNX_VERSION}.zip") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz") + else() + set(PADDLE2ONNX_FILE "paddle2onnx-osx-x86_64-${PADDLE2ONNX_VERSION}.tgz") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLE2ONNX_FILE "paddle2onnx-linux-aarch64-${PADDLE2ONNX_VERSION}.tgz") + else() + set(PADDLE2ONNX_FILE "paddle2onnx-linux-x64-${PADDLE2ONNX_VERSION}.tgz") + endif() + endif() + set(PADDLE2ONNX_URL "${PADDLE2ONNX_URL_BASE}${PADDLE2ONNX_FILE}") +endif() + +ExternalProject_Add( + ${PADDLE2ONNX_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLE2ONNX_URL} + PREFIX ${PADDLE2ONNX_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${PADDLE2ONNX_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${PADDLE2ONNX_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${PADDLE2ONNX_SOURCE_DIR}/lib/ + ${PADDLE2ONNX_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory + ${PADDLE2ONNX_SOURCE_DIR}/include ${PADDLE2ONNX_INC_DIR} + BUILD_BYPRODUCTS ${PADDLE2ONNX_COMPILE_LIB}) + +add_library(external_paddle2onnx STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle2onnx PROPERTY IMPORTED_LOCATION + ${PADDLE2ONNX_COMPILE_LIB}) +add_dependencies(external_paddle2onnx ${PADDLE2ONNX_PROJECT}) diff --git a/libs/ultrainfer/cmake/paddle_inference.cmake b/libs/ultrainfer/cmake/paddle_inference.cmake new file mode 100755 index 0000000000..abe562b50b --- /dev/null +++ b/libs/ultrainfer/cmake/paddle_inference.cmake @@ -0,0 +1,329 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +# The priority strategy for Paddle inference is as follows: +# PADDLEINFERENCE_DIRECTORY > custom PADDLEINFERENCE_URL > default PADDLEINFERENCE_URL. 
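+#
+# As a rough configure-time sketch of that priority order (the directory path and URL
+# below are hypothetical placeholders; only the option/cache-variable names come from
+# this file):
+#
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON -DPADDLEINFERENCE_DIRECTORY=/opt/paddle_inference
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON -DPADDLEINFERENCE_URL=https://example.com/paddle_inference.tgz -DPADDLEINFERENCE_VERSION=2.5.1
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON
+#
+# The first form takes precedence over the second, which takes precedence over the
+# default URL selected below. Note that PADDLEINFERENCE_VERSION must be supplied
+# whenever the default download is bypassed, since the version check further down
+# rejects an empty value.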
+
+if(WITH_GPU AND WITH_IPU)
+  message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON at the same time.")
+endif()
+
+# Custom options for Paddle Inference backend
+option(PADDLEINFERENCE_DIRECTORY "Directory of custom Paddle Inference library" OFF)
+option(PADDLEINFERENCE_API_CUSTOM_OP "Whether building with custom paddle ops" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_4_x "Whether using Paddle Inference 2.4.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_5_x "Whether using Paddle Inference 2.5.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_6_x "Whether using Paddle Inference 2.6.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_DEV "Whether using Paddle Inference latest dev" OFF)
+option(PADDLEINFERENCE_API_COMPAT_CUDA_SM_80 "Whether using Paddle Inference with CUDA sm_80(A100)" OFF)
+
+set(PADDLEINFERENCE_URL "" CACHE STRING "URL of the custom Paddle Inference library")
+set(PADDLEINFERENCE_VERSION "" CACHE STRING "Paddle Inference version")
+
+set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
+set(PADDLEINFERENCE_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle_inference)
+set(PADDLEINFERENCE_SOURCE_DIR
+    ${THIRD_PARTY_PATH}/paddle_inference/src/${PADDLEINFERENCE_PROJECT})
+set(PADDLEINFERENCE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle_inference)
+
+set(PADDLEINFERENCE_INC_DIR "${PADDLEINFERENCE_INSTALL_DIR}"
+    CACHE PATH "paddle_inference include directory." FORCE)
+set(PADDLEINFERENCE_LIB_DIR
+    "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/"
+    CACHE PATH "paddle_inference lib directory." FORCE)
+set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
+    "${PADDLEINFERENCE_LIB_DIR}")
+
+if(PADDLEINFERENCE_DIRECTORY)
+  set(PADDLEINFERENCE_INC_DIR ${PADDLEINFERENCE_DIRECTORY})
+endif()
+
+include_directories(${PADDLEINFERENCE_INC_DIR})
+
+if(PADDLEINFERENCE_DIRECTORY)
+  # Use custom Paddle Inference libs.
+  if(EXISTS "${THIRD_PARTY_PATH}/install/paddle_inference")
+    file(REMOVE_RECURSE "${THIRD_PARTY_PATH}/install/paddle_inference")
+  endif()
+  if(NOT Python_EXECUTABLE)
+    find_package(Python COMPONENTS Interpreter Development REQUIRED)
+  endif()
+  message(STATUS "Copying ${PADDLEINFERENCE_DIRECTORY} to ${THIRD_PARTY_PATH}/install/paddle_inference ...")
+  if(WIN32)
+    execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install)
+    execute_process(COMMAND cp -r ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference)
+  else()
+    execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install)
+    execute_process(COMMAND cp -r ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference)
+    execute_process(COMMAND rm -rf ${THIRD_PARTY_PATH}/install/paddle_inference/paddle/lib/*.a)
+  endif()
+else()
+
+  # Custom Paddle Inference URL
+  if (NOT PADDLEINFERENCE_URL)
+
+    # Use default Paddle Inference libs.
+ set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + if(WIN32) + if (WITH_GPU) + set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt8.5.2.2-mkl-2.5.0.281761089e.zip") + set(PADDLEINFERENCE_VERSION "2.5.0.281761089e") + else() + set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-mkl-2.5.0.281761089e.zip") + set(PADDLEINFERENCE_VERSION "2.5.0.281761089e") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + message(FATAL_ERROR "Paddle Backend doesn't support Mac OSX with Arm64 now.") + set(PADDLEINFERENCE_FILE "paddle_inference-osx-arm64-openblas-0.0.0.660f781b77.tgz") + else() + # TODO(qiuyanjun): Should remove this old paddle inference lib + # set(PADDLEINFERENCE_FILE "paddle_inference-osx-x86_64-2.4-dev3.tgz") + set(PADDLEINFERENCE_FILE "paddle_inference-osx-x86_64-openblas-0.0.0.660f781b77.tgz") + endif() + set(PADDLEINFERENCE_VERSION "0.0.0.660f781b77") + else() + # Linux with x86/aarch64 CPU/Arm CPU/GPU/IPU ... + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Paddle Backend doesn't support linux aarch64 now.") + else() + # x86_64 + if(WITH_GPU) + if(PADDLEINFERENCE_API_COMPAT_CUDA_SM_80) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt8.5.2.2-mkl-sm70.sm75.sm80.sm86.nodist-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + else() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt8.5.2.2-mkl-sm61.sm70.sm75.sm86.nodist-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + endif() + else() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-mkl-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + endif() + if(WITH_IPU) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-ipu-2.4-dev1.tgz") + # TODO(qiuyanjun): Should use the commit id to tag the version + set(PADDLEINFERENCE_VERSION "2.4-dev1") + endif() + if(WITH_KUNLUNXIN) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-xpu-openblas-0.0.0.021fd73536.tgz") + set(PADDLEINFERENCE_VERSION "0.0.0.021fd73536") + endif() + + if(NEED_ABI0) + if(WITH_GPU OR WITH_IPU OR WITH_KUNLUNXIN) + message(WARNING "While NEED_ABI0=ON, only support CPU now, will fallback to CPU.") + endif() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-2.4.0-abi0.tgz") + set(PADDLEINFERENCE_VERSION "2.4.0-abi0") + endif() + endif() + endif() + set(PADDLEINFERENCE_URL "${PADDLEINFERENCE_URL_BASE}${PADDLEINFERENCE_FILE}") + + endif(PADDLEINFERENCE_URL) + + ExternalProject_Add( + ${PADDLEINFERENCE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLEINFERENCE_URL} + PREFIX ${PADDLEINFERENCE_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLEINFERENCE_SOURCE_DIR} ${PADDLEINFERENCE_INSTALL_DIR} + BUILD_BYPRODUCTS ${PADDLEINFERENCE_COMPILE_LIB}) + +endif(PADDLEINFERENCE_DIRECTORY) + +if (PADDLEINFERENCE_VERSION STREQUAL "") + message(FATAL_ERROR "The Paddle Inference version is unspecified and cannot be determined.") +endif() +string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ "${PADDLEINFERENCE_VERSION}") +set(PADDLEINFERENCE_VERSION_MAJOR "${CMAKE_MATCH_1}") +set(PADDLEINFERENCE_VERSION_MINOR "${CMAKE_MATCH_2}") +set(PADDLEINFERENCE_VERSION_PATCH "${CMAKE_MATCH_3}") +add_definitions("-DPADDLEINFERENCE_VERSION_MAJOR=${PADDLEINFERENCE_VERSION_MAJOR}") +add_definitions("-DPADDLEINFERENCE_VERSION_MINOR=${PADDLEINFERENCE_VERSION_MINOR}") +add_definitions("-DPADDLEINFERENCE_VERSION_PATCH=${PADDLEINFERENCE_VERSION_PATCH}") + +# check libs +if(WIN32) + 
set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/mkldnn.lib") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/dnnl.lib") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5md.lib") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.lib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/onnxruntime.lib") +elseif(APPLE) + set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.dylib" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.dylib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.dylib") +else() + set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so") +endif(WIN32) + +# Path Paddle Inference ELF lib file +if(UNIX AND (NOT APPLE)) + set(PATCHELF_SCRIPT ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_inference.py) + set(PATCHELF_TARGET ${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so) + add_custom_target( + patchelf_paddle_inference ALL COMMAND bash -c + "PATCHELF_EXE=${PATCHELF_EXE} python ${PATCHELF_SCRIPT} ${PATCHELF_TARGET} ${PADDLEINFERENCE_VERSION}" + DEPENDS ${LIBRARY_NAME} + ) + unset(PATCHELF_SCRIPT) + unset(PATCHELF_TARGET) +endif() + +add_library(external_paddle_inference STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle_inference PROPERTY IMPORTED_LOCATION + ${PADDLEINFERENCE_COMPILE_LIB}) +add_dependencies(external_paddle_inference ${PADDLEINFERENCE_PROJECT}) + + +add_library(external_p2o STATIC IMPORTED GLOBAL) +set_property(TARGET external_p2o PROPERTY IMPORTED_LOCATION + ${P2O_LIB}) +add_dependencies(external_p2o ${PADDLEINFERENCE_PROJECT}) + +add_library(external_ort STATIC IMPORTED GLOBAL) +set_property(TARGET external_ort PROPERTY IMPORTED_LOCATION + ${ORT_LIB}) +add_dependencies(external_ort ${PADDLEINFERENCE_PROJECT}) + +add_library(external_dnnl STATIC IMPORTED GLOBAL) +set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION + ${DNNL_LIB}) +add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) + +add_library(external_omp STATIC IMPORTED GLOBAL) +set_property(TARGET external_omp PROPERTY 
IMPORTED_LOCATION + ${OMP_LIB}) +add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) + +# Compatible policy for 2.4.x/2.5.x/2.6.x and latest dev. +if (NOT WITH_KUNLUNXIN) + string(REGEX MATCH "0.0.0" PADDLEINFERENCE_USE_DEV ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.4|post24|post2.4" PADDLEINFERENCE_USE_2_4_x ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.5|post25|post2.5" PADDLEINFERENCE_USE_2_5_x ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.6|post26|post2.6" PADDLEINFERENCE_USE_2_6_x ${PADDLEINFERENCE_VERSION}) +endif() + +if(PADDLEINFERENCE_USE_DEV) + set(PADDLEINFERENCE_API_COMPAT_DEV ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_6_x) + set(PADDLEINFERENCE_API_COMPAT_2_6_x ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_5_x) + set(PADDLEINFERENCE_API_COMPAT_2_5_x ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_4_x AND (NOT PADDLEINFERENCE_API_COMPAT_2_5_x) AND (NOT PADDLEINFERENCE_API_COMPAT_2_6_x) AND (NOT PADDLEINFERENCE_API_COMPAT_DEV)) + set(PADDLEINFERENCE_API_COMPAT_2_4_x ON CACHE BOOL "" FORCE) + message(WARNING "You are using PADDLEINFERENCE_USE_2_4_x:${PADDLEINFERENCE_VERSION}, force PADDLEINFERENCE_API_COMPAT_2_4_x=ON") +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_4_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_4_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_5_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_5_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_6_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_6_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_DEV) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_DEV) +endif() + +# Compatible policy for custom paddle ops +if(PADDLEINFERENCE_API_COMPAT_2_5_x AND (NOT WITH_KUNLUNXIN)) + # no c++ standard policy conflicts vs c++ 11 + # TODO: support custom ops for latest dev + set(PADDLEINFERENCE_API_CUSTOM_OP ON CACHE BOOL "" FORCE) + # add paddle_inference/paddle/include path for custom ops + # the extension.h and it's deps headers are located in + # paddle/include/paddle directory. 
+ include_directories(${PADDLEINFERENCE_INC_DIR}/paddle/include) + message(WARNING "You are using PADDLEINFERENCE_API_COMPAT_2_5_x:${PADDLEINFERENCE_VERSION}, force PADDLEINFERENCE_API_CUSTOM_OP=${PADDLEINFERENCE_API_CUSTOM_OP}") +endif() + +function(set_paddle_custom_ops_compatible_policy) + if(PADDLEINFERENCE_API_CUSTOM_OP AND (NOT WITH_KUNLUNXIN)) + if(NOT MSVC) + # TODO: add non c++ 14 policy for latest dev + if(NOT PADDLEINFERENCE_API_COMPAT_2_5_x) + # gcc c++ 14 policy for 2.4.x + if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON, but CMAKE_CXX_STANDARD is not defined, use c++ 14 by default!") + elseif(NOT (CMAKE_CXX_STANDARD EQUAL 14)) + set(CMAKE_CXX_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON, force use c++ 14!") + endif() + endif() + if(WITH_GPU) + # cuda c++ 14 policy for 2.4.x + if(NOT PADDLEINFERENCE_API_COMPAT_2_5_x) + if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON and WITH_GPU=ON, but CMAKE_CUDA_STANDARD is not defined, use c++ 14 by default!") + elseif(NOT (CMAKE_CUDA_STANDARD EQUAL 14)) + set(CMAKE_CUDA_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON and WITH_GPU=ON, force use c++ 14!") + endif() + endif() + endif() + endif() + # common compile flags for paddle custom ops + add_definitions(-DPADDLE_ON_INFERENCE) + add_definitions(-DPADDLE_NO_PYTHON) + if(WITH_GPU) + add_definitions(-DPADDLE_WITH_CUDA) + endif() + endif() +endfunction() diff --git a/libs/ultrainfer/cmake/paddlelite.cmake b/libs/ultrainfer/cmake/paddlelite.cmake new file mode 100755 index 0000000000..6042ea14c2 --- /dev/null +++ b/libs/ultrainfer/cmake/paddlelite.cmake @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +option(PADDLELITE_DIRECTORY "Directory of custom Paddle-Lite library" OFF) + +set(PADDLELITE_PROJECT "extern_paddlelite") +set(PADDLELITE_FILENAME paddlelite) +set(PADDLELITE_PREFIX_DIR ${THIRD_PARTY_PATH}/${PADDLELITE_FILENAME}) +set(PADDLELITE_SOURCE_DIR + ${THIRD_PARTY_PATH}/${PADDLELITE_FILENAME}/src/${PADDLELITE_PROJECT}) +set(PADDLELITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${PADDLELITE_FILENAME}) +set(PADDLELITE_INC_DIR + "${PADDLELITE_INSTALL_DIR}/include" + CACHE PATH "paddlelite include directory." FORCE) +set(PADDLELITE_LIB_DIR +"${PADDLELITE_INSTALL_DIR}/lib/" +CACHE PATH "paddlelite lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${PADDLELITE_LIB_DIR}") + +if(PADDLELITE_DIRECTORY) + set(PADDLELITE_INC_DIR "${PADDLELITE_DIRECTORY}/include") +endif() + +include_directories(${PADDLELITE_INC_DIR}) + +if(PADDLELITE_DIRECTORY) + # Use custom Paddle-Lite libs. 
+ if(EXISTS "${THIRD_PARTY_PATH}/install/paddlelite") + file(REMOVE_RECURSE "${THIRD_PARTY_PATH}/install/paddlelite") + endif() + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() + message(STATUS "Copying ${PADDLELITE_DIRECTORY} to ${THIRD_PARTY_PATH}/install/paddlelite ...") + execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install/paddlelite) + execute_process(COMMAND cp -r ${PADDLELITE_DIRECTORY}/include ${THIRD_PARTY_PATH}/install/paddlelite) + execute_process(COMMAND mkdir -p ${PADDLELITE_LIB_DIR}) + execute_process(COMMAND cp -r ${PADDLELITE_DIRECTORY}/lib/ ${PADDLELITE_LIB_DIR}) + message(${PADDLELITE_LIB_DIR}) + execute_process(COMMAND rm -rf ${PADDLELITE_LIB_DIR}/*.a) + set(PADDLELITE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_full_api_shared.so") + endif() + +else() + # Use default Paddle-Lite libs. + set(PADDLELITE_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") + + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() + set(PADDLELITE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_full_api_shared.so") + set(PADDLELITE_REMOVE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_api_full_bundled.a") + endif() + + if(NOT PADDLELITE_URL) + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() # Linux + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-linux-arm64-20221209.tgz") + set(PADDLELITE_VERSION 0.0.0.20221209) + else() + message(FATAL_ERROR "Only support Linux aarch64 now, x64 is not supported with backend Paddle Lite.") + endif() + endif() + endif() + + ExternalProject_Add( + ${PADDLELITE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLELITE_URL} + PREFIX ${PADDLELITE_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${PADDLELITE_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${PADDLELITE_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${PADDLELITE_SOURCE_DIR}/lib/ ${PADDLELITE_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${PADDLELITE_SOURCE_DIR}/include ${PADDLELITE_INC_DIR} + BUILD_BYPRODUCTS ${PADDLELITE_LIB}) + +endif(PADDLELITE_DIRECTORY) + +if(UNIX AND (NOT APPLE) AND BUILD_ULTRAINFER_PYTHON) + add_custom_target(patchelf_paddle_lite ALL COMMAND bash -c "PATCHELF_EXE=${PATCHELF_EXE} python ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_lite.py ${PADDLELITE_INSTALL_DIR}/lib/" DEPENDS ${LIBRARY_NAME}) +endif() + +add_library(external_paddle_lite STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle_lite PROPERTY IMPORTED_LOCATION ${PADDLELITE_LIB}) +add_dependencies(external_paddle_lite ${PADDLELITE_PROJECT}) diff --git a/libs/ultrainfer/cmake/poros.cmake b/libs/ultrainfer/cmake/poros.cmake new file mode 100755 index 0000000000..edfd7a3909 --- /dev/null +++ b/libs/ultrainfer/cmake/poros.cmake @@ -0,0 +1,95 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +if(NOT ENABLE_TRT_BACKEND) + message(FATAL_ERROR "While ENABLE_POROS_BACKEND, requires ENABLE_TRT_BACKEND=ON, but now its OFF.") +endif() + +set(POROS_PROJECT "extern_poros") +set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros) +set(POROS_SOURCE_DIR + ${THIRD_PARTY_PATH}/poros/src/${POROS_PROJECT}) +set(POROS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/poros) +set(POROS_INC_DIR + "${POROS_INSTALL_DIR}/include" + CACHE PATH "poros include directory." FORCE) +set(POROS_LIB_DIR + "${POROS_INSTALL_DIR}/lib/" + CACHE PATH "poros lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${POROS_LIB_DIR}") + +include_directories(${POROS_INC_DIR}) +if(WIN32) + message(FATAL_ERROR "Poros Backend doesn't support Windows now.") +elseif(APPLE) + message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.") +else() + set(POROS_COMPILE_LIB + "${POROS_INSTALL_DIR}/lib/libporos.so" + CACHE FILEPATH "poros compile library." FORCE) +endif(WIN32) + +set(POROS_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(POROS_VERSION "0.1.0") +if(WIN32) + message(FATAL_ERROR "Poros Backend doesn't support Windows now.") +elseif(APPLE) + message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.") +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.") + else() + if(WITH_GPU) + set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz") + else() + message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.") + endif() + endif() +endif() +set(POROS_URL "${POROS_URL_BASE}${POROS_FILE}") + +ExternalProject_Add( + ${POROS_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${POROS_URL} + PREFIX ${POROS_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${POROS_SOURCE_DIR} ${POROS_INSTALL_DIR} + BUILD_BYPRODUCTS ${POROS_COMPILE_LIB}) + +add_library(external_poros STATIC IMPORTED GLOBAL) +set_property(TARGET external_poros PROPERTY IMPORTED_LOCATION + ${POROS_COMPILE_LIB}) +add_dependencies(external_poros ${POROS_PROJECT}) + +# Download libtorch.so with ABI=1 +set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip") +set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}") +message(STATUS "Use the default Torch lib from: ${TORCH_URL}") +download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install) +if(EXISTS ${THIRD_PARTY_PATH}/install/torch) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/torch) +endif() +file(RENAME ${THIRD_PARTY_PATH}/install/libtorch/ ${THIRD_PARTY_PATH}/install/torch) +set(TORCH_INCLUDE_DIRS ${THIRD_PARTY_PATH}/install/torch/include) +find_library(TORCH_LIBRARY torch ${THIRD_PARTY_PATH}/install/torch/lib NO_DEFAULT_PATH) +include_directories(${TORCH_INCLUDE_DIRS}) +list(APPEND DEPEND_LIBS ${TORCH_LIBRARY}) diff --git a/libs/ultrainfer/cmake/rknpu2.cmake b/libs/ultrainfer/cmake/rknpu2.cmake 
new file mode 100755 index 0000000000..44bc43fe65 --- /dev/null +++ b/libs/ultrainfer/cmake/rknpu2.cmake @@ -0,0 +1,19 @@ +# get RKNPU2_URL +set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(RKNPU2_VERSION "1.4.2b0") +set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz") +set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}") + +# download_and_decompress +download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/) + +# set path +set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime) + +# include lib +if (EXISTS ${RKNPU_RUNTIME_PATH}) + set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so) + include_directories(${RKNPU_RUNTIME_PATH}/include) +else () + message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.") +endif () diff --git a/libs/ultrainfer/cmake/sophgo.cmake b/libs/ultrainfer/cmake/sophgo.cmake new file mode 100755 index 0000000000..1e6706dbcf --- /dev/null +++ b/libs/ultrainfer/cmake/sophgo.cmake @@ -0,0 +1,7 @@ +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +find_package(libsophon REQUIRED) +message(${LIBSOPHON_LIB_DIRS}) +include_directories(${LIBSOPHON_INCLUDE_DIRS}) +message(${LIBSOPHON_LIB_DIRS}) +set(SOPHGO_RT_LIB ${LIBSOPHON_LIB_DIRS}/libbmrt.so) diff --git a/libs/ultrainfer/cmake/summary.cmake b/libs/ultrainfer/cmake/summary.cmake new file mode 100755 index 0000000000..282a0396e7 --- /dev/null +++ b/libs/ultrainfer/cmake/summary.cmake @@ -0,0 +1,84 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
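+#
+# The ultrainfer_summary() helper defined below only reports the configuration that the
+# other cmake/*.cmake modules have resolved. As a sketch (the exact call site is an
+# assumption and is not shown in this patch), the top-level CMakeLists.txt would consume
+# it roughly as follows:
+#
+#   include(${PROJECT_SOURCE_DIR}/cmake/summary.cmake)
+#   ultrainfer_summary()   # call after all ENABLE_*/WITH_* options and backend versions are set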
+ +function(ultrainfer_summary) + message(STATUS "") + message(STATUS "*************UltraInfer Building Summary**********") + message(STATUS " CMake version : ${CMAKE_VERSION}") + message(STATUS " CMake command : ${CMAKE_COMMAND}") + message(STATUS " System : ${CMAKE_SYSTEM_NAME}") + message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") + message(STATUS " C++ standard : ${CMAKE_CXX_STANDARD}") + message(STATUS " C++ cuda standard : ${CMAKE_CUDA_STANDARD}") + message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") + message(STATUS " EXE linker flags : ${CMAKE_EXE_LINKER_FLAGS}") + message(STATUS " Shared linker flags : ${CMAKE_SHARED_LINKER_FLAGS}") + message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") + get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS) + message(STATUS " Compile definitions : ${tmp}") + message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}") + message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}") + message(STATUS "") + message(STATUS " UltraInfer version : ${ULTRAINFER_VERSION}") + message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") + message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") + message(STATUS " ENABLE_HORIZON_BACKEND : ${ENABLE_HORIZON_BACKEND}") + message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") + message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") + message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}") + message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") + message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") + message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") + message(STATUS " ENABLE_TVM_BACKEND : ${ENABLE_TVM_BACKEND}") + message(STATUS " ENABLE_BENCHMARK : ${ENABLE_BENCHMARK}") + message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") + message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") + message(STATUS " ENABLE_FLYCV : ${ENABLE_FLYCV}") + message(STATUS " ENABLE_CVCUDA : ${ENABLE_CVCUDA}") + message(STATUS " WITH_GPU : ${WITH_GPU}") + message(STATUS " WITH_IPU : ${WITH_IPU}") + message(STATUS " WITH_OPENCL : ${WITH_OPENCL}") + message(STATUS " WITH_TESTING : ${WITH_TESTING}") + message(STATUS " WITH_ASCEND : ${WITH_ASCEND}") + message(STATUS " WITH_DIRECTML : ${WITH_DIRECTML}") + message(STATUS " WITH_TIMVX : ${WITH_TIMVX}") + message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}") + message(STATUS " WITH_CAPI : ${WITH_CAPI}") + message(STATUS " WITH_CSHARPAPI : ${WITH_CSHARPAPI}") + if(ENABLE_ORT_BACKEND) + message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}") + endif() + if(ENABLE_PADDLE_BACKEND) + message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}") + endif() + if(ENABLE_POROS_BACKEND) + message(STATUS " Poros version : ${POROS_VERSION}") + endif() + if(ENABLE_OPENVINO_BACKEND) + message(STATUS " OpenVINO version : ${OPENVINO_VERSION}") + endif() + if(WITH_GPU) + message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}") + message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}") + endif() + if (${BUILD_ULTRAINFER_PYTHON}) + message(STATUS " Python executable : ${PYTHON_EXECUTABLE}") + message(STATUS " Python includes : ${PYTHON_INCLUDE_DIR}") + endif() + if(ENABLE_LITE_BACKEND) + message(STATUS " Paddle Lite version : ${PADDLELITE_VERSION}") + endif() +endfunction() diff --git a/libs/ultrainfer/cmake/timvx.cmake b/libs/ultrainfer/cmake/timvx.cmake new 
file mode 100755 index 0000000000..6299a3dead --- /dev/null +++ b/libs/ultrainfer/cmake/timvx.cmake @@ -0,0 +1,38 @@ + +if(NOT ${ENABLE_LITE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON") + set(ENABLE_LITE_BACKEND ON) +endif() +if(${ENABLE_PADDLE2ONNX}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE2ONNX=OFF") + set(ENABLE_PADDLE2ONNX OFF) +endif() +if(${ENABLE_ORT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF") + set(ENABLE_ORT_BACKEND OFF) +endif() +if(${ENABLE_PADDLE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_BACKEND=OFF") + set(ENABLE_PADDLE_BACKEND OFF) +endif() +if(${ENABLE_OPENVINO_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENVINO_BACKEND=OFF") + set(ENABLE_OPENVINO_BACKEND OFF) +endif() +if(${ENABLE_TRT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TRT_BACKEND=OFF") + set(ENABLE_TRT_BACKEND OFF) +endif() + +if(${WITH_GPU}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DWITH_GPU=OFF") + set(WITH_GPU OFF) +endif() + +if(${ENABLE_TEXT}) + set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE) + message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF") +endif() + +install(FILES ${PROJECT_SOURCE_DIR}/cmake/timvx.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) +install(FILES ${PROJECT_SOURCE_DIR}/cmake/toolchain.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/libs/ultrainfer/cmake/toolchain.cmake b/libs/ultrainfer/cmake/toolchain.cmake new file mode 100755 index 0000000000..c0a7edc81f --- /dev/null +++ b/libs/ultrainfer/cmake/toolchain.cmake @@ -0,0 +1,45 @@ +if (DEFINED TARGET_ABI) + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_BUILD_TYPE MinSizeRel) + if(${TARGET_ABI} MATCHES "armhf") + set(CMAKE_SYSTEM_PROCESSOR arm) + if(NOT CMAKE_C_COMPILER) + set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") + endif() + if(NOT CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") + endif() + set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" ) + set(OPENCV_URL "https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-armv7hf-4.6.0.tgz") + set(OPENCV_FILENAME "opencv-linux-armv7hf-4.6.0") + if(WITH_TIMVX) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-20230316.tgz") + else() + message(STATUS "PADDLELITE_URL will be configured if WITH_TIMVX=ON.") + endif() + set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/opencv/lib/cmake/opencv4) + elseif(${TARGET_ABI} MATCHES "arm64") + set(CMAKE_SYSTEM_PROCESSOR aarch64) + if(NOT CMAKE_C_COMPILER) + set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") + endif() + if(NOT CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") + endif() + set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}") + set(OPENCV_URL "https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-aarch64-4.6.0.tgz") + set(OPENCV_FILENAME "opencv-linux-aarch64-4.6.0") + if(WITH_TIMVX) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-aarch64-timvx-20230316.tgz") + else() + 
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-arm64-20230316.tgz") + endif() + set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/opencv/lib/cmake/opencv4) + else() + message(FATAL_ERROR "When cross-compiling, please set the -DTARGET_ABI to arm64 or armhf.") + endif() +endif() diff --git a/libs/ultrainfer/cmake/tvm.cmake b/libs/ultrainfer/cmake/tvm.cmake new file mode 100755 index 0000000000..b6dfbebcbb --- /dev/null +++ b/libs/ultrainfer/cmake/tvm.cmake @@ -0,0 +1,55 @@ +# set path + +set(TVM_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(TVM_VERSION "0.12.0") +set(TVM_SYSTEM "") + +if (${CMAKE_SYSTEM} MATCHES "Darwin") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64") + set(TVM_SYSTEM "macos-arm64") + endif () +elseif (${CMAKE_SYSTEM} MATCHES "Linux") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86") + set(TVM_SYSTEM "linux-x86") + endif () +else () + error("TVM only support MacOS in Arm64 or linux in x86") +endif () +set(TVM_FILE "tvm-${TVM_SYSTEM}-${TVM_VERSION}.tgz") +set(TVM_URL "${TVM_URL_BASE}${TVM_FILE}") + +set(TVM_RUNTIME_PATH "${THIRD_PARTY_PATH}/install/tvm") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${TVM_RUNTIME_PATH}") +download_and_decompress(${TVM_URL} + "${CMAKE_CURRENT_BINARY_DIR}/${TVM_FILE}" + "${THIRD_PARTY_PATH}/install/") +include_directories(${TVM_RUNTIME_PATH}/include) + +# copy dlpack to third_party +set(DLPACK_PATH "${THIRD_PARTY_PATH}/install/dlpack") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${DLPACK_PATH}") +execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory + "${PROJECT_SOURCE_DIR}/third_party/dlpack" + "${THIRD_PARTY_PATH}/install/dlpack") +include_directories(${DLPACK_PATH}/include) + +set(DMLC_CORE_PATH "${THIRD_PARTY_PATH}/install/dmlc-core") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${DMLC_CORE_PATH}") +set(DMLC_CORE_URL https://bj.bcebos.com/fastdeploy/third_libs/dmlc-core.tgz) +download_and_decompress(${DMLC_CORE_URL} + "${CMAKE_CURRENT_BINARY_DIR}/dmlc-core.tgz" + "${THIRD_PARTY_PATH}/install/") +include_directories(${DMLC_CORE_PATH}/include) + +# include lib +if (EXISTS ${TVM_RUNTIME_PATH}) + if (${CMAKE_SYSTEM} MATCHES "Darwin") + set(TVM_RUNTIME_LIB ${TVM_RUNTIME_PATH}/lib/libtvm_runtime.dylib) + elseif (${CMAKE_SYSTEM} MATCHES "Linux") + set(TVM_RUNTIME_LIB ${TVM_RUNTIME_PATH}/lib/libtvm_runtime.so) + endif () + include(${TVM_RUNTIME_PATH}/lib/cmake/tvm/tvmConfig.cmake) + add_definitions(-DDMLC_USE_LOGGING_LIBRARY=) +else () + error(FATAL_ERROR "[tvm.cmake] TVM_RUNTIME_PATH does not exist.") +endif () diff --git a/libs/ultrainfer/cmake/utils.cmake b/libs/ultrainfer/cmake/utils.cmake new file mode 100755 index 0000000000..07c492d2ff --- /dev/null +++ b/libs/ultrainfer/cmake/utils.cmake @@ -0,0 +1,223 @@ +# This function comes from https://blog.csdn.net/yindongjie1221/article/details/90614261 +function(redefine_file_macro targetname) + get_target_property(source_files "${targetname}" SOURCES) + foreach(sourcefile ${source_files}) + get_property(defs SOURCE "${sourcefile}" + PROPERTY COMPILE_DEFINITIONS) + get_filename_component(filepath "${sourcefile}" ABSOLUTE) + string(REPLACE ${PROJECT_SOURCE_DIR}/ "" relpath ${filepath}) + list(APPEND defs "__REL_FILE__=\"${relpath}\"") + set_property( + SOURCE "${sourcefile}" + PROPERTY COMPILE_DEFINITIONS ${defs} + ) + endforeach() +endfunction() + +function(download_and_decompress url filename decompress_dir) + if(NOT EXISTS ${filename}) + 
message("Downloading file from ${url} to ${filename} ...") + file(DOWNLOAD ${url} "${filename}.tmp" SHOW_PROGRESS) + file(RENAME "${filename}.tmp" ${filename}) + endif() + if(NOT EXISTS ${decompress_dir}) + file(MAKE_DIRECTORY ${decompress_dir}) + endif() + message("Decompress file ${filename} ...") + execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${filename} WORKING_DIRECTORY ${decompress_dir}) +endfunction() + +function(get_openvino_libs OPENVINO_RUNTIME_DIR) + set(LIB_LIST "") + find_library(OPENVINO_LIB openvino PATHS ${OPENVINO_RUNTIME_DIR}/lib/ ${OPENVINO_RUNTIME_DIR}/lib/intel64 NO_DEFAULT_PATH) + list(APPEND LIB_LIST ${OPENVINO_LIB}) + + set(TBB_DIR ${OPENVINO_RUNTIME_DIR}/3rdparty/tbb/lib/cmake) + message(STATUS "TBB_DIR: ${TBB_DIR}") + find_package(TBB PATHS ${TBB_DIR}) + if (TBB_FOUND) + # 2024.10.22(zhangyue): Use openvino with tbb on linux + set(TBB_LIB "${OPENVINO_RUNTIME_DIR}/3rdparty/tbb/lib/libtbb.so.12") + list(APPEND LIB_LIST ${TBB_LIB}) + else() + # TODO(zhoushunjie): Use openvino with tbb on linux in future. + set(OMP_LIB "${OPENVINO_RUNTIME_DIR}/3rdparty/omp/lib/libiomp5.so") + list(APPEND LIB_LIST ${OMP_LIB}) + endif() + set(OPENVINO_LIBS ${LIB_LIST} PARENT_SCOPE) +endfunction() + +function(remove_duplicate_libraries libraries) + list(LENGTH ${libraries} lib_length) + set(libraries_temp "") + set(full_libraries "") + foreach(lib_path ${${libraries}}) + get_filename_component(lib_name ${lib_path} NAME) + list(FIND libraries_temp ${lib_name} lib_idx) + if (${lib_idx} EQUAL -1) + list(APPEND libraries_temp ${lib_name}) + list(APPEND full_libraries ${lib_path}) + endif() + endforeach() + set(${libraries} ${full_libraries} PARENT_SCOPE) +endfunction() + +function(get_windows_path win_path origin_path) + STRING(REGEX REPLACE "/" "\\\\" _win_path ${origin_path}) + set(${win_path} ${_win_path} PARENT_SCOPE) +endfunction() + +function(get_osx_architecture) + if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") + set(CURRENT_OSX_ARCH "arm64" PARENT_SCOPE) + elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64") + set(CURRENT_OSX_ARCH "x86_64" PARENT_SCOPE) + else() + set(CURRENT_OSX_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR} PARENT_SCOPE) + endif() +endfunction() + + +# A fake target to include all the libraries and tests the ultrainfer module depends. +add_custom_target(fd_compile_deps COMMAND echo 1) + +# A function to grep LINK_ONLY dependencies from INTERFACE_LINK_LIBRARIES +function(regrex_link_only_libraries OUTPUT_DEPS PUBLIC_DEPS) + string(JOIN "#" _public_deps ${PUBLIC_DEPS}) + string(REPLACE "$" "" _public_deps ${_public_deps}) + string(REPLACE "#" ";" _public_deps ${_public_deps}) + set(${OUTPUT_DEPS} ${_public_deps} PARENT_SCOPE) +endfunction() + +# Bundle several static libraries into one. This function is modified from Paddle Lite. +# reference: https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/cmake/lite.cmake#L252 +function(bundle_static_library tgt_name bundled_tgt_name fake_target) + list(APPEND static_libs ${tgt_name}) + add_dependencies(fd_compile_deps ${fake_target}) + # Set redundant static libs here, protobuf is already available + # in the Paddle Lite static library. So, we don't need protobuf + # in opencv. And there is no need for opencv_dnn, opencv_ml, + # opencv_flann and some other modules. Therefore, we chose + # to discard these redundant modules. 
+ set(REDUNDANT_STATIC_LIBS opencv_dnn opencv_calib3d opencv_photo + opencv_flann opencv_objdetect opencv_stitching opencv_gapi + opencv_ml libprotobuf) + + function(_recursively_collect_dependencies input_target) + list(FIND REDUNDANT_STATIC_LIBS ${input_target} _input_redunant_id) + if(${_input_redunant_id} GREATER 0) + return() + endif() + set(_input_link_libraries LINK_LIBRARIES) + # https://cmake.org/cmake/help/latest/prop_tgt/TYPE.html + get_target_property(_input_type ${input_target} TYPE) + # In OpenCVModules.cmake, they set the deps of modules + # (opencv_core,...) as INTERFACE_LINK_LIBRARIES. The + # 'Type' of opencv static lib is set as 'STATIC_LIBRARY'. + if ((${_input_type} STREQUAL "INTERFACE_LIBRARY") + OR (${_input_type} STREQUAL "STATIC_LIBRARY")) + set(_input_link_libraries INTERFACE_LINK_LIBRARIES) + endif() + get_target_property(_public_dependencies ${input_target} ${_input_link_libraries}) + regrex_link_only_libraries(public_dependencies "${_public_dependencies}") + + foreach(dependency IN LISTS public_dependencies) + if(TARGET ${dependency}) + get_target_property(alias ${dependency} ALIASED_TARGET) + if (TARGET ${alias}) + set(dependency ${alias}) + endif() + get_target_property(_type ${dependency} TYPE) + list(FIND REDUNDANT_STATIC_LIBS ${dependency} _deps_redunant_id) + if (${_type} STREQUAL "STATIC_LIBRARY" AND + (NOT (${_deps_redunant_id} GREATER 0))) + list(APPEND static_libs ${dependency}) + endif() + + get_property(library_already_added + GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency}) + if (NOT library_already_added) + set_property(GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency} ON) + if(NOT (${_deps_redunant_id} GREATER 0)) + _recursively_collect_dependencies(${dependency}) + endif() + endif() + endif() + endforeach() + set(static_libs ${static_libs} PARENT_SCOPE) + endfunction() + + _recursively_collect_dependencies(${tgt_name}) + + list(REMOVE_DUPLICATES static_libs) + list(REMOVE_ITEM static_libs ${REDUNDANT_STATIC_LIBS}) + message(STATUS "WITH_STATIC_LIB=${WITH_STATIC_LIB}, Found all needed static libs from dependecy tree: ${static_libs}") + message(STATUS "Exclude some redundant static libs: ${REDUNDANT_STATIC_LIBS}") + + set(bundled_tgt_full_name + ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX}) + + message(STATUS "Use bundled_tgt_full_name: ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX}") + + if(WIN32) + message(FATAL_ERROR "Not support UltraInfer static lib for windows now.") + endif() + + add_custom_target(${fake_target} ALL COMMAND ${CMAKE_COMMAND} -E echo "Building fake_target ${fake_target}") + add_dependencies(${fake_target} ${tgt_name}) + # add_dependencies(${fake_target} fastdelpoy_dummy) + + if(NOT IOS AND NOT APPLE) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in + "CREATE ${bundled_tgt_full_name}\n" ) + + foreach(tgt IN LISTS static_libs) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in + "ADDLIB $\n") + endforeach() + + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in "SAVE\n") + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in "END\n") + + file(GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar + INPUT ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in) + + set(ar_tool ${CMAKE_AR}) + if (CMAKE_INTERPROCEDURAL_OPTIMIZATION) + set(ar_tool ${CMAKE_CXX_COMPILER_AR}) + endif() + message(STATUS "Found ar_tool: 
${ar_tool}") + + add_custom_command( + TARGET ${fake_target} PRE_BUILD + COMMAND rm -f ${bundled_tgt_full_name} + COMMAND ${ar_tool} -M < ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar + COMMENT "Bundling ${bundled_tgt_name}" + COMMAND ${CMAKE_STRIP} --strip-unneeded ${CMAKE_CURRENT_BINARY_DIR}/lib${bundled_tgt_name}.a + COMMENT "Stripped unneeded debug symbols in ${bundled_tgt_name}" + DEPENDS ${tgt_name} + VERBATIM) + else() + foreach(lib ${static_libs}) + set(libfiles ${libfiles} $) + endforeach() + add_custom_command( + TARGET ${fake_target} PRE_BUILD + COMMAND rm -f ${bundled_tgt_full_name} + COMMAND /usr/bin/libtool -static -o ${bundled_tgt_full_name} ${libfiles} + COMMENT "Bundling ${bundled_tgt_name}" + COMMAND ${CMAKE_STRIP} -S ${CMAKE_CURRENT_BINARY_DIR}/lib${bundled_tgt_name}.a + COMMENT "Stripped unneeded debug symbols in ${bundled_tgt_name}" + DEPENDS ${tgt_name} + ) + endif() + + add_library(${bundled_tgt_name} STATIC IMPORTED GLOBAL) + set_property(TARGET ${bundled_tgt_name} PROPERTY IMPORTED_LOCATION + ${bundled_tgt_full_name}) + add_dependencies(${bundled_tgt_name} ${fake_target}) + add_dependencies(${bundled_tgt_name} ${tgt_name}) + +endfunction() diff --git a/libs/ultrainfer/cpack/debian_postinst.in b/libs/ultrainfer/cpack/debian_postinst.in new file mode 100755 index 0000000000..f4d8d2d206 --- /dev/null +++ b/libs/ultrainfer/cpack/debian_postinst.in @@ -0,0 +1,42 @@ +#!/bin/bash + +case "$1" in + configure) + PLATFORM=`uname` + ULTRAINFER_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@ + + echo "=============== Information ======================" + echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" + echo "Platform: $PLATFORM" + echo "==================================================" + + # Find all the .so files' path + ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` + for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECOTRIES[${#LIBS_DIRECOTRIES[@]}]=${SO_FILE%/*} + done + + # Remove the dumplicate directories + LIBS_DIRECOTRIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECOTRIES[@]})) + + IMPORT_PATH="" + for LIB_DIR in ${LIBS_DIRECOTRIES[@]};do + echo "Found Library Directory: $LIB_DIR" + echo ${LIB_DIR} >> @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf + done + + if [ ! -d /etc/ld.so.conf.d ]; then + mkdir -p /etc/ld.so.conf.d + fi + ln -sf @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf /etc/ld.so.conf.d + + ldconfig + + echo "UltraInfer is successfully installed and configured." + echo "Now please get started with UltraInfer examples at $ULTRAINFER_LIBRARY_PATH/examples." + echo "And don't forget to set ULTRAINFER_INSTALL_DIR=$ULTRAINFER_LIBRARY_PATH in cmake when building examples." + ;; + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 +esac diff --git a/libs/ultrainfer/cpack/debian_prerm.in b/libs/ultrainfer/cpack/debian_prerm.in new file mode 100755 index 0000000000..a3a4c1ab35 --- /dev/null +++ b/libs/ultrainfer/cpack/debian_prerm.in @@ -0,0 +1,12 @@ +#!/bin/bash + +case "$1" in + remove|deconfigure) + rm -f /etc/ld.so.conf.d/@CPACK_PACKAGE_NAME@.conf + rm @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf + rm -rf @CMAKE_INSTALL_PREFIX@/third_libs/install/tensorrt/lib/ + + ldconfig + echo "UltraInfer is going to be uninstalled." 
+ ;; +esac diff --git a/libs/ultrainfer/cpack/rpm_postinst.in b/libs/ultrainfer/cpack/rpm_postinst.in new file mode 100755 index 0000000000..d9f2989fb2 --- /dev/null +++ b/libs/ultrainfer/cpack/rpm_postinst.in @@ -0,0 +1,35 @@ +#!/bin/bash + +PLATFORM=`uname` +ULTRAINFER_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@ + +echo "=============== Information ======================" +echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" +echo "Platform: $PLATFORM" +echo "==================================================" + +# Find all the .so files' path +ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` +for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECOTRIES[${#LIBS_DIRECOTRIES[@]}]=${SO_FILE%/*} +done + +# Remove the dumplicate directories +LIBS_DIRECOTRIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECOTRIES[@]})) + +IMPORT_PATH="" +for LIB_DIR in ${LIBS_DIRECOTRIES[@]};do + echo "Found Library Directory: $LIB_DIR" + echo ${LIB_DIR} >> @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf +done + +if [ ! -d /etc/ld.so.conf.d ]; then + mkdir -p /etc/ld.so.conf.d +fi +ln -sf @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf /etc/ld.so.conf.d + +ldconfig + +echo "UltraInfer is successfully installed and configured." +echo "Now please get started with UltraInfer examples at $ULTRAINFER_LIBRARY_PATH/examples." +echo "And don't forget to set ULTRAINFER_INSTALL_DIR=$ULTRAINFER_LIBRARY_PATH in cmake when building examples." diff --git a/libs/ultrainfer/cpack/rpm_postrm.in b/libs/ultrainfer/cpack/rpm_postrm.in new file mode 100755 index 0000000000..8c63660390 --- /dev/null +++ b/libs/ultrainfer/cpack/rpm_postrm.in @@ -0,0 +1,8 @@ +#!/bin/bash + +rm -f /etc/ld.so.conf.d/@CPACK_PACKAGE_NAME@.conf +rm @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf +rm -rf @CMAKE_INSTALL_PREFIX@/third_libs/install/tensorrt/lib/ + +ldconfig +echo "UltraInfer has been uninstalled." diff --git a/libs/ultrainfer/python/__init__.py b/libs/ultrainfer/python/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/requirements.txt b/libs/ultrainfer/python/requirements.txt new file mode 100755 index 0000000000..02f33802f0 --- /dev/null +++ b/libs/ultrainfer/python/requirements.txt @@ -0,0 +1,15 @@ +wheel +requests +tqdm +numpy<2 +opencv-python +pyyaml +pillow<10.0.0 +pandas>=0.25.0,<=1.3.5 +pycocotools +matplotlib +chinese_calendar +joblib +scikit-image +scikit-learn>=1.3.2 +tokenizers diff --git a/libs/ultrainfer/python/scripts/__init__.py b/libs/ultrainfer/python/scripts/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/scripts/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/scripts/build_gpu.sh b/libs/ultrainfer/python/scripts/build_gpu.sh new file mode 100755 index 0000000000..720e8a8338 --- /dev/null +++ b/libs/ultrainfer/python/scripts/build_gpu.sh @@ -0,0 +1,12 @@ +export ENABLE_ORT_BACKEND=ON +export ENABLE_OPENVINO_BACKEND=ON +export ENABLE_PADDLE_BACKEND=ON +export ENABLE_TRT_BACKEND=ON +export TRT_DIRECTORY=/ultrainfer/libs/TensorRT-8.4.1.5 +export CUDA_DIRECTORY=/usr/local/cuda +export ENABLE_VISION=ON +export WITH_GPU=ON +export CMAKE_CXX_COMPILER=/usr/local/gcc-8.2/bin/g++ + +python setup.py build +python setup.py bdist_wheel diff --git a/libs/ultrainfer/python/scripts/process_libraries.py.in b/libs/ultrainfer/python/scripts/process_libraries.py.in new file mode 100755 index 0000000000..68d66f96a4 --- /dev/null +++ b/libs/ultrainfer/python/scripts/process_libraries.py.in @@ -0,0 +1,207 @@ + +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
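+# Post-build packaging helper: the functions below locate the ultrainfer shared
+# libraries produced by the CMake build, rewrite their rpaths so the wheel is
+# relocatable (patchelf on Linux, install_name_tool on macOS, DLL copying on
+# Windows), and return the package_data list consumed by setup.py, excluding
+# the bulky TensorRT libraries.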
+ +import os +import sys +import shutil +import subprocess +import platform + +user_specified_dirs = ['@OPENCV_DIRECTORY@', '@ORT_DIRECTORY@', ] +PACKAGE_NAME = os.getenv("PACKAGE_NAME", "ultrainfer") +PY_PACKAGE_NAME = PACKAGE_NAME + "_main" + + +def process_on_linux(current_dir): + rpaths = ["$ORIGIN:$ORIGIN/libs"] + fd_libs = list() + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + for f in os.listdir(libs_path): + filename = os.path.join(libs_path, f) + if not os.path.isfile(filename): + continue + if f.count(PACKAGE_NAME) and f.count(".so") > 0: + fd_libs.append(filename) + + cmake_build_dir = os.path.join(current_dir, ".setuptools-cmake-build") + patchelf_bin_path = os.path.join(cmake_build_dir, "third_libs/patchelf/bin/patchelf") + if not os.path.exists(patchelf_bin_path): + patchelf_bin_path = "patchelf" + + third_libs_path = os.path.join(libs_path, "third_libs") + + # remove some unneeded opencv files from the python wheel to decrease package size + if os.path.exists(os.path.join(third_libs_path, "opencv")): + for root, dirs, files in os.walk(os.path.join(third_libs_path, "opencv")): + for f in files: + items = f.strip().split('.') + if len(items) != 4: + os.remove(os.path.join(root, f)) + continue + if items[0].strip() not in ["libopencv_highgui", "libopencv_video", "libopencv_videoio", "libopencv_imgcodecs", "libopencv_imgproc", "libopencv_core", "libopencv_calib3d", "libopencv_features2d", "libopencv_flann"]: + os.remove(os.path.join(root, f)) + + all_libs_paths = [third_libs_path] + user_specified_dirs + for path in all_libs_paths: + for root, dirs, files in os.walk(path): + for d in dirs: + if d not in ["lib", "lib64"]: + continue + rel_path = os.path.relpath(os.path.join(root, d), libs_path) + if path in user_specified_dirs: + # Note(zhoushunjie): Use the absolute path for user_specified_dirs + rpath = os.path.join(root, d) + else: + rpath = "$ORIGIN/" + rel_path + rpaths.append(rpath) + for lib in fd_libs: + command = "{} --set-rpath '{}' {}".format(patchelf_bin_path, ":".join(rpaths), lib) + if platform.machine() != 'sw_64' and platform.machine() != 'mips64': + # Use call() so the exit status is actually checked; Popen returns a process object, not a return code. + assert subprocess.call( + command, + shell=True) == 0, "patchelf failed for {}, the command: {}".format( + lib, command) + + +def process_on_mac(current_dir): + fd_libs = list() + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + cmake_build_dir = os.path.join(current_dir, ".setuptools-cmake-build") + for f in os.listdir(libs_path): + filename = os.path.join(libs_path, f) + if not os.path.isfile(filename): + continue + if f.count(PACKAGE_NAME) > 0 and (f.count(".dylib") > 0 or + f.count(".so") > 0): + fd_libs.append(filename) + + commands = list() + pre_commands = list() + for lib in fd_libs: + if lib.count(PY_PACKAGE_NAME) > 0: + pre_commands.append( + "install_name_tool -delete_rpath {} ".format(cmake_build_dir) + lib) + commands.append("install_name_tool -id @loader_path " + lib) + commands.append("install_name_tool -add_rpath @loader_path " + lib) + + third_libs_path = os.path.join(libs_path, "third_libs") + cmake_third_libs_path = os.path.join(cmake_build_dir, "third_libs", "install") + all_libs_paths = [cmake_third_libs_path] + user_specified_dirs + for path in all_libs_paths: + for root, dirs, files in os.walk(path): + for d in dirs: + if d not in ["lib", "lib64"]: + continue + rel_path = os.path.relpath(os.path.join(root, d), cmake_third_libs_path) + if path in user_specified_dirs: + # Note(zhoushunjie): Use the absolute path for user_specified_dirs + need_delete_rpath = os.path.join(root, d)
+ need_add_rpath = os.path.join(root, d) + else: + need_delete_rpath = os.path.join(root, d) + need_add_rpath = "@loader_path/third_libs/" + rel_path + for lib in fd_libs: + if lib.count(PY_PACKAGE_NAME) > 0: + pre_commands.append( + "install_name_tool -delete_rpath {} {}".format(need_delete_rpath, lib)) + commands.append( + "install_name_tool -add_rpath {} {}".format(need_add_rpath, lib)) + + for command in pre_commands: + try: + os.system(command) + except: + print("Skip execute command: " + command) + + for command in commands: + assert os.system( + command) == 0, "command execute failed! command: {}".format( + command) + +def process_on_windows(current_dir): + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + third_libs_path = os.path.join(libs_path, "third_libs") + for root, dirs, files in os.walk(third_libs_path): + for f in files: + file_path = os.path.join(root, f) + if f.count('onnxruntime') > 0 and f.endswith('.dll'): + shutil.copy(file_path, libs_path) + + +def get_all_files(dirname): + files = list() + for root, dirs, filenames in os.walk(dirname): + for f in filenames: + fullname = os.path.join(root, f) + files.append(fullname) + return files + + +def process_libraries(current_dir): + if platform.system().lower() == "linux": + process_on_linux(current_dir) + elif platform.system().lower() == "darwin": + process_on_mac(current_dir) + elif platform.system().lower() == "windows": + process_on_windows(current_dir) + + all_files = get_all_files(os.path.join(current_dir, PACKAGE_NAME, "libs")) + package_data = list() + + if platform.system().lower() == "windows": + + def check_windows_legal_file(f): + # Note(zhoushunjie): Special case for some library + # File 'plugins.xml' is special case of openvino. + for special_file in ['plugins.xml']: + if special_file in f: + return True + return False + + for f in all_files: + if f.endswith(".pyd") or f.endswith("lib") or f.endswith( + "dll") or check_windows_legal_file(f): + package_data.append( + os.path.relpath(f, os.path.join(current_dir, + PACKAGE_NAME))) + + return package_data + + filters = [".vcxproj", ".png", ".java", ".h", ".cc", ".cpp", ".hpp"] + for f in all_files: + remain = True + for flt in filters: + if f.count(flt) > 0: + remain = False + filename = os.path.split(f)[-1] +# Note(zhoushunjie): To add the trt libs below will increase the size of whl package by 450M. + if filename in [ + "libnvinfer_plugin.so", + "libnvinfer.so", "libnvonnxparser.so", + "libnvparsers.so", "libnvcaffe_parser.so" + ]: + continue + + for lib_prefix in ["libnvinfer_plugin.so.8.", + "libnvinfer.so.8.", "libnvonnxparser.so.8.", + "libnvparsers.so.8.", "libnvcaffe_parser.so.8."]: + if filename.startswith(lib_prefix): + remain = False + break + + if remain: + package_data.append( + os.path.relpath(f, os.path.join(current_dir, PACKAGE_NAME))) + return package_data diff --git a/libs/ultrainfer/python/setup.py b/libs/ultrainfer/python/setup.py new file mode 100755 index 0000000000..b456b95c92 --- /dev/null +++ b/libs/ultrainfer/python/setup.py @@ -0,0 +1,485 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file refered to github.com/onnx/onnx.git + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import shutil +import os + +TOP_DIR = os.path.realpath(os.path.dirname(__file__)) +TOP_DIR = os.path.split(TOP_DIR)[0] +PACKAGE_NAME = os.getenv("PACKAGE_NAME", "ultrainfer") +wheel_name = os.getenv("WHEEL_NAME", "ultrainfer-python") + +if not os.path.exists(PACKAGE_NAME): + shutil.copytree("ultrainfer", PACKAGE_NAME) + +from distutils.spawn import find_executable +from distutils import sysconfig, log +import setuptools +import setuptools.command.build_py +import setuptools.command.develop +import setuptools.command.build_ext + +from collections import namedtuple +from contextlib import contextmanager +import glob +import shlex +import subprocess +import sys +import platform +from textwrap import dedent +import multiprocessing + +with open(os.path.join(TOP_DIR, "python", "requirements.txt")) as fin: + REQUIRED_PACKAGES = fin.read() + +if os.getenv("BUILD_ON_CPU", "OFF") == "ON": + os.environ["ENABLE_PADDLE_BACKEND"] = "ON" + os.environ["ENABLE_ORT_BACKEND"] = "ON" + os.environ["ENABLE_OPENVINO_BACKEND"] = "ON" + os.environ["ENABLE_VISION"] = "ON" + os.environ["ENABLE_TEXT"] = "ON" + os.environ["WITH_GPU"] = "OFF" + +setup_configs = dict() +setup_configs["LIBRARY_NAME"] = PACKAGE_NAME +setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main" +# Backend options +setup_configs["ENABLE_TVM_BACKEND"] = os.getenv("ENABLE_TVM_BACKEND", "OFF") +setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND", "OFF") +setup_configs["ENABLE_SOPHGO_BACKEND"] = os.getenv("ENABLE_SOPHGO_BACKEND", "OFF") +setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF") +setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", "OFF") +setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND", "OFF") +setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND", "OFF") +setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF") +setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF") +setup_configs["ENABLE_PADDLE2ONNX"] = os.getenv("ENABLE_PADDLE2ONNX", "OFF") +setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF") +setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF") +setup_configs["ENABLE_CVCUDA"] = os.getenv("ENABLE_CVCUDA", "OFF") +setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF") +setup_configs["ENABLE_BENCHMARK"] = os.getenv("ENABLE_BENCHMARK", "OFF") +# Hardware options +setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") +setup_configs["WITH_IPU"] = os.getenv("WITH_IPU", "OFF") +setup_configs["WITH_OPENCL"] = os.getenv("WITH_OPENCL", "OFF") +setup_configs["WITH_TIMVX"] = os.getenv("WITH_TIMVX", "OFF") +setup_configs["WITH_DIRECTML"] = os.getenv("WITH_DIRECTML", "OFF") +setup_configs["WITH_ASCEND"] = os.getenv("WITH_ASCEND", "OFF") +setup_configs["WITH_KUNLUNXIN"] = os.getenv("WITH_KUNLUNXIN", "OFF") +setup_configs["RKNN2_TARGET_SOC"] 
= os.getenv("RKNN2_TARGET_SOC", "") +# Custom deps settings +setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED") +setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "/usr/local/cuda") +setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "") +setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "") +setup_configs["OPENVINO_DIRECTORY"] = os.getenv("OPENVINO_DIRECTORY", "") +setup_configs["PADDLEINFERENCE_DIRECTORY"] = os.getenv("PADDLEINFERENCE_DIRECTORY", "") +setup_configs["PADDLEINFERENCE_VERSION"] = os.getenv("PADDLEINFERENCE_VERSION", "") +setup_configs["PADDLEINFERENCE_URL"] = os.getenv("PADDLEINFERENCE_URL", "") +setup_configs["PADDLEINFERENCE_API_COMPAT_2_4_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_4_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_2_5_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_5_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_2_6_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_6_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_DEV"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_DEV", "OFF" +) +setup_configs["PADDLEINFERENCE_API_CUSTOM_OP"] = os.getenv( + "PADDLEINFERENCE_API_CUSTOM_OP", "OFF" +) +setup_configs["PADDLE2ONNX_URL"] = os.getenv("PADDLE2ONNX_URL", "") +setup_configs["PADDLELITE_URL"] = os.getenv("PADDLELITE_URL", "") + +# Other settings +setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF") +setup_configs["BUILD_PADDLE2ONNX"] = os.getenv("BUILD_PADDLE2ONNX", "OFF") + +if setup_configs["RKNN2_TARGET_SOC"] != "" or setup_configs["BUILD_ON_JETSON"] != "OFF": + REQUIRED_PACKAGES = REQUIRED_PACKAGES.replace("opencv-python", "") + +if wheel_name == "ultrainfer-python": + if setup_configs["WITH_GPU"] == "ON" or setup_configs["BUILD_ON_JETSON"] == "ON": + wheel_name = "ultrainfer-gpu-python" + elif setup_configs["WITH_IPU"] == "ON": + wheel_name = "ultrainfer-ipu-python" + +if os.getenv("CMAKE_CXX_COMPILER", None) is not None: + setup_configs["CMAKE_CXX_COMPILER"] = os.getenv("CMAKE_CXX_COMPILER") + +SRC_DIR = os.path.join(TOP_DIR, PACKAGE_NAME) +PYTHON_SRC_DIR = os.path.join(TOP_DIR, "python", PACKAGE_NAME) +CMAKE_BUILD_DIR = os.path.join(TOP_DIR, "python", ".setuptools-cmake-build") + +WINDOWS = os.name == "nt" + +CMAKE = find_executable("cmake3") or find_executable("cmake") +MAKE = find_executable("make") + +setup_requires = [] +extras_require = {} + +################################################################################ +# Global variables for controlling the build variant +################################################################################ + +# Default value is set to TRUE\1 to keep the settings same as the current ones. +# However going forward the recomemded way to is to set this to False\0 +USE_MSVC_STATIC_RUNTIME = bool(os.getenv("USE_MSVC_STATIC_RUNTIME", "1") == "1") +ONNX_NAMESPACE = os.getenv("ONNX_NAMESPACE", "paddle2onnx") +################################################################################ +# Version +################################################################################ + +try: + git_version = ( + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=TOP_DIR) + .decode("ascii") + .strip() + ) +except (OSError, subprocess.CalledProcessError): + git_version = None + +extra_version_info = "" +if setup_configs["PADDLEINFERENCE_VERSION"] != "": + extra_version_info += "." 
+ setup_configs["PADDLEINFERENCE_VERSION"] + +with open(os.path.join(TOP_DIR, "VERSION_NUMBER")) as version_file: + VersionInfo = namedtuple( + "VersionInfo", + [ + "version", + "git_version", + "extra_version_info", + "enable_trt_backend", + "enable_paddle_backend", + "with_gpu", + ], + )( + version=version_file.read().strip(), + git_version=git_version, + extra_version_info=extra_version_info.strip("."), + enable_trt_backend=setup_configs["ENABLE_TRT_BACKEND"], + enable_paddle_backend=setup_configs["ENABLE_PADDLE_BACKEND"], + with_gpu=setup_configs["WITH_GPU"], + ) + +################################################################################ +# Pre Check +################################################################################ + +assert CMAKE, 'Could not find "cmake" executable!' + +################################################################################ +# Utilities +################################################################################ + + +@contextmanager +def cd(path): + if not os.path.isabs(path): + raise RuntimeError("Can only cd to absolute path, got: {}".format(path)) + orig_path = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(orig_path) + + +################################################################################ +# Customized commands +################################################################################ + + +class NoOptionCommand(setuptools.Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + +def get_all_files(dirname): + files = list() + for root, dirs, filenames in os.walk(dirname): + for f in filenames: + fullname = os.path.join(root, f) + files.append(fullname) + return files + + +class create_version(NoOptionCommand): + def run(self): + with open(os.path.join(PYTHON_SRC_DIR, "code_version.py"), "w") as f: + f.write( + dedent( + """\ + # This file is generated by setup.py. DO NOT EDIT! + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + version = '{version}' + git_version = '{git_version}' + extra_version_info = '{extra_version_info}' + enable_trt_backend = '{enable_trt_backend}' + enable_paddle_backend = '{enable_paddle_backend}' + with_gpu = '{with_gpu}' + """.format( + **dict(VersionInfo._asdict()) + ) + ) + ) + + +class cmake_build(setuptools.Command): + """ + Compiles everything when `python setupmnm.py build` is run using cmake. + Custom args can be passed to cmake by specifying the `CMAKE_ARGS` + environment variable. + The number of CPUs used by `make` can be specified by passing `-j` + to `setup.py build`. By default all CPUs are used. 
+ """ + + user_options = [ + (str("jobs="), str("j"), str("Specifies the number of jobs to use with make")) + ] + + built = False + + def initialize_options(self): + self.jobs = None + + def finalize_options(self): + if sys.version_info[0] >= 3: + self.set_undefined_options("build", ("parallel", "jobs")) + if self.jobs is None and os.getenv("MAX_JOBS") is not None: + self.jobs = os.getenv("MAX_JOBS") + self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs) + + def run(self): + if cmake_build.built: + return + cmake_build.built = True + if not os.path.exists(CMAKE_BUILD_DIR): + os.makedirs(CMAKE_BUILD_DIR) + + with cd(CMAKE_BUILD_DIR): + build_type = "Release" + # configure + cmake_args = [ + CMAKE, + "-DPYTHON_INCLUDE_DIR={}".format(sysconfig.get_python_inc()), + "-DPYTHON_EXECUTABLE={}".format(sys.executable), + "-DBUILD_ULTRAINFER_PYTHON=ON", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DONNX_NAMESPACE={}".format(ONNX_NAMESPACE), + "-DPY_EXT_SUFFIX={}".format( + sysconfig.get_config_var("EXT_SUFFIX") or "" + ), + ] + cmake_args.append("-DCMAKE_BUILD_TYPE=%s" % build_type) + for k, v in setup_configs.items(): + cmake_args.append("-D{}={}".format(k, v)) + if WINDOWS: + cmake_args.extend( + [ + # we need to link with libpython on windows, so + # passing python version to window in order to + # find python in cmake + "-DPY_VERSION={}".format( + "{0}.{1}".format(*sys.version_info[:2]) + ), + ] + ) + if platform.architecture()[0] == "64bit": + cmake_args.extend(["-A", "x64", "-T", "host=x64"]) + else: + cmake_args.extend(["-A", "Win32", "-T", "host=x86"]) + if "CMAKE_ARGS" in os.environ: + extra_cmake_args = shlex.split(os.environ["CMAKE_ARGS"]) + # prevent crossfire with downstream scripts + del os.environ["CMAKE_ARGS"] + log.info("Extra cmake args: {}".format(extra_cmake_args)) + cmake_args.extend(extra_cmake_args) + cmake_args.append(TOP_DIR) + subprocess.check_call(cmake_args) + + build_args = [CMAKE, "--build", os.curdir] + if WINDOWS: + build_args.extend(["--config", build_type]) + build_args.extend(["--", "/maxcpucount:{}".format(self.jobs)]) + else: + build_args.extend(["--", "-j", str(self.jobs)]) + subprocess.check_call(build_args) + + +class build_py(setuptools.command.build_py.build_py): + def run(self): + self.run_command("create_version") + self.run_command("cmake_build") + + generated_python_files = glob.glob( + os.path.join(CMAKE_BUILD_DIR, PACKAGE_NAME, "*.py") + ) + glob.glob(os.path.join(CMAKE_BUILD_DIR, PACKAGE_NAME, "*.pyi")) + + for src in generated_python_files: + dst = os.path.join(TOP_DIR, os.path.relpath(src, CMAKE_BUILD_DIR)) + self.copy_file(src, dst) + + return setuptools.command.build_py.build_py.run(self) + + +class develop(setuptools.command.develop.develop): + def run(self): + self.run_command("build_py") + setuptools.command.develop.develop.run(self) + + +class build_ext(setuptools.command.build_ext.build_ext): + def run(self): + self.run_command("cmake_build") + setuptools.command.build_ext.build_ext.run(self) + + def build_extensions(self): + for ext in self.extensions: + fullname = self.get_ext_fullname(ext.name) + filename = os.path.basename(self.get_ext_filename(fullname)) + + lib_path = CMAKE_BUILD_DIR + if os.name == "nt": + debug_lib_dir = os.path.join(lib_path, "Debug") + release_lib_dir = os.path.join(lib_path, "Release") + if os.path.exists(debug_lib_dir): + lib_path = debug_lib_dir + elif os.path.exists(release_lib_dir): + lib_path = release_lib_dir + src = os.path.join(lib_path, filename) + dst = 
os.path.join(os.path.realpath(self.build_lib), PACKAGE_NAME, filename) + self.copy_file(src, dst) + + +cmdclass = { + "create_version": create_version, + "cmake_build": cmake_build, + "build_py": build_py, + "develop": develop, + "build_ext": build_ext, +} + +################################################################################ +# Extensions +################################################################################ + +ext_modules = [ + setuptools.Extension( + name=str(PACKAGE_NAME + "." + setup_configs["PY_LIBRARY_NAME"]), sources=[] + ), +] + +################################################################################ +# Packages +################################################################################ + +# no need to do fancy stuff so far +if PACKAGE_NAME != "ultrainfer": + packages = setuptools.find_packages(exclude=["ultrainfer*", "scripts"]) +else: + packages = setuptools.find_packages(exclude=["xencrypt*", "scripts"]) + +################################################################################ +# Test +################################################################################ + +if sys.version_info[0] == 3: + # Mypy doesn't work with Python 2 + extras_require["mypy"] = ["mypy==0.600"] + +################################################################################ +# Final +################################################################################ + +package_data = {PACKAGE_NAME: ["LICENSE", "ThirdPartyNotices.txt"]} + +if sys.argv[1] == "install" or sys.argv[1] == "bdist_wheel": + shutil.copy( + os.path.join(TOP_DIR, "ThirdPartyNotices.txt"), + os.path.join(TOP_DIR, "python", PACKAGE_NAME), + ) + shutil.copy( + os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, "python", PACKAGE_NAME) + ) + if not os.path.exists( + os.path.join(TOP_DIR, "python", PACKAGE_NAME, "libs", "third_libs") + ): + print( + f"Didn't detect path: {PACKAGE_NAME}/libs/third_libs exist, please execute `python setup.py build` first" + ) + sys.exit(0) + from scripts.process_libraries import process_libraries + + all_lib_data = process_libraries(os.path.split(os.path.abspath(__file__))[0]) + package_data[PACKAGE_NAME].extend(all_lib_data) + setuptools.setup( + name=wheel_name, + version=VersionInfo.version + extra_version_info, + ext_modules=ext_modules, + description="Deploy Kit Tool For Deeplearning models.", + packages=packages, + package_data=package_data, + include_package_data=True, + setup_requires=setup_requires, + extras_require=extras_require, + author="ultrainfer", + install_requires=REQUIRED_PACKAGES, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + ], + license="Apache 2.0", + ) +else: + setuptools.setup( + name=wheel_name, + version=VersionInfo.version + extra_version_info, + description="Deploy Kit Tool For Deeplearning models.", + ext_modules=ext_modules, + cmdclass=cmdclass, + packages=packages, + package_data=package_data, + include_package_data=False, + setup_requires=setup_requires, + extras_require=extras_require, + author="ultrainfer", + install_requires=REQUIRED_PACKAGES, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + ], + license="Apache 2.0", + ) diff --git a/libs/ultrainfer/python/ultrainfer/__init__.py b/libs/ultrainfer/python/ultrainfer/__init__.py new file mode 100755 index 0000000000..12bc5a7236 --- /dev/null +++ 
b/libs/ultrainfer/python/ultrainfer/__init__.py @@ -0,0 +1,186 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +import os +import sys +import platform + +# Create a symbol link to tensorrt library. +trt_directory = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "libs/third_libs/tensorrt/lib/" +) +if os.name != "nt" and os.path.exists(trt_directory): + logging.basicConfig(level=logging.INFO) + for trt_lib in [ + "libnvcaffe_parser.so", + "libnvinfer_plugin.so", + "libnvinfer.so", + "libnvonnxparser.so", + "libnvparsers.so", + ]: + dst = os.path.join(trt_directory, trt_lib) + src = os.path.join(trt_directory, trt_lib + ".8") + if not os.path.exists(dst): + try: + os.symlink(src, dst) + logging.info(f"Create a symbolic link pointing to {src} named {dst}.") + except OSError as e: + logging.warning( + f"Failed to create a symbolic link pointing to {src} by an unprivileged user. " + "It may failed when you use Paddle TensorRT backend. " + "Please use administator privilege to import ultrainfer at first time." + ) + break + + # HACK: Reset the root logger config that got messed up by FD. + root_logger = logging.getLogger() + root_logger.level = logging.WARNING + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + +from .code_version import version, git_version, extra_version_info +from .code_version import enable_trt_backend, enable_paddle_backend, with_gpu + +# Note(zhoushunjie): Fix the import order of paddle and ultrainfer library. +# This solution will be removed it when the confilct of paddle and +# ultrainfer is fixed. + +# Note(qiuyanjun): Add backward compatible for paddle 2.4.x +sys_platform = platform.platform().lower() + + +def get_paddle_version(): + paddle_version = "" + try: + import pkg_resources + + paddle_version = pkg_resources.require("paddlepaddle-gpu")[0].version.split( + ".post" + )[0] + except: + try: + paddle_version = pkg_resources.require("paddlepaddle")[0].version.split( + ".post" + )[0] + except: + pass + return paddle_version + + +def should_import_paddle(): + if ("paddle2.4" in extra_version_info) or ("post24" in extra_version_info): + paddle_version = get_paddle_version() + if ( + paddle_version != "" + and paddle_version <= "2.4.2" + and paddle_version != "0.0.0" + ): + return True + return False + + +def should_set_tensorrt(): + if ( + with_gpu == "ON" + and enable_paddle_backend == "ON" + and enable_trt_backend == "ON" + ): + return True + return False + + +def tensorrt_is_avaliable(): + # Note(qiuyanjun): Only support linux now. + found_trt_lib = False + if ("linux" in sys_platform) and ("LD_LIBRARY_PATH" in os.environ.keys()): + for lib_path in os.environ["LD_LIBRARY_PATH"].split(":"): + if os.path.exists(os.path.join(lib_path, "libnvinfer.so")): + found_trt_lib = True + break + return found_trt_lib + + +try: + # windows: no conflict between ultrainfer and paddle. 
+ # linux: must import paddle first to solve the conflict. + # macos: still can not solve the conflict between ultrainfer and paddle, + # due to the global flags redefined in paddle/paddle_inference so. + # we got the error (ERROR: flag 'xxx' was defined more than once). + if "linux" in sys_platform: + if should_import_paddle(): + import paddle # need import paddle first for paddle2.4.x + + # check whether tensorrt in LD_LIBRARY_PATH for ultrainfer + if should_set_tensorrt() and (not tensorrt_is_avaliable()): + if os.path.exists(trt_directory): + logging.info( + "\n[WARNING] Can not find TensorRT lib in LD_LIBRARY_PATH for UltraInfer! \ + \n[WARNING] Please export [ YOUR CUSTOM TensorRT ] lib path to LD_LIBRARY_PATH first, or run the command: \ + \n[WARNING] Linux: 'export LD_LIBRARY_PATH=$(python -c 'from ultrainfer import trt_directory; print(trt_directory)'):$LD_LIBRARY_PATH'" + ) + else: + logging.info( + "\n[WARNING] Can not find TensorRT lib in LD_LIBRARY_PATH for UltraInfer! \ + \n[WARNING] Please export [YOUR CUSTOM TensorRT] lib path to LD_LIBRARY_PATH first." + ) +except: + pass + + +os.environ["FLAGS_enable_pir_api"] = "0" +logging.warning( + "Please note that we have set the environment variable \ +'FLAGS_enable_pir_api' to 'False' to ensure the correct operation of the Paddle backend." +) + + +from .c_lib_wrap import ( + ModelFormat, + Backend, + FDDataType, + TensorInfo, + Device, + is_built_with_gpu, + is_built_with_ort, + ModelFormat, + is_built_with_paddle, + is_built_with_trt, + get_default_cuda_directory, +) + + +def set_logger(enable_info=True, enable_warning=True): + """Set behaviour of logger while using UltraInfer + + :param enable_info: (boolean)Whether to print out log level of INFO + :param enable_warning: (boolean)Whether to print out log level of WARNING, recommend to set to True + """ + from .c_lib_wrap import set_logger + + set_logger(enable_info, enable_warning) + + +from .runtime import Runtime, RuntimeOption +from .model import UltraInferModel +from . import c_lib_wrap as C +from . import vision +from . import pipeline +from . import text +from . import ts +from .download import download, download_and_decompress, download_model, get_model_list + + +__version__ = version diff --git a/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in b/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in new file mode 100755 index 0000000000..248f2b66af --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in @@ -0,0 +1,190 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
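+# A minimal usage sketch for the public API re-exported by ultrainfer/__init__.py
+# above (assumes an installed wheel and network access; the model name below is a
+# placeholder, not a real entry):
+#
+#     import ultrainfer as ui
+#     ui.set_logger(enable_info=False, enable_warning=True)   # silence INFO logs
+#     print(ui.__version__)
+#     print(ui.get_model_list())                # downloadable models per category
+#     ui.download_model(name="<model-name>")    # saved under hubenv.MODEL_HOME by default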
+from __future__ import absolute_import +import logging +import os +import sys + +user_specified_dirs = ['@OPENCV_DIRECTORY@', '@ORT_DIRECTORY@', ] + + +def is_built_with_gpu() -> bool: + return True if "@WITH_GPU@" == "ON" else False + + +def is_built_with_ort() -> bool: + return True if "@ENABLE_ORT_BACKEND@" == "ON" else False + + +def is_built_with_trt() -> bool: + return True if "@ENABLE_TRT_BACKEND@" == "ON" else False + + +def is_built_with_paddle() -> bool: + return True if "@ENABLE_PADDLE_BACKEND@" == "ON" else False + + +def is_built_with_poros() ->bool: + return True if "@ENABLE_POROS_BACKEND@" == "ON" else False + + +def is_built_with_openvino() ->bool: + return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False + + +def get_default_cuda_directory() -> str: + if not is_built_with_gpu(): + return "" + return r"@CUDA_DIRECTORY@".strip() + + +def get_default_cuda_major_version() -> str: + if not is_built_with_gpu(): + return "" + # TODO(qiuyanjun): get cuda version from cmake. + return "11" + + +def find_cudart(search_dir: str) -> bool: + if search_dir is None: + logging.info("[UltraInfer][ERROR]: search_dir can not be NoneTpye.") + return False + # TODO(qiuyanjun): add Linux cudart *.so check + cudart_lib_name = f"cudart64_{get_default_cuda_major_version()}0.dll" + cudart_lib_path = os.path.join(search_dir, cudart_lib_name) + return os.path.exists(cudart_lib_path) + + +def find_cudart_from_sys() -> bool: + # TODO(qiuyanjun): add Linux system paths + sys_paths = os.environ["path"].strip().split(";") + for sys_path in sys_paths: + if find_cudart(sys_path): + logging.info(f"[UltraInfer][INFO]: Successfully found CUDA ToolKit from system PATH env -> {sys_path}") + return True + return False + + +def add_system_search_paths(): + # TODO(qiuyanjun): add Linux system paths + sys_paths = os.environ["path"].strip().split(";") + for sys_path in sys_paths: + if os.path.exists(sys_path) and sys.version_info[:2] >= (3, 8): + try: + os.add_dll_directory(sys_path) + except: + continue + + +def add_dll_search_dir(dir_path): + os.environ["path"] = dir_path + ";" + os.environ["path"] + sys.path.insert(0, dir_path) + if sys.version_info[:2] >= (3, 8): + os.add_dll_directory(dir_path) + + +def add_custom_cuda_path(): + if is_built_with_gpu(): + # if UltraInfer built with gpu and want to run + # in windows, we need to add CUDA_DIRECTORY into + # dll search paths to make sure UltraInfer.dll + # can link cudart correctly. we search the + # default path firstly and try to add into + # paths. User should set it manually if the + # cuda toolkit is not locate in the default + # path we assume. + base_url = "https://github.com/PaddlePaddle/FastDeploy/blob/" + default_cuda_dir = get_default_cuda_directory() + default_cuda_version = get_default_cuda_major_version() # 11 + cuda_shared_lib_dir = os.path.join(default_cuda_dir, "bin") + custom_cuda_envs = ["CUDA_DIRECTORY", "CUDA_HOME", "CUDA_ROOT", "CUDA_PATH"] + custom_cuda_dir = "NOTFOUNDED" + if not os.path.exists(cuda_shared_lib_dir): + # try to get cuda directory from user's local env + for custom_env in custom_cuda_envs: + custom_cuda_dir = os.getenv(custom_env, "NOTFOUNDED") + custom_cuda_dir = custom_cuda_dir.strip().split(";")[0] + if os.path.exists(custom_cuda_dir) and custom_cuda_dir != "NOTFOUNDED": + break + if not os.path.exists(custom_cuda_dir) or custom_cuda_dir == "NOTFOUNDED": + logging.warnings.warn(f"\n--- UltraInfer was built with gpu, \ + \n--- but the default cuda directory does not exists. 
\ + \n--- Please setup one of {custom_cuda_envs} manually, \ + \n--- this path should look like: {default_cuda_dir}. \ + \n--- Check FAQ: {base_url + 'develop/docs/FAQ.md'}") + return + # path to cuda dlls + cuda_shared_lib_dir = os.path.join(custom_cuda_dir, "bin") + add_dll_search_dir(cuda_shared_lib_dir) + # try pre find cudart with major version, e.g 11.x/10.x + if not find_cudart(cuda_shared_lib_dir): + custom_cuda_version = os.path.basename(custom_cuda_dir) + logging.warnings.warn( + f"\n--- UltraInfer was built with CUDA major version {default_cuda_version}, \ + \n--- but found custom CUDA version {custom_cuda_version} at {custom_cuda_dir} \ + \n--- Please setup one of {custom_cuda_envs} manually, \ + \n--- this path should look like: {default_cuda_dir}. \ + \n--- Check FAQ: {base_url + 'develop/docs/FAQ.md'}") + return + logging.info(f"[UltraInfer][INFO]: Successfully found CUDA ToolKit from -> {cuda_shared_lib_dir}") + + +if os.name == "nt": + # cuda/cudnn libs + if is_built_with_gpu(): + add_system_search_paths() + if not find_cudart_from_sys(): + add_custom_cuda_path() + + current_path = os.path.abspath(__file__) + dirname = os.path.dirname(current_path) + third_libs_dir = os.path.join(dirname, "libs") + all_dirs = user_specified_dirs + [third_libs_dir] + for dir in all_dirs: + if os.path.exists(dir): + add_dll_search_dir(dir) + for root, dirs, filenames in os.walk(dir): + for d in dirs: + if d == "lib" or d == "bin": + add_dll_search_dir(os.path.join(dirname, root, d)) + + +try: + from .libs.@PY_LIBRARY_NAME@ import * +except Exception as e: + raise RuntimeError(f"UltraInfer initalized failed! Error: {e}") + + +def TensorInfoStr(tensor_info): + message = "TensorInfo(name : '{}', dtype : '{}', shape : '{}')".format( + tensor_info.name, tensor_info.dtype, tensor_info.shape) + return message + + +def RuntimeOptionStr(runtime_option): + attrs = dir(runtime_option) + message = "RuntimeOption(\n" + for attr in attrs: + if attr.startswith("__"): + continue + if hasattr(getattr(runtime_option, attr), "__call__"): + continue + message += " {} : {}\t\n".format(attr, getattr(runtime_option, attr)) + message.strip("\n") + message += ")" + return message + + +TensorInfo.__repr__ = TensorInfoStr +RuntimeOption.__repr__ = RuntimeOptionStr diff --git a/libs/ultrainfer/python/ultrainfer/download.py b/libs/ultrainfer/python/ultrainfer/download.py new file mode 100755 index 0000000000..d458b8d40a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/download.py @@ -0,0 +1,274 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
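+# A short sketch of how the build-introspection helpers defined above in
+# c_lib_wrap.py.in are typically consumed (results depend on the compile-time flags):
+#
+#     from ultrainfer import is_built_with_gpu, is_built_with_trt, get_default_cuda_directory
+#     if is_built_with_gpu():
+#         print("CUDA directory baked into this build:", get_default_cuda_directory())
+#     if not is_built_with_trt():
+#         print("TensorRT backend was not enabled in this build")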
+ +import os +import os.path as osp +import shutil +import requests +import time +import zipfile +import tarfile +import hashlib +import tqdm +import logging + +from .utils.hub_model_server import model_server +from .utils import hub_env as hubenv + +DOWNLOAD_RETRY_LIMIT = 3 + + +def md5check(fullname, md5sum=None): + if md5sum is None: + return True + + logging.info("File {} md5 checking...".format(fullname)) + md5 = hashlib.md5() + with open(fullname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + md5.update(chunk) + calc_md5sum = md5.hexdigest() + + if calc_md5sum != md5sum: + logging.info( + "File {} md5 check failed, {}(calc) != " + "{}(base)".format(fullname, calc_md5sum, md5sum) + ) + return False + return True + + +def move_and_merge_tree(src, dst): + """ + Move src directory to dst, if dst is already exists, + merge src to dst + """ + if not osp.exists(dst): + shutil.move(src, dst) + else: + if not osp.isdir(src): + shutil.move(src, dst) + return + for fp in os.listdir(src): + src_fp = osp.join(src, fp) + dst_fp = osp.join(dst, fp) + if osp.isdir(src_fp): + if osp.isdir(dst_fp): + move_and_merge_tree(src_fp, dst_fp) + else: + shutil.move(src_fp, dst_fp) + elif osp.isfile(src_fp) and not osp.isfile(dst_fp): + shutil.move(src_fp, dst_fp) + + +def download(url, path, rename=None, md5sum=None, show_progress=False): + """ + Download from url, save to path. + url (str): download url + path (str): download to given path + """ + if not osp.exists(path): + os.makedirs(path) + + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + if rename is not None: + fullname = osp.join(path, rename) + retry_cnt = 0 + while not (osp.exists(fullname) and md5check(fullname, md5sum)): + if retry_cnt < DOWNLOAD_RETRY_LIMIT: + retry_cnt += 1 + else: + logging.debug("{} download failed.".format(fname)) + raise RuntimeError( + "Download from {} failed. " "Retry limit reached".format(url) + ) + + logging.info("Downloading {} from {}".format(fname, url)) + + req = requests.get(url, stream=True) + if req.status_code != 200: + raise RuntimeError( + "Downloading from {} failed with code " + "{}!".format(url, req.status_code) + ) + + # For protecting download interupted, download to + # tmp_fullname firstly, move tmp_fullname to fullname + # after download finished + tmp_fullname = fullname + "_tmp" + total_size = req.headers.get("content-length") + with open(tmp_fullname, "wb") as f: + if total_size and show_progress: + for chunk in tqdm.tqdm( + req.iter_content(chunk_size=1024), + total=(int(total_size) + 1023) // 1024, + unit="KB", + ): + f.write(chunk) + else: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + shutil.move(tmp_fullname, fullname) + logging.debug("{} download completed.".format(fname)) + + return fullname + + +def decompress(fname): + """ + Decompress for zip and tar file + """ + logging.info("Decompressing {}...".format(fname)) + + # For protecting decompressing interupted, + # decompress to fpath_tmp directory firstly, if decompress + # successed, move decompress files to fpath and delete + # fpath_tmp and remove download compress file. 
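+    # Note: tar archives are extracted via safe_extract() below, which rejects
+    # members that would escape the target directory (path traversal protection).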
+ fpath = osp.split(fname)[0] + fpath_tmp = osp.join(fpath, "tmp") + if osp.isdir(fpath_tmp): + shutil.rmtree(fpath_tmp) + os.makedirs(fpath_tmp) + + if fname.find(".tar") >= 0 or fname.find(".tgz") >= 0: + with tarfile.open(fname) as tf: + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + safe_extract(tf, path=fpath_tmp) + elif fname.find(".zip") >= 0: + with zipfile.ZipFile(fname) as zf: + zf.extractall(path=fpath_tmp) + else: + raise TypeError("Unsupport compress file type {}".format(fname)) + + for f in os.listdir(fpath_tmp): + src_dir = osp.join(fpath_tmp, f) + dst_dir = osp.join(fpath, f) + move_and_merge_tree(src_dir, dst_dir) + + shutil.rmtree(fpath_tmp) + logging.debug("{} decompressed.".format(fname)) + return dst_dir + + +def url2dir(url, path, rename=None): + full_name = download(url, path, rename, show_progress=True) + print("File is donwloaded, now extracting...") + if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: + return decompress(full_name) + + +def download_and_decompress(url, path=".", rename=None): + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')): + # fullname = osp.join(path, fname.split('.')[0]) + nranks = 0 + if nranks <= 1: + dst_dir = url2dir(url, path, rename) + if dst_dir is not None: + fullname = dst_dir + else: + lock_path = fullname + ".lock" + if not os.path.exists(fullname): + with open(lock_path, "w"): + os.utime(lock_path, None) + if nranks == 0: + dst_dir = url2dir(url, path, rename) + if dst_dir is not None: + fullname = dst_dir + os.remove(lock_path) + else: + while os.path.exists(lock_path): + time.sleep(1) + return + + +def get_model_list(category: str = None): + """ + Get all pre-trained models information supported by fd.download_model. + Args: + category(str): model category, if None, list all models in all categories. + Returns: + results(dict): a dictionary, key is category, value is a list which contains models information. + """ + result = model_server.get_model_list() + if result["status"] != 0: + raise ValueError( + "Failed to get pretrained models information from hub model server." + ) + result = result["data"] + if category is None: + return result + elif category in result: + return {category: result[category]} + else: + raise ValueError( + "No pretrained model in category {} can be downloaded now.".format(category) + ) + + +def download_model( + name: str, path: str = None, format: str = None, version: str = None +): + """ + Download pre-trained model for UltraInfer inference engine. + Args: + name: model name + path(str): local path for saving model. 
If not set, default is hubenv.MODEL_HOME + format(str): UltraInfer model format + version(str) : UltraInfer model version + """ + result = model_server.search_model(name, format, version) + if path is None: + path = hubenv.MODEL_HOME + if result: + url = result[0]["url"] + format = result[0]["format"] + version = result[0]["version"] + fullpath = download(url, path, show_progress=True) + model_server.stat_model(name, format, version) + if format == "paddle": + if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: + archive_path = fullpath + fullpath = decompress(fullpath) + try: + os.rename(fullpath, os.path.join(os.path.dirname(fullpath), name)) + fullpath = os.path.join(os.path.dirname(fullpath), name) + os.remove(archive_path) + except FileExistsError: + pass + print("Successfully download model at path: {}".format(fullpath)) + return fullpath + else: + print("ERROR: Could not find a model named {}".format(name)) diff --git a/libs/ultrainfer/python/ultrainfer/model.py b/libs/ultrainfer/python/ultrainfer/model.py new file mode 100755 index 0000000000..3166abd6e9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/model.py @@ -0,0 +1,88 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import abc +import logging +from . 
import c_lib_wrap as C + + +class BaseUltraInferModel(metaclass=abc.ABCMeta): + @abc.abstractmethod + def model_name(self): + raise NotImplementedError + + @abc.abstractmethod + def num_inputs_of_runtime(self): + raise NotImplementedError + + @abc.abstractmethod + def num_outputs_of_runtime(self): + raise NotImplementedError + + +class UltraInferModel(BaseUltraInferModel): + def __init__(self, option): + self._model = None + if option is None: + self._runtime_option = C.RuntimeOption() + else: + self._runtime_option = option._option + + def model_name(self): + return self._model.model_name() + + def num_inputs_of_runtime(self): + return self._model.num_inputs_of_runtime() + + def num_outputs_of_runtime(self): + return self._model.num_outputs_of_runtime() + + def input_info_of_runtime(self, index): + assert ( + index < self.num_inputs_of_runtime() + ), "The index:{} must be less than number of inputs:{}.".format( + index, self.num_inputs_of_runtime() + ) + return self._model.input_info_of_runtime(index) + + def output_info_of_runtime(self, index): + assert ( + index < self.num_outputs_of_runtime() + ), "The index:{} must be less than number of outputs:{}.".format( + index, self.num_outputs_of_runtime() + ) + return self._model.output_info_of_runtime(index) + + def enable_record_time_of_runtime(self): + self._model.enable_record_time_of_runtime() + + def disable_record_time_of_runtime(self): + self._model.disable_record_time_of_runtime() + + def print_statis_info_of_runtime(self): + return self._model.print_statis_info_of_runtime() + + def get_profile_time(self): + """Get profile time of Runtime after the profile process is done.""" + return self._model.get_profile_time() + + @property + def runtime_option(self): + return self._model.runtime_option if self._model is not None else None + + @property + def initialized(self): + if self._model is None: + return False + return self._model.initialized() diff --git a/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py b/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py new file mode 100755 index 0000000000..d5ff43ef02 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .pptinypose import PPTinyPose diff --git a/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py b/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py new file mode 100755 index 0000000000..d0f0a27ab3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from ... import c_lib_wrap as C
+
+
+class PPTinyPose(object):
+    def __init__(self, det_model=None, pptinypose_model=None):
+        """Set the initialized detection model object and pptinypose model object
+
+        :param det_model: (ultrainfer.vision.detection.PicoDet)Initialized detection model object
+        :param pptinypose_model: (ultrainfer.vision.keypointdetection.PPTinyPose)Initialized pptinypose model object
+        """
+        assert (
+            det_model is not None and pptinypose_model is not None
+        ), "The det_model and pptinypose_model cannot be None."
+        self._pipeline = C.pipeline.PPTinyPose(
+            det_model._model, pptinypose_model._model
+        )
+
+    def predict(self, input_image):
+        """Predict the keypoint detection result for an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: KeyPointDetectionResult
+        """
+        return self._pipeline.predict(input_image)
+
+    @property
+    def detection_model_score_threshold(self):
+        """Attribute of the PPTinyPose pipeline model: the score threshold used by the detection model to filter bboxes before they are passed to the pptinypose model
+
+        :return: value of detection_model_score_threshold(float)
+        """
+        return self._pipeline.detection_model_score_threshold
+
+    @detection_model_score_threshold.setter
+    def detection_model_score_threshold(self, value):
+        """Set attribute detection_model_score_threshold of the PPTinyPose pipeline model.
+
+        :param value: (float)The value to set detection_model_score_threshold
+        """
+        assert isinstance(
+            value, float
+        ), "The value to set `detection_model_score_threshold` must be type of float."
+        self._pipeline.detection_model_score_threshold = value
diff --git a/libs/ultrainfer/python/ultrainfer/py_only/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/__init__.py
new file mode 100755
index 0000000000..4437de5040
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/py_only/__init__.py
@@ -0,0 +1,16 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import ts, vision
+from .base import PyOnlyUltraInferModel, PyOnlyProcessor, PyOnlyProcessorChain
diff --git a/libs/ultrainfer/python/ultrainfer/py_only/base.py b/libs/ultrainfer/python/ultrainfer/py_only/base.py
new file mode 100755
index 0000000000..4edd522357
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/py_only/base.py
@@ -0,0 +1,59 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import logging + +from ..model import BaseUltraInferModel +from ..runtime import Runtime, RuntimeOption + +_logger = logging.getLogger(__name__) + + +class PyOnlyUltraInferModel(BaseUltraInferModel): + def __init__(self, option): + super().__init__() + if option is None: + self._option = RuntimeOption() + else: + self._option = option + self._update_option() + self._runtime = Runtime(self._option) + _logger.debug("Python-only model initialized") + + def num_inputs_of_runtime(self): + return self._runtime.num_inputs() + + def num_outputs_of_runtime(self): + return self._runtime.num_outputs() + + def _update_option(self): + pass + + +class PyOnlyProcessor(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __call__(self, data): + raise NotImplementedError + + +class PyOnlyProcessorChain(object): + def __init__(self, processors): + super().__init__() + self._processors = processors + + def __call__(self, data): + for processor in self._processors: + data = processor(data) + return data diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py new file mode 100755 index 0000000000..6de43119b9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import processors +from .model import PyOnlyTSModel diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py new file mode 100755 index 0000000000..8a996fb297 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py @@ -0,0 +1,25 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
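# The time-series models and processors in this package subclass the helpers
# from ..base.  A minimal sketch of how PyOnlyProcessor subclasses compose via
# PyOnlyProcessorChain; the two toy processors here are purely illustrative:
#
#     from ultrainfer.py_only import PyOnlyProcessor, PyOnlyProcessorChain
#
#     class AddOne(PyOnlyProcessor):
#         def __call__(self, data):
#             return {**data, "x": data["x"] + 1}
#
#     class Double(PyOnlyProcessor):
#         def __call__(self, data):
#             return {**data, "x": data["x"] * 2}
#
#     chain = PyOnlyProcessorChain([AddOne(), Double()])
#     assert chain({"x": 1})["x"] == 4  # (1 + 1) * 2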
+ +import abc +from ..base import PyOnlyUltraInferModel + + +class PyOnlyTSModel(PyOnlyUltraInferModel): + @abc.abstractmethod + def batch_predict(self, ts_list): + raise NotImplementedError + + def predict(self, ts): + return self.batch_predict([ts])[0] diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py new file mode 100755 index 0000000000..cdc72c94d9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py @@ -0,0 +1,582 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Union, Dict + +import chinese_calendar +import joblib +import numpy as np +import pandas as pd +from pandas.tseries.offsets import DateOffset, Easter, Day +from pandas.tseries import holiday as hd +from sklearn.preprocessing import StandardScaler + +from ..base import PyOnlyProcessor + +__all__ = [ + "CutOff", + "Normalize", + "Denormalize", + "BuildTSDataset", + "CalcTimeFeatures", + "BuildPaddedMask", + "DataFrame2Arrays", +] + +_MAX_WINDOW = 183 + 17 +_EASTER_SUNDAY = hd.Holiday("Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)]) +_NEW_YEARS_DAY = hd.Holiday("New Years Day", month=1, day=1) +_SUPER_BOWL = hd.Holiday( + "Superbowl", month=2, day=1, offset=DateOffset(weekday=hd.SU(1)) +) +_MOTHERS_DAY = hd.Holiday( + "Mothers Day", month=5, day=1, offset=DateOffset(weekday=hd.SU(2)) +) +_INDEPENDENCE_DAY = hd.Holiday("Independence Day", month=7, day=4) +_CHRISTMAS_EVE = hd.Holiday("Christmas", month=12, day=24) +_CHRISTMAS_DAY = hd.Holiday("Christmas", month=12, day=25) +_NEW_YEARS_EVE = hd.Holiday("New Years Eve", month=12, day=31) +_BLACK_FRIDAY = hd.Holiday( + "Black Friday", + month=11, + day=1, + offset=[pd.DateOffset(weekday=hd.TH(4)), Day(1)], +) +_CYBER_MONDAY = hd.Holiday( + "Cyber Monday", + month=11, + day=1, + offset=[pd.DateOffset(weekday=hd.TH(4)), Day(4)], +) + +_HOLYDAYS = [ + hd.EasterMonday, + hd.GoodFriday, + hd.USColumbusDay, + hd.USLaborDay, + hd.USMartinLutherKingJr, + hd.USMemorialDay, + hd.USPresidentsDay, + hd.USThanksgivingDay, + _EASTER_SUNDAY, + _NEW_YEARS_DAY, + _SUPER_BOWL, + _MOTHERS_DAY, + _INDEPENDENCE_DAY, + _CHRISTMAS_EVE, + _CHRISTMAS_DAY, + _NEW_YEARS_EVE, + _BLACK_FRIDAY, + _CYBER_MONDAY, +] + + +def _cal_year( + x: np.datetime64, +): + return x.year + + +def _cal_month( + x: np.datetime64, +): + return x.month + + +def _cal_day( + x: np.datetime64, +): + return x.day + + +def _cal_hour( + x: np.datetime64, +): + return x.hour + + +def _cal_weekday( + x: np.datetime64, +): + return x.dayofweek + + +def _cal_quarter( + x: np.datetime64, +): + return x.quarter + + +def _cal_hourofday( + x: np.datetime64, +): + return x.hour / 23.0 - 0.5 + + +def _cal_dayofweek( + x: np.datetime64, +): + return x.dayofweek / 6.0 - 0.5 + + +def _cal_dayofmonth( + x: np.datetime64, +): + return x.day / 30.0 - 0.5 + + +def _cal_dayofyear( + x: np.datetime64, +): + return x.dayofyear / 364.0 - 0.5 + 
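# A quick worked example of the scaled calendar features defined above and
# below, which map into roughly [-0.5, 0.5] (pd.Timestamp exposes the
# .hour/.day/.month attributes used here):
#
#     ts = pd.Timestamp("2024-06-15 12:30")
#     _cal_hourofday(ts)    # 12 / 23.0 - 0.5  ≈  0.022
#     _cal_dayofmonth(ts)   # 15 / 30.0 - 0.5  =  0.0
#     _cal_monthofyear(ts)  # 6 / 11.0 - 0.5   ≈  0.045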
+ +def _cal_weekofyear( + x: np.datetime64, +): + return x.weekofyear / 51.0 - 0.5 + + +def _cal_holiday( + x: np.datetime64, +): + return float(chinese_calendar.is_holiday(x)) + + +def _cal_workday( + x: np.datetime64, +): + return float(chinese_calendar.is_workday(x)) + + +def _cal_minuteofhour( + x: np.datetime64, +): + return x.minute / 59 - 0.5 + + +def _cal_monthofyear( + x: np.datetime64, +): + return x.month / 11.0 - 0.5 + + +_CAL_DATE_METHOD = { + "year": _cal_year, + "month": _cal_month, + "day": _cal_day, + "hour": _cal_hour, + "weekday": _cal_weekday, + "quarter": _cal_quarter, + "minuteofhour": _cal_minuteofhour, + "monthofyear": _cal_monthofyear, + "hourofday": _cal_hourofday, + "dayofweek": _cal_dayofweek, + "dayofmonth": _cal_dayofmonth, + "dayofyear": _cal_dayofyear, + "weekofyear": _cal_weekofyear, + "is_holiday": _cal_holiday, + "is_workday": _cal_workday, +} + + +def _load_from_one_dataframe( + data: Union[pd.DataFrame, pd.Series], + time_col: Optional[str] = None, + value_cols: Optional[Union[List[str], str]] = None, + freq: Optional[Union[str, int]] = None, + drop_tail_nan: bool = False, + dtype: Optional[Union[type, Dict[str, type]]] = None, +): + series_data = None + if value_cols is None: + if isinstance(data, pd.Series): + series_data = data.copy() + else: + series_data = data.loc[:, data.columns != time_col].copy() + else: + series_data = data.loc[:, value_cols].copy() + + if time_col: + if time_col not in data.columns: + raise ValueError( + "The time column: {} doesn't exist in the `data`!".format(time_col) + ) + time_col_vals = data.loc[:, time_col] + else: + time_col_vals = data.index + + if np.issubdtype(time_col_vals.dtype, np.integer) and isinstance(freq, str): + time_col_vals = time_col_vals.astype(str) + + if np.issubdtype(time_col_vals.dtype, np.integer): + if freq: + if not isinstance(freq, int) or freq < 1: + raise ValueError( + "The type of `freq` should be `int` when the type of `time_col` is `RangeIndex`." + ) + else: + freq = 1 + start_idx, stop_idx = min(time_col_vals), max(time_col_vals) + freq + if (stop_idx - start_idx) / freq != len(data): + raise ValueError("The number of rows doesn't match with the RangeIndex!") + time_index = pd.RangeIndex(start=start_idx, stop=stop_idx, step=freq) + elif np.issubdtype(time_col_vals.dtype, np.object_) or np.issubdtype( + time_col_vals.dtype, np.datetime64 + ): + time_col_vals = pd.to_datetime(time_col_vals, infer_datetime_format=True) + time_index = pd.DatetimeIndex(time_col_vals) + if freq: + if not isinstance(freq, str): + raise ValueError( + "The type of `freq` should be `str` when the type of `time_col` is `DatetimeIndex`." + ) + else: + # If freq is not provided and automatic inference fail, throw exception + freq = pd.infer_freq(time_index) + if freq is None: + raise ValueError( + "Failed to infer the `freq`. A valid `freq` is required." 
+ ) + if freq[0] == "-": + freq = freq[1:] + else: + raise ValueError("The type of `time_col` is invalid.") + if isinstance(series_data, pd.Series): + series_data = series_data.to_frame() + series_data.set_index(time_index, inplace=True) + series_data.sort_index(inplace=True) + return series_data + + +def _load_from_dataframe( + df: pd.DataFrame, + group_id: str = None, + time_col: Optional[str] = None, + target_cols: Optional[Union[List[str], str]] = None, + label_col: Optional[Union[List[str], str]] = None, + observed_cov_cols: Optional[Union[List[str], str]] = None, + feature_cols: Optional[Union[List[str], str]] = None, + known_cov_cols: Optional[Union[List[str], str]] = None, + static_cov_cols: Optional[Union[List[str], str]] = None, + freq: Optional[Union[str, int]] = None, + fill_missing_dates: bool = False, + fillna_method: str = "pre", + fillna_window_size: int = 10, + **kwargs, +): + dfs = [] # seperate multiple group + if group_id is not None: + group_unique = df[group_id].unique() + for column in group_unique: + dfs.append(df[df[group_id].isin([column])]) + else: + dfs = [df] + res = [] + if label_col: + if isinstance(label_col, str) and len(label_col) > 1: + raise ValueError("The length of label_col must be 1.") + target_cols = label_col + if feature_cols: + observed_cov_cols = feature_cols + for df in dfs: + target = None + observed_cov = None + known_cov = None + static_cov = dict() + if not any([target_cols, observed_cov_cols, known_cov_cols, static_cov_cols]): + target = _load_from_one_dataframe( + df, + time_col, + [a for a in df.columns if a != time_col], + freq, + ) + + else: + if target_cols: + target = _load_from_one_dataframe( + df, + time_col, + target_cols, + freq, + ) + + if observed_cov_cols: + observed_cov = _load_from_one_dataframe( + df, + time_col, + observed_cov_cols, + freq, + ) + + if known_cov_cols: + known_cov = _load_from_one_dataframe( + df, + time_col, + known_cov_cols, + freq, + ) + + if static_cov_cols: + if isinstance(static_cov_cols, str): + static_cov_cols = [static_cov_cols] + for col in static_cov_cols: + if col not in df.columns or len(np.unique(df[col])) != 1: + raise ValueError( + "static cov cals data is not in columns or schema is not right!" + ) + static_cov[col] = df[col].iloc[0] + res.append( + { + "past_target": target, + "observed_cov_numeric": observed_cov, + "known_cov_numeric": known_cov, + "static_cov_numeric": static_cov, + } + ) + return res[0] + + +def _distance_to_holiday(holiday): + def _distance_to_day(index): + holiday_date = holiday.dates( + index - pd.Timedelta(days=_MAX_WINDOW), + index + pd.Timedelta(days=_MAX_WINDOW), + ) + assert ( + len(holiday_date) != 0 + ), f"No closest holiday for the date index {index} found." + # It sometimes returns two dates if it is exactly half a year after the + # holiday. In this case, the smaller distance (182 days) is returned. 
+ return float((index - holiday_date[0]).days) + + return _distance_to_day + + +def _to_time_features( + dataset, freq, feature_cols, extend_points, inplace: bool = False +): + new_ts = dataset + if not inplace: + new_ts = dataset.copy() + # Get known_cov + kcov = new_ts["known_cov_numeric"] + if not kcov: + tf_kcov = new_ts["past_target"].index.to_frame() + else: + tf_kcov = kcov.index.to_frame() + time_col = tf_kcov.columns[0] + if np.issubdtype(tf_kcov[time_col].dtype, np.integer): + raise ValueError( + "The time_col can't be the type of numpy.integer, and it must be the type of numpy.datetime64" + ) + if not kcov: + freq = freq if freq is not None else pd.infer_freq(tf_kcov[time_col]) + extend_time = pd.date_range( + start=tf_kcov[time_col][-1], + freq=freq, + periods=extend_points + 1, + closed="right", + name=time_col, + ).to_frame() + tf_kcov = pd.concat([tf_kcov, extend_time]) + + for k in feature_cols: + if k != "holidays": + v = tf_kcov[time_col].apply(lambda x: _CAL_DATE_METHOD[k](x)) + v.index = tf_kcov[time_col] + + if new_ts["known_cov_numeric"] is None: + new_ts["known_cov_numeric"] = pd.DataFrame(v.rename(k), index=v.index) + else: + new_ts["known_cov_numeric"][k] = v.rename(k).reindex( + new_ts["known_cov_numeric"].index + ) + + else: + holidays_col = [] + for i, H in enumerate(_HOLYDAYS): + v = tf_kcov[time_col].apply(_distance_to_holiday(H)) + v.index = tf_kcov[time_col] + holidays_col.append(k + "_" + str(i)) + if new_ts["known_cov_numeric"] is None: + new_ts["known_cov_numeric"] = pd.DataFrame( + v.rename(k + "_" + str(i)), index=v.index + ) + else: + new_ts["known_cov_numeric"][k + "_" + str(i)] = v.rename(k).reindex( + new_ts["known_cov_numeric"].index + ) + + scaler = StandardScaler() + scaler.fit(new_ts["known_cov_numeric"][holidays_col]) + new_ts["known_cov_numeric"][holidays_col] = scaler.transform( + new_ts["known_cov_numeric"][holidays_col] + ) + return new_ts + + +class CutOff(PyOnlyProcessor): + def __init__(self, size): + super().__init__() + self._size = size + + def __call__(self, data): + ts = data["ts"] + ori_ts = data["ori_ts"] + + skip_len = self._size.get("skip_chunk_len", 0) + if len(ts) < self._size["in_chunk_len"] + skip_len: + raise ValueError( + f"The length of the input data is {len(ts)}, but it should be at least {self._size['in_chunk_len'] + self._size['skip_chunk_len']} for training." 
+ ) + ts_data = ts[-(self._size["in_chunk_len"] + skip_len) :] + + return {**data, "ts": ts_data, "ori_ts": ts_data} + + +class Normalize(PyOnlyProcessor): + def __init__(self, scale_path, params_info): + super().__init__() + self._scaler = joblib.load(scale_path) + self._params_info = params_info + + def __call__(self, data): + ts = data["ts"] + + if self._params_info.get("target_cols", None) is not None: + ts[self._params_info["target_cols"]] = self._scaler.transform( + ts[self._params_info["target_cols"]] + ) + if self._params_info.get("feature_cols", None) is not None: + ts[self._params_info["feature_cols"]] = self._scaler.transform( + ts[self._params_info["feature_cols"]] + ) + + return {**data, "ts": ts} + + +class Denormalize(PyOnlyProcessor): + def __init__(self, scale_path, params_info): + super().__init__() + self._scaler = joblib.load(scale_path) + self._params_info = params_info + + def __call__(self, data): + pred = data["pred"] + + scale_cols = pred.columns.values.tolist() + pred[scale_cols] = self._scaler.inverse_transform(pred[scale_cols]) + + return {**data, "pred": pred} + + +class BuildTSDataset(PyOnlyProcessor): + def __init__(self, params_info): + super().__init__() + self._params_info = params_info + + def __call__(self, data): + ts = data["ts"] + ori_ts = data["ori_ts"] + + ts_data = _load_from_dataframe(ts, **self._params_info) + + return {**data, "ts": ts_data, "ori_ts": ts_data} + + +class CalcTimeFeatures(PyOnlyProcessor): + def __init__(self, params_info, size, holiday=False): + super().__init__() + self._freq = params_info["freq"] + self._size = size + self._holiday = holiday + + def __call__(self, data): + ts = data["ts"] + + if not self._holiday: + ts = _to_time_features( + ts, + self._freq, + ["hourofday", "dayofmonth", "dayofweek", "dayofyear"], + self._size["out_chunk_len"], + ) + else: + ts = _to_time_features( + ts, + self._freq, + [ + "minuteofhour", + "hourofday", + "dayofmonth", + "dayofweek", + "dayofyear", + "monthofyear", + "weekofyear", + "holidays", + ], + self._size["out_chunk_len"], + ) + + return {**data, "ts": ts} + + +class BuildPaddedMask(PyOnlyProcessor): + def __init__(self, input_data): + super().__init__() + self._input_data = input_data + + def __call__(self, data): + ts = data["ts"] + + if "features" in self._input_data: + ts["features"] = ts["past_target"] + + if "pad_mask" in self._input_data: + target_dim = len(ts["features"]) + max_length = self._input_data["pad_mask"][-1] + if max_length > 0: + ones = np.ones(max_length, dtype=np.int32) + if max_length != target_dim: + target_ndarray = np.array(ts["features"]).astype(np.float32) + target_ndarray_final = np.zeros( + [max_length, target_dim], dtype=np.int32 + ) + end = min(target_dim, max_length) + target_ndarray_final[:end, :] = target_ndarray + ts["features"] = target_ndarray_final + ones[end:] = 0.0 + ts["pad_mask"] = ones + else: + ts["pad_mask"] = ones + + return {**data, "ts": ts} + + +class DataFrame2Arrays(PyOnlyProcessor): + def __init__(self, input_data): + super().__init__() + self._input_data = input_data + + def __call__(self, data): + ts = data["ts"] + + ts_list = [] + input_name = list(self._input_data.keys()) + input_name.sort() + for key in input_name: + ts_list.append(np.array(ts[key]).astype("float32")) + + return {**data, "ts": ts_list} diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py new file mode 100755 index 0000000000..784aa87714 --- /dev/null +++ 
b/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import processors +from .model import PyOnlyVisionModel diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py new file mode 100755 index 0000000000..0be93de2fa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py @@ -0,0 +1,26 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc + +from ..base import PyOnlyUltraInferModel + + +class PyOnlyVisionModel(PyOnlyUltraInferModel): + @abc.abstractmethod + def batch_predict(self, imgs): + raise NotImplementedError + + def predict(self, img): + return self.batch_predict([img])[0] diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py new file mode 100755 index 0000000000..ba534854ca --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py @@ -0,0 +1,465 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
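# A minimal sketch of how the processors defined below are typically chained
# for image preprocessing; the input path and the resize/normalization
# parameters are illustrative only:
#
#     import cv2
#     from ultrainfer.py_only import PyOnlyProcessorChain
#     from ultrainfer.py_only.vision import processors as P
#
#     chain = PyOnlyProcessorChain([
#         P.BGR2RGB(),
#         P.Resize(target_size=[224, 224]),
#         P.Normalize(scale=1.0 / 255, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
#         P.ToCHWImage(),
#     ])
#     data = chain({"img": cv2.imread("demo.jpg")})
#     chw_image = data["img"]  # float32 array in CHW layout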
+ +import math + +import numpy as np +import cv2 +from PIL import Image + +from ..base import PyOnlyProcessor + +__all__ = [ + "GetImageInfo", + "Flip", + "Crop", + "Resize", + "ResizeByLong", + "ResizeByShort", + "Pad", + "PadStride", + "Normalize", + "ToCHWImage", + "LaTeXOCRReisizeNormImg", +] + + +def _resize(im, target_size, interp): + w, h = target_size + im = cv2.resize(im, (w, h), interpolation=interp) + return im + + +def _flip_h(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def _flip_v(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def _slice(im, coords): + x1, y1, x2, y2 = coords + im = im[y1:y2, x1:x2, ...] + return im + + +def _pad(im, pad, val): + if isinstance(pad, int): + pad = [pad] * 4 + if len(pad) != 4: + raise ValueError + chns = 1 if im.ndim == 2 else im.shape[2] + im = cv2.copyMakeBorder(im, *pad, cv2.BORDER_CONSTANT, value=(val,) * chns) + return im + + +def _check_image_size(input_): + if not ( + isinstance(input_, (list, tuple)) + and len(input_) == 2 + and isinstance(input_[0], int) + and isinstance(input_[1], int) + ): + raise TypeError(f"{input_} cannot represent a valid image size.") + + +class GetImageInfo(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + return {**data, "img_size": [img.shape[1], img.shape[0]]} + + +class Flip(PyOnlyProcessor): + def __init__(self, mode="H"): + super().__init__() + if mode not in ("H", "V"): + raise ValueError("`mode` should be 'H' or 'V'.") + self._mode = mode + + def __call__(self, data): + img = data["img"] + + if self._mode == "H": + img = _flip_h(img) + elif self._mode == "V": + img = _flip_v(img) + + return {**data, "img": img} + + +class Crop(PyOnlyProcessor): + def __init__(self, crop_size, mode="C"): + super().__init__() + if isinstance(crop_size, int): + crop_size = [crop_size, crop_size] + _check_image_size(crop_size) + + self._crop_size = crop_size + + if mode not in ("C", "TL"): + raise ValueError("Unsupported interpolation method") + self._mode = mode + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + cw, ch = self._crop_size + if self._mode == "C": + x1 = max(0, (w - cw) // 2) + y1 = max(0, (h - ch) // 2) + elif self._mode == "TL": + x1, y1 = 0, 0 + x2 = min(w, x1 + cw) + y2 = min(h, y1 + ch) + coords = (x1, y1, x2, y2) + if coords == (0, 0, w, h): + raise ValueError( + f"Input image ({w}, {h}) smaller than the target size ({cw}, {ch})." + ) + img = _slice(img, coords=coords) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class _BaseResize(PyOnlyProcessor): + _INTERP_DICT = { + "NEAREST": cv2.INTER_NEAREST, + "LINEAR": cv2.INTER_LINEAR, + "CUBIC": cv2.INTER_CUBIC, + "AREA": cv2.INTER_AREA, + "LANCZOS4": cv2.INTER_LANCZOS4, + } + + def __init__(self, size_divisor, interp): + super().__init__() + + if size_divisor is not None: + assert isinstance( + size_divisor, int + ), "`size_divisor` should be None or int." 
+ self._size_divisor = size_divisor + + try: + interp = self._INTERP_DICT[interp] + except KeyError: + raise ValueError( + "`interp` should be one of {}.".format(self._INTERP_DICT.keys()) + ) + self._interp = interp + + @staticmethod + def _rescale_size(img_size, target_size): + scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size)) + rescaled_size = [round(i * scale) for i in img_size] + return rescaled_size, scale + + +class Resize(_BaseResize): + def __init__( + self, target_size, keep_ratio=False, size_divisor=None, interp="LINEAR" + ): + super().__init__(size_divisor=size_divisor, interp=interp) + + if isinstance(target_size, int): + target_size = [target_size, target_size] + _check_image_size(target_size) + self._target_size = target_size + + self._keep_ratio = keep_ratio + + def __call__(self, data): + img = data["img"] + + target_size = self._target_size + original_size = img.shape[:2][::-1] + + if self._keep_ratio: + h, w = img.shape[0:2] + target_size, _ = self._rescale_size((w, h), self._target_size) + + if self._size_divisor: + target_size = [ + math.ceil(i / self._size_divisor) * self._size_divisor + for i in target_size + ] + + img_scale_w, img_scale_h = [ + target_size[0] / original_size[0], + target_size[1] / original_size[1], + ] + img = _resize(img, target_size, interp=self._interp) + + return { + **data, + "img": img, + "img_size": [img.shape[1], img.shape[0]], + "scale_factors": [img_scale_w, img_scale_h], + } + + +class ResizeByLong(_BaseResize): + def __init__(self, target_long_edge, size_divisor=None, interp="LINEAR"): + super().__init__(size_divisor=size_divisor, interp=interp) + self._target_long_edge = target_long_edge + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + scale = self._target_long_edge / max(h, w) + h_resize = round(h * scale) + w_resize = round(w * scale) + if self._size_divisor is not None: + h_resize = math.ceil(h_resize / self._size_divisor) * self._size_divisor + w_resize = math.ceil(w_resize / self._size_divisor) * self._size_divisor + + img = _resize(img, (w_resize, h_resize), interp=self._interp) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class ResizeByShort(_BaseResize): + INPUT_KEYS = "img" + OUTPUT_KEYS = ["img", "img_size"] + DEAULT_INPUTS = {"img": "img"} + DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"} + + def __init__(self, target_short_edge, size_divisor=None, interp="LINEAR"): + super().__init__(size_divisor=size_divisor, interp=interp) + self._target_short_edge = target_short_edge + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + scale = self._target_short_edge / min(h, w) + h_resize = round(h * scale) + w_resize = round(w * scale) + if self._size_divisor is not None: + h_resize = math.ceil(h_resize / self._size_divisor) * self._size_divisor + w_resize = math.ceil(w_resize / self._size_divisor) * self._size_divisor + + img = _resize(img, (w_resize, h_resize), interp=self._interp) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class Pad(PyOnlyProcessor): + def __init__(self, target_size, val=127.5): + super().__init__() + + if isinstance(target_size, int): + target_size = [target_size, target_size] + _check_image_size(target_size) + self._target_size = target_size + + self._val = val + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + tw, th = self._target_size + ph = th - h + pw = tw - w + + if ph < 0 or pw < 0: + raise ValueError( + f"Input image ({w}, 
{h}) smaller than the target size ({tw}, {th})." + ) + else: + img = _pad(img, pad=(0, ph, 0, pw), val=self._val) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class PadStride(PyOnlyProcessor): + INPUT_KEYS = "img" + OUTPUT_KEYS = "img" + DEAULT_INPUTS = {"img": "img"} + DEAULT_OUTPUTS = {"img": "img"} + + def __init__(self, stride=0): + super().__init__() + self._coarsest_stride = stride + + def __call__(self, data): + img = data["img"] + + im = img + coarsest_stride = self._coarsest_stride + if coarsest_stride <= 0: + return {"img": im} + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + + return {**data, "img": padding_im} + + +class Normalize(PyOnlyProcessor): + def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5, preserve_dtype=False): + super().__init__() + self._scale = np.float32(scale) + if isinstance(mean, float): + mean = [mean] + self._mean = np.asarray(mean).astype("float32") + if isinstance(std, float): + std = [std] + self._std = np.asarray(std).astype("float32") + self._preserve_dtype = preserve_dtype + + def __call__(self, data): + img = data["img"] + + old_type = img.dtype + # XXX: If `old_type` has higher precision than float32, + # we will lose some precision. + img = img.astype("float32", copy=False) + img *= self._scale + img -= self._mean + img /= self._std + if self._preserve_dtype: + img = img.astype(old_type, copy=False) + + return {**data, "img": img} + + +class ToCHWImage(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + img = img.transpose((2, 0, 1)) + + return {**data, "img": img} + + +class BGR2RGB(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + return {**data, "img": img} + + +class LaTeXOCRReisizeNormImg(PyOnlyProcessor): + """for ocr image resize and normalization""" + + def __init__(self, rec_image_shape=(3, 48, 320)): + super().__init__() + self.rec_image_shape = rec_image_shape + + def pad_(self, img, divable=32): + threshold = 128 + data = np.array(img.convert("LA")) + if data[..., -1].var() == 0: + data = (data[..., 0]).astype(np.uint8) + else: + data = (255 - data[..., -1]).astype(np.uint8) + data = (data - data.min()) / (data.max() - data.min()) * 255 + if data.mean() > threshold: + # To invert the text to white + gray = 255 * (data < threshold).astype(np.uint8) + else: + gray = 255 * (data > threshold).astype(np.uint8) + data = 255 - data + + coords = cv2.findNonZero(gray) # Find all non-zero points (text) + a, b, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box + rect = data[b : b + h, a : a + w] + im = Image.fromarray(rect).convert("L") + dims = [] + for x in [w, h]: + div, mod = divmod(x, divable) + dims.append(divable * (div + (1 if mod > 0 else 0))) + padded = Image.new("L", dims, 255) + padded.paste(im, (0, 0, im.size[0], im.size[1])) + return padded + + def minmax_size_( + self, + img, + max_dimensions, + min_dimensions, + ): + if max_dimensions is not None: + ratios = [a / b for a, b in zip(img.size, max_dimensions)] + if any([r > 1 for r in ratios]): + size = np.array(img.size) // max(ratios) + img = img.resize(tuple(size.astype(int)), Image.BILINEAR) + if min_dimensions is not None: + # hypothesis: there is a dim in img smaller than min_dimensions, and return a proper dim >= 
min_dimensions + padded_size = [ + max(img_dim, min_dim) + for img_dim, min_dim in zip(img.size, min_dimensions) + ] + if padded_size != list(img.size): # assert hypothesis + padded_im = Image.new("L", padded_size, 255) + padded_im.paste(img, img.getbbox()) + img = padded_im + return img + + def norm_img_latexocr(self, img): + # CAN only predict gray scale image + shape = (1, 1, 3) + mean = [0.7931, 0.7931, 0.7931] + std = [0.1738, 0.1738, 0.1738] + scale = np.float32(1.0 / 255.0) + min_dimensions = [32, 32] + max_dimensions = [672, 192] + mean = np.array(mean).reshape(shape).astype("float32") + std = np.array(std).reshape(shape).astype("float32") + + im_h, im_w = img.shape[:2] + if ( + min_dimensions[0] <= im_w <= max_dimensions[0] + and min_dimensions[1] <= im_h <= max_dimensions[1] + ): + pass + else: + img = Image.fromarray(np.uint8(img)) + img = self.minmax_size_(self.pad_(img), max_dimensions, min_dimensions) + img = np.array(img) + im_h, im_w = img.shape[:2] + img = np.dstack([img, img, img]) + img = (img.astype("float32") * scale - mean) / std + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + divide_h = math.ceil(im_h / 16) * 16 + divide_w = math.ceil(im_w / 16) * 16 + img = np.pad( + img, ((0, divide_h - im_h), (0, divide_w - im_w)), constant_values=(1, 1) + ) + img = img[:, :, np.newaxis].transpose(2, 0, 1) + img = img.astype("float32") + return img + + def __call__(self, data): + """apply""" + img = data["img"] + img = self.norm_img_latexocr(img) + return {"img": img} diff --git a/libs/ultrainfer/python/ultrainfer/runtime.py b/libs/ultrainfer/python/ultrainfer/runtime.py new file mode 100755 index 0000000000..e558159295 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/runtime.py @@ -0,0 +1,706 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import logging +import numpy as np +from . import ModelFormat +from . import c_lib_wrap as C + + +class Runtime: + """UltraInfer Runtime object.""" + + def __init__(self, runtime_option): + """Initialize a UltraInfer Runtime object. + + :param runtime_option: (ultrainfer.RuntimeOption)Options for UltraInfer Runtime + """ + + self._runtime = C.Runtime() + self.runtime_option = runtime_option + assert self._runtime.init( + self.runtime_option._option + ), "Initialize Runtime Failed!" + + def forward(self, *inputs): + """[Only for Poros backend] Inference with input data for poros + + :param data: (list[str : numpy.ndarray])The input data list + :return list of numpy.ndarray + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The forward function is only used for Poros backend, please call infer function" + ) + inputs_dict = dict() + for i in range(len(inputs)): + inputs_dict["x" + str(i)] = inputs[i] + return self.infer(inputs_dict) + + def infer(self, data): + """Inference with input data. 
+ + :param data: (dict[str : numpy.ndarray])The input data dict, key value must keep same with the loaded model + :return list of numpy.ndarray + """ + assert isinstance(data, dict) or isinstance( + data, list + ), "The input data should be type of dict or list." + if isinstance(data, dict): + for k, v in data.items(): + if isinstance(v, np.ndarray) and not v.data.contiguous: + data[k] = np.ascontiguousarray(data[k]) + + return self._runtime.infer(data) + + def bind_input_tensor(self, name, fdtensor): + """Bind FDTensor by name, no copy and share input memory + + :param name: (str)The name of input data. + :param fdtensor: (ultrainfer.FDTensor)The input FDTensor. + """ + self._runtime.bind_input_tensor(name, fdtensor) + + def bind_output_tensor(self, name, fdtensor): + """Bind FDTensor by name, no copy and share output memory + + :param name: (str)The name of output data. + :param fdtensor: (ultrainfer.FDTensor)The output FDTensor. + """ + self._runtime.bind_output_tensor(name, fdtensor) + + def zero_copy_infer(self): + """No params inference the model. + + the input and output data need to pass through the bind_input_tensor and get_output_tensor interfaces. + """ + self._runtime.infer() + + def get_output_tensor(self, name): + """Get output FDTensor by name, no copy and share backend output memory + + :param name: (str)The name of output data. + :return ultrainfer.FDTensor + """ + return self._runtime.get_output_tensor(name) + + def compile(self, warm_datas): + """[Only for Poros backend] compile with prewarm data for poros + + :param data: (list[str : numpy.ndarray])The prewarm data list + :return TorchScript Model + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The compile function is only used for Poros backend, please call infer function" + ) + assert isinstance(warm_datas, list), "The prewarm data should be type of list." + for i in range(len(warm_datas)): + warm_data = warm_datas[i] + if isinstance(warm_data[0], np.ndarray): + warm_data = list(data for data in warm_data) + else: + warm_data = list(data.numpy() for data in warm_data) + warm_datas[i] = warm_data + return self._runtime.compile(warm_datas, self.runtime_option._option) + + def num_inputs(self): + """Get number of inputs of the loaded model.""" + return self._runtime.num_inputs() + + def num_outputs(self): + """Get number of outputs of the loaded model.""" + return self._runtime.num_outputs() + + def get_input_info(self, index): + """Get input information of the loaded model. + + :param index: (int)Index of the input + :return ultrainfer.TensorInfo + """ + assert isinstance( + index, int + ), "The input parameter index should be type of int." + assert ( + index < self.num_inputs() + ), "The input parameter index:{} should less than number of inputs:{}.".format( + index, self.num_inputs + ) + return self._runtime.get_input_info(index) + + def get_output_info(self, index): + """Get output information of the loaded model. + + :param index: (int)Index of the output + :return ultrainfer.TensorInfo + """ + assert isinstance( + index, int + ), "The input parameter index should be type of int." 
+ assert ( + index < self.num_outputs() + ), "The input parameter index:{} should less than number of outputs:{}.".format( + index, self.num_outputs + ) + return self._runtime.get_output_info(index) + + def get_profile_time(self): + """Get profile time of Runtime after the profile process is done.""" + return self._runtime.get_profile_time() + + +class RuntimeOption: + """Options for UltraInfer Runtime.""" + + __slots__ = ["_option"] + + def __init__(self): + """Initialize a UltraInfer RuntimeOption object.""" + + self._option = C.RuntimeOption() + + def set_model_path( + self, model_path, params_path="", model_format=ModelFormat.PADDLE + ): + """Set path of model file and parameters file + + :param model_path: (str)Path of model file + :param params_path: (str)Path of parameters file + :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT + """ + return self._option.set_model_path(model_path, params_path, model_format) + + def set_model_buffer( + self, model_buffer, params_buffer="", model_format=ModelFormat.PADDLE + ): + """Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory + :param model_buffer: (bytes)The memory buffer of model + :param params_buffer: (bytes)The memory buffer of the parameters + :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT + """ + return self._option.set_model_buffer(model_buffer, params_buffer, model_format) + + def use_gpu(self, device_id=0): + """Inference with Nvidia GPU + + :param device_id: (int)The index of GPU will be used for inference, default 0 + """ + if not C.is_built_with_gpu(): + logging.warning( + "The installed ultrainfer-python package is not built with GPU, will force to use CPU. To use GPU, following the commands to install ultrainfer-gpu-python." + ) + return + return self._option.use_gpu(device_id) + + def use_kunlunxin( + self, + device_id=0, + l3_workspace_size=16 * 1024 * 1024, + locked=False, + autotune=True, + autotune_file="", + precision="int16", + adaptive_seqlen=False, + enable_multi_stream=False, + gm_default_size=0, + ): + """Inference with KunlunXin XPU + + :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0 + :param l3_workspace_size: (int)The size of the video memory allocated by the l3 cache, the maximum is 16M, default 16M + :param locked: (bool)Whether the allocated L3 cache can be locked. If false, it means that the L3 cache is not locked, + and the allocated L3 cache can be shared by multiple models, and multiple models + :param autotune: (bool)Whether to autotune the conv operator in the model. + If true, when the conv operator of a certain dimension is executed for the first time, + it will automatically search for a better algorithm to improve the performance of subsequent conv operators of the same dimension. + :param autotune_file: (str)Specify the path of the autotune file. If autotune_file is specified, + the algorithm specified in the file will be used and autotune will not be performed again. + :param precision: (str)Calculation accuracy of multi_encoder + :param adaptive_seqlen: (bool)adaptive_seqlen Is the input of multi_encoder variable length + :param enable_multi_stream: (bool)Whether to enable the multi stream of KunlunXin XPU. + :param gm_default_size The default size of context global memory of KunlunXin XPU. 
+ """ + return self._option.use_kunlunxin( + device_id, + l3_workspace_size, + locked, + autotune, + autotune_file, + precision, + adaptive_seqlen, + enable_multi_stream, + gm_default_size, + ) + + def use_cpu(self): + """Inference with CPU""" + return self._option.use_cpu() + + def use_rknpu2( + self, rknpu2_name=C.CpuName.RK356X, rknpu2_core=C.CoreMask.RKNN_NPU_CORE_AUTO + ): + return self._option.use_rknpu2(rknpu2_name, rknpu2_core) + + def use_sophgo(self): + """Inference with SOPHGO TPU""" + return self._option.use_sophgo() + + def use_ascend(self): + """Inference with Huawei Ascend NPU""" + return self._option.use_ascend() + + def disable_valid_backend_check(self): + """Disable checking validity of backend during inference""" + return self._option.disable_valid_backend_check() + + def enable_valid_backend_check(self): + """Enable checking validity of backend during inference""" + return self._option.enable_valid_backend_check() + + def set_cpu_thread_num(self, thread_num=-1): + """Set number of threads if inference with CPU + + :param thread_num: (int)Number of threads, if not positive, means the number of threads is decided by the backend, default -1 + """ + return self._option.set_cpu_thread_num(thread_num) + + def set_ort_graph_opt_level(self, level=-1): + """Set graph optimization level for ONNX Runtime backend + + :param level: (int)Optimization level, -1 means the default setting + """ + logging.warning( + "`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead." + ) + self._option.ort_option.graph_optimize_level = level + + def use_paddle_backend(self): + """Use Paddle Inference backend, support inference Paddle model on CPU/Nvidia GPU.""" + return self._option.use_paddle_backend() + + def use_paddle_infer_backend(self): + """Wrapper function of use_paddle_backend(), use Paddle Inference backend, support inference Paddle model on CPU/Nvidia GPU.""" + return self.use_paddle_backend() + + def use_poros_backend(self): + """Use Poros backend, support inference TorchScript model on CPU/Nvidia GPU.""" + return self._option.use_poros_backend() + + def use_ort_backend(self): + """Use ONNX Runtime backend, support inference Paddle/ONNX model on CPU/Nvidia GPU.""" + return self._option.use_ort_backend() + + def use_tvm_backend(self): + """Use TVM Runtime backend, support inference TVM model on CPU.""" + return self._option.use_tvm_backend() + + def use_trt_backend(self): + """Use TensorRT backend, support inference Paddle/ONNX model on Nvidia GPU.""" + return self._option.use_trt_backend() + + def use_openvino_backend(self): + """Use OpenVINO backend, support inference Paddle/ONNX model on CPU.""" + return self._option.use_openvino_backend() + + def use_lite_backend(self): + """Use Paddle Lite backend, support inference Paddle model on ARM CPU.""" + return self._option.use_lite_backend() + + def use_paddle_lite_backend(self): + """Wrapper function of use_lite_backend(), use Paddle Lite backend, support inference Paddle model on ARM CPU.""" + return self.use_lite_backend() + + def set_lite_context_properties(self, context_properties): + """Set nnadapter context properties for Paddle Lite backend.""" + logging.warning( + "`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead." 
+        )
+        self._option.paddle_lite_option.nnadapter_context_properties = (
+            context_properties
+        )
+
+    def set_lite_model_cache_dir(self, model_cache_dir):
+        """Set the nnadapter model cache dir for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
+        )
+
+        self._option.paddle_lite_option.nnadapter_model_cache_dir = model_cache_dir
+
+    def set_lite_dynamic_shape_info(self, dynamic_shape_info):
+        """Set the nnadapter dynamic shape info for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_dynamic_shape_info = (
+            dynamic_shape_info
+        )
+
+    def set_lite_subgraph_partition_path(self, subgraph_partition_path):
+        """Set the nnadapter subgraph partition path for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_path = (
+            subgraph_partition_path
+        )
+
+    def set_lite_subgraph_partition_config_buffer(self, subgraph_partition_buffer):
+        """Set the nnadapter subgraph partition buffer for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_subgraph_partition_config_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = (
+            subgraph_partition_buffer
+        )
+
+    def set_lite_mixed_precision_quantization_config_path(
+        self, mixed_precision_quantization_config_path
+    ):
+        """Set the nnadapter mixed precision quantization config path for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = (
+            mixed_precision_quantization_config_path
+        )
+
+    def set_paddle_mkldnn(self, use_mkldnn=True):
+        """Enable/Disable MKLDNN while using the Paddle Inference backend, MKLDNN is enabled by default."""
+        logging.warning(
+            "`RuntimeOption.set_paddle_mkldnn` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_mkldnn = True` instead."
+        )
+        self._option.paddle_infer_option.enable_mkldnn = use_mkldnn
+
+    def set_openvino_device(self, name="CPU"):
+        """Set the device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1', etc.
+        This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead.
+        """
+        logging.warning(
+            "`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
+        )
+        self._option.openvino_option.set_device(name)
+
+    def set_openvino_shape_info(self, shape_info):
+        """Set the shape information of the model's inputs, used on GPU to fix the shape.
+        This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead.
+ + :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]} + """ + logging.warning( + "`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead." + ) + self._option.openvino_option.set_shape_info(shape_info) + + def set_openvino_cpu_operators(self, operators): + """While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU + This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead. + + :param operators: (list of string)list of operators' name, e.g ["MulticlasNms"] + """ + logging.warning( + "`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead." + ) + self._option.openvino_option.set_cpu_operators(operators) + + def enable_paddle_log_info(self): + """Enable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.""" + logging.warning( + "RuntimeOption.enable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = True` instead." + ) + self._option.paddle_infer_option.enable_log_info = True + + def disable_paddle_log_info(self): + """Disable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.""" + logging.warning( + "RuntimeOption.disable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = False` instead." + ) + self._option.paddle_infer_option.enable_log_info = False + + def set_paddle_mkldnn_cache_size(self, cache_size): + """Set size of shape cache while using Paddle Inference backend with MKLDNN enabled, default will cache all the dynamic shape.""" + logging.warning( + "RuntimeOption.set_paddle_mkldnn_cache_size` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.mkldnn_cache_size = {}` instead.".format( + cache_size + ) + ) + self._option.paddle_infer_option.mkldnn_cache_size = cache_size + + def enable_lite_fp16(self): + """Enable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.""" + logging.warning( + "`RuntimeOption.enable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = True` instead." + ) + self._option.paddle_lite_option.enable_fp16 = True + + def disable_lite_fp16(self): + """Disable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.""" + logging.warning( + "`RuntimeOption.disable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = False` instead." + ) + self._option.paddle_lite_option.enable_fp16 = False + + def set_lite_power_mode(self, mode): + """Set POWER mode while using Paddle Lite backend on ARM CPU.""" + logging.warning( + "`RuntimeOption.set_lite_powermode` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.power_mode = {}` instead.".format( + mode + ) + ) + self._option.paddle_lite_option.power_mode = mode + + def set_trt_input_shape( + self, tensor_name, min_shape, opt_shape=None, max_shape=None + ): + """Set shape range information while using TensorRT backend with loadding a model contains dynamic input shape. 
When inference runs with a new input shape outside the set shape range, the TensorRT engine will be rebuilt to expand the shape range information.
+
+        :param tensor_name: (str)Name of the input which has a dynamic shape
+        :param min_shape: (list of int)Minimum shape of the input, e.g [1, 3, 224, 224]
+        :param opt_shape: (list of int)Optimal shape of the input, this is often set to the most common input shape; if set to None, it will be kept the same as min_shape
+        :param max_shape: (list of int)Maximum shape of the input, e.g [8, 3, 224, 224]; if set to None, it will be kept the same as min_shape
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_input_shape` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_shape()` instead."
+        )
+        if opt_shape is None and max_shape is None:
+            opt_shape = min_shape
+            max_shape = min_shape
+        else:
+            assert (
+                opt_shape is not None and max_shape is not None
+            ), "Set min_shape only, or set min_shape, opt_shape and max_shape together."
+        return self._option.trt_option.set_shape(
+            tensor_name, min_shape, opt_shape, max_shape
+        )
+
+    def set_trt_input_data(
+        self, tensor_name, min_input_data, opt_input_data=None, max_input_data=None
+    ):
+        """Set input data while using the TensorRT backend when loading a model that contains dynamic input shapes.
+
+        :param tensor_name: (str)Name of the input which has a dynamic shape
+        :param min_input_data: (list of int)Input data for the minimum shape of the input.
+        :param opt_input_data: (list of int)Input data for the optimal shape of the input; if set to None, it will be kept the same as min_input_data
+        :param max_input_data: (list of int)Input data for the maximum shape of the input; if set to None, it will be kept the same as min_input_data
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_input_data` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_input_data()` instead."
+        )
+        if opt_input_data is None and max_input_data is None:
+            opt_input_data = min_input_data
+            max_input_data = min_input_data
+        else:
+            assert (
+                opt_input_data is not None and max_input_data is not None
+            ), "Set min_input_data only, or set min_input_data, opt_input_data and max_input_data together."
+        return self._option.trt_option.set_input_data(
+            tensor_name, min_input_data, opt_input_data, max_input_data
+        )
+
+    def set_trt_cache_file(self, cache_file_path):
+        """Set a cache file path while using the TensorRT backend. While loading a Paddle/ONNX model with set_trt_cache_file("./tensorrt_cache/model.trt"), if the file `./tensorrt_cache/model.trt` exists, building the TensorRT engine is skipped and the cache file is loaded directly; if it doesn't exist, the TensorRT engine is built and serialized to the cache file as a binary string.
+
+        :param cache_file_path: (str)Path of the TensorRT cache file
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_cache_file` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.serialize_file = {}` instead.".format(
+                cache_file_path
+            )
+        )
+        self._option.trt_option.serialize_file = cache_file_path
+
+    def enable_trt_fp16(self):
+        """Enable half precision inference while using the TensorRT backend. Note that not all Nvidia GPUs support FP16; in those cases, inference will fall back to FP32."""
+        logging.warning(
+            "`RuntimeOption.enable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = True` instead."
+        )
+        self._option.trt_option.enable_fp16 = True
+
+    def disable_trt_fp16(self):
+        """Disable half precision inference while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.disable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = False` instead."
+        )
+        self._option.trt_option.enable_fp16 = False
+
+    def enable_pinned_memory(self):
+        """Enable pinned memory. Pinned memory can be utilized to speed up data transfer between CPU and GPU. Currently it's only supported in the TensorRT backend and the Paddle Inference backend."""
+        return self._option.enable_pinned_memory()
+
+    def disable_pinned_memory(self):
+        """Disable pinned memory."""
+        return self._option.disable_pinned_memory()
+
+    def enable_paddle_to_trt(self):
+        """While using the TensorRT backend, enable_paddle_to_trt() switches to the Paddle Inference backend and uses its integrated TensorRT instead."""
+        logging.warning(
+            "`RuntimeOption.enable_paddle_to_trt` will be deprecated in v1.2.0, if you want to run TensorRT with the Paddle Inference backend, please use the following method:"
+        )
+        logging.warning(" ==============================================")
+        logging.warning(" import ultrainfer as fd")
+        logging.warning(" option = fd.RuntimeOption()")
+        logging.warning(" option.use_gpu(0)")
+        logging.warning(" option.use_paddle_infer_backend()")
+        logging.warning(" option.paddle_infer_option.enable_trt = True")
+        logging.warning(" ==============================================")
+        self._option.use_paddle_backend()
+        self._option.paddle_infer_option.enable_trt = True
+
+    def set_trt_max_workspace_size(self, trt_max_workspace_size):
+        """Set the max workspace size while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.set_trt_max_workspace_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_workspace_size = {}` instead.".format(
+                trt_max_workspace_size
+            )
+        )
+        self._option.trt_option.max_workspace_size = trt_max_workspace_size
+
+    def set_trt_max_batch_size(self, trt_max_batch_size):
+        """Set the max batch size while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.set_trt_max_batch_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_batch_size = {}` instead.".format(
+                trt_max_batch_size
+            )
+        )
+        self._option.trt_option.max_batch_size = trt_max_batch_size
+
+    def enable_paddle_trt_collect_shape(self):
+        """Enable collecting subgraph shape information while using Paddle Inference with TensorRT."""
+        logging.warning(
+            "`RuntimeOption.enable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = True` instead."
+        )
+        self._option.paddle_infer_option.collect_trt_shape = True
+
+    def disable_paddle_trt_collect_shape(self):
+        """Disable collecting subgraph shape information while using Paddle Inference with TensorRT."""
+        logging.warning(
+            "`RuntimeOption.disable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = False` instead."
+        )
+        self._option.paddle_infer_option.collect_trt_shape = False
+
+    def delete_paddle_backend_pass(self, pass_name):
+        """Delete a pass by name in the Paddle Inference backend."""
+        logging.warning(
+            "`RuntimeOption.delete_paddle_backend_pass` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.delete_pass` instead."
+ ) + self._option.paddle_infer_option.delete_pass(pass_name) + + def disable_paddle_trt_ops(self, ops): + """Disable some ops in paddle trt backend""" + logging.warning( + "`RuntimeOption.disable_paddle_trt_ops` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.disable_trt_ops()` instead." + ) + self._option.disable_trt_ops(ops) + + def use_ipu( + self, + device_num=1, + micro_batch_size=1, + enable_pipelining=False, + batches_per_step=1, + ): + return self._option.use_ipu( + device_num, micro_batch_size, enable_pipelining, batches_per_step + ) + + def set_ipu_config( + self, + enable_fp16=False, + replica_num=1, + available_memory_proportion=1.0, + enable_half_partial=False, + ): + logging.warning( + "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead." + ) + self._option.paddle_infer_option.set_ipu_config( + enable_fp16, replica_num, available_memory_proportion, enable_half_partial + ) + + @property + def poros_option(self): + """Get PorosBackendOption object to configure Poros backend + + :return PorosBackendOption + """ + return self._option.poros_option + + @property + def paddle_lite_option(self): + """Get LiteBackendOption object to configure Paddle Lite backend + + :return LiteBackendOption + """ + return self._option.paddle_lite_option + + @property + def openvino_option(self): + """Get OpenVINOOption object to configure OpenVINO backend + + :return OpenVINOOption + """ + return self._option.openvino_option + + @property + def ort_option(self): + """Get OrtBackendOption object to configure ONNX Runtime backend + + :return OrtBackendOption + """ + return self._option.ort_option + + @property + def trt_option(self): + """Get TrtBackendOption object to configure TensorRT backend + + :return TrtBackendOption + """ + return self._option.trt_option + + @property + def paddle_infer_option(self): + """Get PaddleBackendOption object to configure Paddle Inference backend + + :return PaddleBackendOption + """ + return self._option.paddle_infer_option + + def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50): + """Set the profile mode as 'true'. + :param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime. + :param repeat Repeat times for runtime inference. + :param warmup Warmup times for runtime inference. + """ + return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup) + + def disable_profiling(self): + """Set the profile mode as 'false'.""" + return self._option.disable_profiling() + + def set_external_raw_stream(self, cuda_stream): + """Set the external raw stream used by ultrainfer runtime.""" + self._option.set_external_raw_stream(cuda_stream) + + def __repr__(self): + attrs = dir(self._option) + message = "RuntimeOption(\n" + for attr in attrs: + if attr.startswith("__"): + continue + if hasattr(getattr(self._option, attr), "__call__"): + continue + message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) + message.strip("\n") + message += ")" + return message diff --git a/libs/ultrainfer/python/ultrainfer/text/__init__.py b/libs/ultrainfer/python/ultrainfer/text/__init__.py new file mode 100755 index 0000000000..a21623f7e9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/text/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import uie +from .uie import UIEModel +from .uie import SchemaLanguage diff --git a/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py b/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py new file mode 100755 index 0000000000..c3554f763c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from ... import RuntimeOption, UltraInferModel, ModelFormat +from ... import c_lib_wrap as C + + +class SchemaLanguage(object): + ZH = 0 + EN = 1 + + +class SchemaNode(object): + def __init__(self, name, children=[]): + schema_node_children = [] + if isinstance(children, str): + children = [children] + for child in children: + if isinstance(child, str): + schema_node_children += [C.text.SchemaNode(child, [])] + elif isinstance(child, dict): + for key, val in child.items(): + schema_node_child = SchemaNode(key, val) + schema_node_children += [schema_node_child._schema_node] + else: + assert "The type of child of SchemaNode should be str or dict." + self._schema_node = C.text.SchemaNode(name, schema_node_children) + self._schema_node_children = schema_node_children + + +class UIEModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + vocab_file, + position_prob=0.5, + max_length=128, + schema=[], + batch_size=64, + runtime_option=RuntimeOption(), + model_format=ModelFormat.PADDLE, + schema_language=SchemaLanguage.ZH, + ): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + else: + assert "The type of schema should be list or dict." + schema_language = C.text.SchemaLanguage(schema_language) + self._model = C.text.UIEModel( + model_file, + params_file, + vocab_file, + position_prob, + max_length, + schema, + batch_size, + runtime_option._option, + model_format, + schema_language, + ) + assert self.initialized, "UIEModel initialize failed." 
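[Editor's note] A minimal usage sketch of the UIEModel constructor above, for reviewers. The model, params, and vocab file paths are placeholders (no such files ship with this patch), and only calls defined in this module (the constructor, set_schema, and predict) are used; a flat list schema extracts independent targets, while a dict schema describes nested targets.

    import ultrainfer as ui
    from ultrainfer.text import UIEModel, SchemaLanguage

    option = ui.RuntimeOption()
    option.use_cpu()

    model = UIEModel(
        "uie-base/inference.pdmodel",    # placeholder model file
        "uie-base/inference.pdiparams",  # placeholder params file
        "uie-base/vocab.txt",            # placeholder vocab file
        schema=["time", "location", "person"],  # flat schema: independent entity types
        runtime_option=option,
        schema_language=SchemaLanguage.EN,
    )

    # A dict schema extracts children under a parent entity.
    model.set_schema({"competition": ["winner", "score"]})

    # return_dict=True converts the native results into plain Python dicts.
    results = model.predict(["The final was won 3-1 by the visiting team."], return_dict=True)
    print(results)
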
+ + def set_schema(self, schema): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + self._model.set_schema(schema) + + def predict(self, texts, return_dict=False): + results = self._model.predict(texts) + if not return_dict: + return results + new_results = [] + for result in results: + uie_result = dict() + for key, uie_results in result.items(): + uie_result[key] = list() + for uie_res in uie_results: + uie_result[key].append(uie_res.get_dict()) + new_results += [uie_result] + return new_results diff --git a/libs/ultrainfer/python/ultrainfer/ts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/__init__.py new file mode 100755 index 0000000000..2128c729e5 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import anomalydetection +from . import classification +from . import forecasting diff --git a/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py new file mode 100755 index 0000000000..ca7938f864 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py @@ -0,0 +1,168 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +import pandas as pd +from typing import List +from dataclasses import dataclass + +from .... import UltraInferModel, ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyAnomalyDetectionModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyAnomalyDetectionPreprocessor( + self._config, scaler_file + ) + self._postprocessor = _PyOnlyAnomalyDetectionPostprocessor(self._config) + + def model_name(): + return "PyOnlyAnomalyDetectionModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for idx, data in enumerate(output_arrs[0]): + data = {"ori_ts": data_list[idx]["ori_ts"], "pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyAnomalyDetectionPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + processors.append(P.CutOff(config["size"])) + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + + if config.get("time_feat", None): + processors.append( + P.CalcTimeFeatures( + config["info_params"], + config["size"], + config["holiday"], + ) + ) + + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyAnomalyDetectionPostprocessor(object): + def __init__(self, config): + super().__init__() + self.model_threshold = config["model_threshold"] + self.info_params = config["info_params"] + + def run(self, data): + ori_ts = data["ori_ts"] + pred = data["pred"] + if ori_ts.get("past_target", None) is not None: + ts = ori_ts["past_target"] + elif ori_ts.get("observed_cov_numeric", None) is not None: + ts = ori_ts["observed_cov_numeric"] 
+ elif ori_ts.get("known_cov_numeric", None) is not None: + ts = ori_ts["known_cov_numeric"] + elif ori_ts.get("static_cov_numeric", None) is not None: + ts = ori_ts["static_cov_numeric"] + else: + raise ValueError("No value in ori_ts") + column_name = ( + self.info_params["target_cols"] + if "target_cols" in self.info_params + else self.info_params["feature_cols"] + ) + + anomaly_score = np.mean(np.square(pred - np.array(ts)), axis=-1) + anomaly_label = (anomaly_score >= self.model_threshold) + 0 + + past_target_index = ts.index + past_target_index.name = self.info_params["time_col"] + + label = anomaly_label.tolist() + dates = past_target_index.tolist() + col_names = ["label"] + data = [label] + result = _PyOnlyAnomalyDetectionResult( + dates=dates, col_names=col_names, data=data + ) + return result + + +@dataclass +class _PyOnlyAnomalyDetectionResult(object): + dates: List[int] + col_names: List[str] + data: List[List[int]] diff --git a/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py new file mode 100755 index 0000000000..8fb4cc5778 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py @@ -0,0 +1,128 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +from dataclasses import dataclass + +from .... 
import ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyClassificationModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyClassificationPreprocessor( + self._config, scaler_file + ) + self._postprocessor = _PyOnlyClassificationPostprocessor() + + def model_name(): + return "PyOnlyClassificationModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for data in output_arrs[0]: + data = {"pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyClassificationPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + processors.append(P.BuildPaddedMask(config["input_data"])) + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyClassificationPostprocessor(object): + def __init__(self): + super().__init__() + + def run(self, data): + pred_ts = data["pred"] + pred_ts -= np.max(pred_ts, axis=-1, keepdims=True) + pred_ts = np.exp(pred_ts) / np.sum(np.exp(pred_ts), axis=-1, keepdims=True) + class_id = np.argmax(pred_ts, axis=-1) + pred_score = pred_ts[class_id] + result = _PyOnlyClassificationResult(class_id=class_id, score=pred_score) + return result + + +@dataclass +class _PyOnlyClassificationResult(object): + class_id: int + score: float diff --git a/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py new file mode 100755 index 0000000000..56a5634163 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py @@ -0,0 +1,195 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +import pandas as pd +from typing import List +from dataclasses import dataclass + +from .... import UltraInferModel, ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyForecastingModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyForecastingPreprocessor(self._config, scaler_file) + self._postprocessor = _PyOnlyForecastingPostprocessor(self._config, scaler_file) + + def model_name(): + return "PyOnlyForecastingModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for idx, data in enumerate(output_arrs[0]): + data = {"ori_ts": data_list[idx]["ori_ts"], "pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, 
self._model_format + ) + + +class _PyOnlyForecastingPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + processors.append(P.CutOff(config["size"])) + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + + if config.get("time_feat", None): + processors.append( + P.CalcTimeFeatures( + config["info_params"], + config["size"], + config["holiday"], + ) + ) + + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyForecastingPostprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + self.info_params = config["info_params"] + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + ori_ts = data["ori_ts"] + pred = data["pred"] + if ori_ts.get("past_target", None) is not None: + ts = ori_ts["past_target"] + elif ori_ts.get("observed_cov_numeric", None) is not None: + ts = ori_ts["observed_cov_numeric"] + elif ori_ts.get("known_cov_numeric", None) is not None: + ts = ori_ts["known_cov_numeric"] + elif ori_ts.get("static_cov_numeric", None) is not None: + ts = ori_ts["static_cov_numeric"] + else: + raise ValueError("No value in ori_ts") + + column_name = ( + self.info_params["target_cols"] + if "target_cols" in self.info_params + else self.info_params["feature_cols"] + ) + if isinstance(self.info_params["freq"], str): + past_target_index = ts.index + if past_target_index.freq is None: + past_target_index.freq = pd.infer_freq(ts.index) + future_target_index = pd.date_range( + past_target_index[-1] + past_target_index.freq, + periods=pred.shape[0], + freq=self.info_params["freq"], + name=self.info_params["time_col"], + ) + elif isinstance(self.info_params["freq"], int): + start_idx = max(ts.index) + 1 + stop_idx = start_idx + pred.shape[0] + future_target_index = pd.RangeIndex( + start=start_idx, + stop=stop_idx, + step=self.info_params["freq"], + name=self.info_params["time_col"], + ) + + future_target = pd.DataFrame( + np.reshape(pred, newshape=[pred.shape[0], -1]), + index=future_target_index, + columns=column_name, + ) + data = {"pred": future_target} + forecast_dataframe = self._processor_chain(data) + forecast = forecast_dataframe["pred"] + col_names = forecast.columns.tolist() + data = [forecast[col_name].tolist() for col_name in col_names] + dates = [int(i.timestamp()) for i in forecast.index] + result = _PyOnlyForecastingResult(dates=dates, col_names=col_names, data=data) + return result + + def _build_processors(self, config): + processors = [] + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Denormalize(self.scaler_file, config["info_params"])) + return processors + + +@dataclass +class _PyOnlyForecastingResult(object): + dates: List[int] + col_names: List[str] + data: List[List[float]] diff --git a/libs/ultrainfer/python/ultrainfer/utils/__init__.py 
b/libs/ultrainfer/python/ultrainfer/utils/__init__.py new file mode 100755 index 0000000000..2379939471 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/__init__.py @@ -0,0 +1,14 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .example_resource import get_detection_test_image diff --git a/libs/ultrainfer/python/ultrainfer/utils/example_resource.py b/libs/ultrainfer/python/ultrainfer/utils/example_resource.py new file mode 100755 index 0000000000..dd41fc7664 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/example_resource.py @@ -0,0 +1,26 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import download +from . import hub_env + + +def get_detection_test_image(path=None): + if path is None: + path = hub_env.RESOURCE_HOME + fullpath = download( + url="https://bj.bcebos.com/paddlehub/fastdeploy/example/detection_test_image.jpg", + path=path, + ) + return fullpath diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_config.py b/libs/ultrainfer/python/ultrainfer/utils/hub_config.py new file mode 100755 index 0000000000..3e3d4a5d23 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_config.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import os +import time +import json +import uuid +import yaml + +from . import hub_env as hubenv + + +class HubConfig: + """ + UltraInfer model management configuration class. + """ + + def __init__(self): + self._initialize() + self.file = os.path.join(hubenv.CONF_HOME, "config.yaml") + + if not os.path.exists(self.file): + self.flush() + return + + with open(self.file, "r") as file: + try: + cfg = yaml.load(file, Loader=yaml.FullLoader) + self.data.update(cfg) + except: + ... + + def _initialize(self): + # Set default configuration values. 
+ self.data = {} + self.data["server"] = "http://paddlepaddle.org.cn/paddlehub" + + def reset(self): + """Reset configuration to default.""" + self._initialize() + self.flush() + + @property + def server(self): + """Model server url.""" + return self.data["server"] + + @server.setter + def server(self, url: str): + self.data["server"] = url + self.flush() + + def flush(self): + """Flush the current configuration into the configuration file.""" + with open(self.file, "w") as file: + cfg = json.loads(json.dumps(self.data)) + yaml.dump(cfg, file) + + def __str__(self): + cfg = json.loads(json.dumps(self.data)) + return yaml.dump(cfg) + + +config = HubConfig() diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_env.py b/libs/ultrainfer/python/ultrainfer/utils/hub_env.py new file mode 100755 index 0000000000..10eb2bbc18 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_env.py @@ -0,0 +1,57 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module is used to store environmental variables for ultrainfer model hub. + +ULTRAINFER_HUB_HOME --> the root directory for storing ultrainfer model hub related data. Default to ~/.ultrainfer. Users can change the +├ default value through the ULTRAINFER_HUB_HOME environment variable. +├── MODEL_HOME --> Store the downloaded ultrainfer models. +├── CONF_HOME --> Store the default configuration files. +""" + +import os + + +def _get_user_home(): + return os.path.expanduser("~") + + +def _get_hub_home(): + if "ULTRAINFER_HUB_HOME" in os.environ: + home_path = os.environ["ULTRAINFER_HUB_HOME"] + if os.path.exists(home_path): + if os.path.isdir(home_path): + return home_path + else: + raise RuntimeError( + "The environment variable ULTRAINFER_HUB_HOME {} is not a directory.".format( + home_path + ) + ) + else: + return home_path + return os.path.join(_get_user_home(), ".ultrainfer") + + +def _get_sub_home(directory): + home = os.path.join(_get_hub_home(), directory) + os.makedirs(home, exist_ok=True) + return home + + +USER_HOME = _get_user_home() +HUB_HOME = _get_hub_home() +MODEL_HOME = _get_sub_home("models") +CONF_HOME = _get_sub_home("conf") +RESOURCE_HOME = _get_sub_home("resources") diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py b/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py new file mode 100755 index 0000000000..3fd05e0c51 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py @@ -0,0 +1,134 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import requests +from typing import List + +from .hub_config import config + + +class ServerConnectionError(Exception): + def __init__(self, url: str): + self.url = url + + def __str__(self): + tips = "Can't connect to UltraInfer Model Server: {}".format(self.url) + return tips + + +class ModelServer(object): + """ + UltraInfer server source + + Args: + url(str) : Url of the server + timeout(int) : Request timeout + """ + + def __init__(self, url: str, timeout: int = 10): + self._url = url + self._timeout = timeout + + def search_model( + self, name: str, format: str = None, version: str = None + ) -> List[dict]: + """ + Search model from model server. + + Args: + name(str) : UltraInfer model name + format(str): UltraInfer model format + version(str) : UltraInfer model version + Return: + result(list): search results + """ + params = {} + params["name"] = name + if format: + params["format"] = format + if version: + params["version"] = version + result = self.request(path="ultrainfer_search", params=params) + if result["status"] == 0 and len(result["data"]) > 0: + return result["data"] + return None + + def stat_model(self, name: str, format: str, version: str): + """ + Note a record when download a model for statistics. + + Args: + name(str) : UltraInfer model name + format(str): UltraInfer model format + version(str) : UltraInfer model version + Return: + is_successful(bool): True if successful, False otherwise + """ + params = {} + params["name"] = name + params["format"] = format + params["version"] = version + params["from"] = "ultrainfer" + try: + result = self.request(path="stat", params=params) + except Exception: + return False + if result["status"] == 0: + return True + else: + return False + + def request(self, path: str, params: dict) -> dict: + """Request server.""" + api = "{}/{}".format(self._url, path) + try: + result = requests.get(api, params, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + + def get_model_list(self): + """ + Get all pre-trained models information in dataset. + Return: + result(dict): key is category name, value is a list which contains models \ + information such as name, format and version. + """ + api = "{}/{}".format(self._url, "ultrainfer_listmodels") + try: + result = requests.get(api, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + + def is_connected(self): + return self.check(self._url) + + @classmethod + def check(cls, url: str) -> bool: + """ + Check if the specified url is a valid model server + + Args: + url(str) : Url to check + """ + try: + r = requests.get(url + "/search") + return r.status_code == 200 + except: + return False + + +model_server = ModelServer(config.server) diff --git a/libs/ultrainfer/python/ultrainfer/utils/misc.py b/libs/ultrainfer/python/ultrainfer/utils/misc.py new file mode 100755 index 0000000000..bac49d9dc0 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/misc.py @@ -0,0 +1,20 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + + +def load_config(config_path): + with open(config_path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) diff --git a/libs/ultrainfer/python/ultrainfer/vision/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/__init__.py new file mode 100755 index 0000000000..f63607950c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/__init__.py @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import detection +from . import classification +from . import segmentation +from . import tracking +from . import keypointdetection +from . import matting +from . import facedet +from . import facealign +from . import faceid +from . import ocr +from . import headpose +from . import sr +from . import evaluation +from . import generation +from . import perception +from .utils import fd_result_to_json +from .visualize import * +from .. import C + + +def enable_flycv(): + return C.vision.enable_flycv() + + +def disable_flycv(): + return C.vision.disable_flycv() diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py new file mode 100755 index 0000000000..af2b7f000d --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py @@ -0,0 +1,36 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import + +from .contrib.yolov5cls import YOLOv5Cls +from .ppcls import * +from .ppshitu import PPShiTuV2Detector +from .ppshitu import PPShiTuV2Recognizer +from .ppshitu import PPShiTuV2RecognizerPreprocessor +from .ppshitu import PPShiTuV2RecognizerPostprocessor +from .contrib.resnet import ResNet + +PPLCNet = PaddleClasModel +PPLCNetv2 = PaddleClasModel +EfficientNet = PaddleClasModel +GhostNet = PaddleClasModel +MobileNetv1 = PaddleClasModel +MobileNetv2 = PaddleClasModel +MobileNetv3 = PaddleClasModel +ShuffleNetv2 = PaddleClasModel +SqueezeNet = PaddleClasModel +Inceptionv3 = PaddleClasModel +PPHGNet = PaddleClasModel +ResNet50vd = PaddleClasModel +SwinTransformer = PaddleClasModel diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py new file mode 100755 index 0000000000..487e92d1d7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py @@ -0,0 +1,104 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class ResNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a image classification model exported by torchvision.ResNet. 
+ + :param model_file: (str)Path of model file, e.g resnet/resnet50.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + # call super() to initialize the backend_option + # the result of initialization will be saved in self._runtime_option + super(ResNet, self).__init__(runtime_option) + + self._model = C.vision.classification.ResNet( + model_file, params_file, self._runtime_option, model_format + ) + # self.initialized shows the initialization of the model is successful or not + + assert self.initialized, "ResNet initialize failed." + + # Predict and return the inference result of "input_image". + def predict(self, input_image, topk=1): + """Classify an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param topk: (int)The topk result by the classify confidence score, default 1 + :return: ClassifyResult + """ + return self._model.predict(input_image, topk) + + # Implement the setter and getter method for variables + @property + def size(self): + """ + Returns the preprocess image size, default size = [224, 224]; + """ + return self._model.size + + @property + def mean_vals(self): + """ + Returns the mean value of normlization, default mean_vals = [0.485f, 0.456f, 0.406f]; + """ + return self._model.mean_vals + + @property + def std_vals(self): + """ + Returns the std value of normlization, default std_vals = [0.229f, 0.224f, 0.225f]; + """ + return self._model.std_vals + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @mean_vals.setter + def mean_vals(self, value): + assert isinstance( + value, list + ), "The value to set `mean_vals` must be type of list." + self._model.mean_vals = value + + @std_vals.setter + def std_vals(self, value): + assert isinstance( + value, list + ), "The value to set `std_vals` must be type of list." + self._model.std_vals = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py new file mode 100755 index 0000000000..522d8a5428 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py @@ -0,0 +1,140 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv5ClsPreprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5Cls""" + self._preprocessor = C.vision.classification.YOLOv5ClsPreprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5Cls + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [224, 224] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class YOLOv5ClsPostprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5Cls""" + self._postprocessor = C.vision.classification.YOLOv5ClsPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5Cls + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of ClassifyResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def topk(self): + """ + topk for postprocessing, default is 1 + """ + return self._postprocessor.topk + + @topk.setter + def topk(self, topk): + assert isinstance(topk, int), "The value to set `top k` must be type of int." + self._postprocessor.topk = topk + + +class YOLOv5Cls(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Cls model exported by YOLOv5Cls. + + :param model_file: (str)Path of model file, e.g ./YOLOv5Cls.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(YOLOv5Cls, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "YOLOv5Cls only support model format of ModelFormat.ONNX now." + self._model = C.vision.classification.YOLOv5Cls( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "YOLOv5Cls initialize failed." + + def predict(self, input_image): + """Classify an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: ClassifyResult + """ + assert input_image is not None, "Input image is None." 
+ return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv5ClsPreprocessor object of the loaded model + + :return YOLOv5ClsPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv5ClsPostprocessor object of the loaded model + + :return YOLOv5ClsPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py new file mode 100755 index 0000000000..b19dc5908a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py @@ -0,0 +1,288 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from dataclasses import dataclass +from typing import List + +import numpy as np + +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config + + +class PaddleClasPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PaddleClasModel from configuration file + + :param config_file: (str)Path of configuration file, e.g resnet50/inference_cls.yaml + """ + super(PaddleClasPreprocessor, self).__init__() + self._manager = C.vision.classification.PaddleClasPreprocessor(config_file) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + def initial_resize_on_cpu(self, v): + """ + When the initial operator is Resize, and input image size is large, + maybe it's better to run resize on CPU, because the HostToDevice memcpy + is time consuming. Set this True to run the initial resize on CPU. 
+        :param: v: True or False
+        """
+        self._manager.initial_resize_on_cpu(v)
+
+
+class PaddleClasPostprocessor:
+    def __init__(self, topk=1):
+        """Create a postprocessor for PaddleClasModel
+
+        :param topk: (int)Keep only the top k classification labels
+        """
+        self._postprocessor = C.vision.classification.PaddleClasPostprocessor(topk)
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PaddleClasModel
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of ClassifyResult (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+
+class PaddleClasModel(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load an image classification model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g. resnet50/inference.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. resnet50/inference.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of the deployment configuration file, e.g. resnet50/inference_cls.yaml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+
+        super(PaddleClasModel, self).__init__(runtime_option)
+        self._model = C.vision.classification.PaddleClasModel(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PaddleClas model initialize failed."
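+
+    # Illustrative usage sketch (not part of the tested code path): the model/config
+    # paths, the test image, and the use of OpenCV for loading are assumptions.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.classification import PaddleClasModel
+    #
+    #   model = PaddleClasModel(
+    #       "resnet50/inference.pdmodel",
+    #       "resnet50/inference.pdiparams",
+    #       "resnet50/inference_cls.yaml",
+    #   )
+    #   im = cv2.imread("test.jpg")  # any HWC, BGR numpy.ndarray works
+    #   result = model.predict(im, topk=5)
+    #   print(result)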
+ + def clone(self): + """Clone PaddleClasModel object + + :return: a new PaddleClasModel object + """ + + class PaddleClasCloneModel(PaddleClasModel): + def __init__(self, model): + self._model = model + + clone_model = PaddleClasCloneModel(self._model.clone()) + return clone_model + + def predict(self, im, topk=1): + """Classify an input image + + :param im: (numpy.ndarray) The input image data, a 3-D array with layout HWC, BGR format + :param topk: (int) Filter the topk classify result, default 1 + :return: ClassifyResult + """ + + self.postprocessor.topk = topk + return self._model.predict(im) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PaddleClasPreprocessor object of the loaded model + + :return PaddleClasPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleClasPostprocessor object of the loaded model + + :return PaddleClasPostprocessor + """ + return self._model.postprocessor + + +class _PyOnlyMultilabelClassificationPreprocessor(object): + def __init__(self, config): + super().__init__() + processors = self._build_processors(config) + processors.insert(0, P.BGR2RGB()) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + for item in config: + tf_type = next(iter(item)) + args = item[tf_type] + if tf_type == "ResizeImage": + if args.keys() - {"resize_short", "size", "backend", "interpolation"}: + raise ValueError + args.setdefault("resize_short", None) + args.setdefault("size", None) + # TODO: `backend` & `interpolation` + if not (args["resize_short"] or args["size"]): + raise ValueError + if args.get("resize_short"): + processor = P.ResizeByShort( + target_short_edge=args["resize_short"], + size_divisor=None, + interp="LINEAR", + ) + else: + processor = P.Resize(target_size=args["size"]) + elif tf_type == "CropImage": + if args.keys() - {"size"}: + raise ValueError + args.setdefault("size", 224) + processor = P.Crop(crop_size=args["size"]) + elif tf_type == "NormalizeImage": + if args.keys() - {"mean", "std", "scale", "order", "channel_num"}: + raise ValueError + args.setdefault("mean", [0.485, 0.456, 0.406]) + args.setdefault("std", [0.229, 0.224, 0.225]) + args.setdefault("scale", 1 / 255) + args.setdefault("order", "") + args.setdefault("channel_num", 3) + if args["order"] != "": + raise ValueError + if args["channel_num"] != 3: + raise ValueError + processor = P.Normalize( + scale=args["scale"], mean=args["mean"], std=args["std"] + ) + elif tf_type == "ToCHWImage": + if args: + raise ValueError + processor = P.ToCHWImage() + else: + raise ValueError("Unknown transform type") + processors.append(processor) + return processors + + +@dataclass +class _PyOnlyMultilabelClassificationResult(object): + label_ids: List[int] + scores: List[float] + + +class _PyOnlyMultilabelClassificationPostprocessor(object): + def __init__(self, config): + super().__init__() + self._threshold = config["threshold"] + + def run(self, data): + pred = data["pred"] + + pred_index = np.where(pred >= self._threshold)[0].astype("int32") + index = pred_index[np.argsort(pred[pred_index])][::-1] + clas_id_list = [] + score_list = [] + for i in index: + 
clas_id_list.append(i.item()) + score_list.append(pred[i].item()) + + result = _PyOnlyMultilabelClassificationResult( + label_ids=clas_id_list, scores=score_list + ) + return result + + +class PyOnlyMultilabelClassificationModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + self._config = load_config(config_file) + self._preprocessor = _PyOnlyMultilabelClassificationPreprocessor( + self._config["PreProcess"]["transform_ops"] + ) + self._postprocessor = _PyOnlyMultilabelClassificationPostprocessor( + self._config["PostProcess"]["MultiLabelThreshOutput"] + ) + + def model_name(): + return "PyOnlyMultilabelImageClassificationModel" + + def batch_predict(self, imgs): + data_list = [] + for img in imgs: + data = {"img": img} + data = self._preprocessor.run(data) + data_list.append(data) + + input_name = self._runtime.get_input_info(0).name + imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32) + imgs = np.ascontiguousarray(imgs) + output_arrs = self._runtime.infer({input_name: imgs}) + + results = [] + for pred in output_arrs[0]: + data = {"pred": pred} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py new file mode 100755 index 0000000000..f5b6e8f96e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py @@ -0,0 +1,145 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ...detection.ppdet import PicoDet + + +class PPShiTuV2Detector(PicoDet): + """Detect main body from an input image.""" + + ... + + +class PPShiTuV2RecognizerPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PPShiTuV2Recognizer from configuration file + + :param config_file: (str)Path of configuration file, e.g PPLCNet/inference_cls.yaml + """ + super(PPShiTuV2RecognizerPreprocessor, self).__init__() + self._manager = C.vision.classification.PPShiTuV2RecognizerPreprocessor( + config_file + ) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+        """
+        self._manager.disable_permute()
+
+    def initial_resize_on_cpu(self, v):
+        """
+        When the initial operator is Resize and the input image is large,
+        it may be better to run the resize on CPU, because the HostToDevice
+        memcpy is time-consuming. Set this to True to run the initial resize on CPU.
+        :param: v: True or False
+        """
+        self._manager.initial_resize_on_cpu(v)
+
+
+class PPShiTuV2RecognizerPostprocessor:
+    def __init__(self, topk=1):
+        """Create a postprocessor for PPShiTuV2Recognizer (note: `topk` is currently unused)"""
+        self._postprocessor = C.vision.classification.PPShiTuV2RecognizerPostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PPShiTuV2Recognizer
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of ClassifyResult; the feature vector is ClassifyResult.feature (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+
+class PPShiTuV2Recognizer(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load a PPShiTuV2Recognizer model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g. PPLCNet/inference.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. PPLCNet/inference.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of the deployment configuration file, e.g. PPLCNet/inference_cls.yaml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+
+        super(PPShiTuV2Recognizer, self).__init__(runtime_option)
+        self._model = C.vision.classification.PPShiTuV2Recognizer(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PPShiTuV2Recognizer model initialize failed."
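+
+    # Illustrative usage sketch (not part of the tested code path): the model/config
+    # paths and the cropped image are assumptions; per the docstrings above, the
+    # recognizer returns a ClassifyResult whose `feature` field holds the embedding.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.classification import PPShiTuV2Recognizer
+    #
+    #   rec = PPShiTuV2Recognizer(
+    #       "PPLCNet/inference.pdmodel",
+    #       "PPLCNet/inference.pdiparams",
+    #       "PPLCNet/inference_cls.yaml",
+    #   )
+    #   crop = cv2.imread("object_crop.jpg")
+    #   feature = rec.predict(crop).feature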
+ + def clone(self): + """Clone PPShiTuV2Recognizer object + + :return: a new PPShiTuV2Recognizer object + """ + + class PPShiTuV2RecognizerCloneModel(PPShiTuV2Recognizer): + def __init__(self, model): + self._model = model + + clone_model = PPShiTuV2RecognizerCloneModel(self._model.clone()) + return clone_model + + def predict(self, im): + """Extract feature from an input image + + :param im: (numpy.ndarray) The input image data, a 3-D array with layout HWC, BGR format + :return: ClassifyResult + """ + + return self._model.predict(im) + + def batch_predict(self, images): + """Extract features from a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult, the feature vector is ClassifyResult.feature + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PPShiTuV2RecognizerPreprocessor object of the loaded model + + :return PPShiTuV2RecognizerPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PPShiTuV2RecognizerPostprocessor object of the loaded model + + :return PPShiTuV2RecognizerPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py new file mode 100755 index 0000000000..e14a4b433a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .manager import ProcessorManager +from .manager import PyProcessorManager +from .processors import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/manager.py b/libs/ultrainfer/python/ultrainfer/vision/common/manager.py new file mode 100755 index 0000000000..6adc0acac4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/manager.py @@ -0,0 +1,69 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from abc import ABC, abstractmethod +from ... 
import c_lib_wrap as C + + +class ProcessorManager: + def __init__(self): + self._manager = None + + def run(self, input_ims): + """Process input image + + :param: input_ims: (list of numpy.ndarray) The input images + :return: list of FDTensor + """ + return self._manager.run(input_ims) + + def use_cuda(self, enable_cv_cuda=False, gpu_id=-1): + """Use CUDA processors + + :param: enable_cv_cuda: Ture: use CV-CUDA, False: use CUDA only + :param: gpu_id: GPU device id + """ + return self._manager.use_cuda(enable_cv_cuda, gpu_id) + + +class PyProcessorManager(ABC): + """ + PyProcessorManager is used to define a customized processor in python + """ + + def __init__(self): + self._manager = C.vision.processors.ProcessorManager() + + def use_cuda(self, enable_cv_cuda=False, gpu_id=-1): + """Use CUDA processors + + :param: enable_cv_cuda: Ture: use CV-CUDA, False: use CUDA only + :param: gpu_id: GPU device id + """ + return self._manager.use_cuda(enable_cv_cuda, gpu_id) + + def __call__(self, images): + image_batch = C.vision.FDMatBatch() + image_batch.from_mats(images) + + self._manager.pre_apply(image_batch) + outputs = self.apply(image_batch) + self._manager.post_apply() + return outputs + + @abstractmethod + def apply(self, image_batch): + print("This function has to be implemented.") + return [] diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/processors.py b/libs/ultrainfer/python/ultrainfer/vision/common/processors.py new file mode 100755 index 0000000000..ba90fbc2de --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/processors.py @@ -0,0 +1,152 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from ... import c_lib_wrap as C + + +class Processor: + def __init__(self): + self.processor = None + + def __call__(self, mat): + """call for processing input. + + :param mat: The input data FDMat or FDMatBatch. + """ + self.processor(mat) + + +class ResizeByShort(Processor): + def __init__(self, target_size: int, interp=1, use_scale=True, max_hw=[]): + """Create a ResizeByShort operation with the given parameters. + + :param target_size: The target short size to resize the image + :param interp: Optionally, the interpolation mode for resizing image + :param use_scale: Optionally, whether to scale image + :param max_hw: Max spatial size which is used by ResizeByShort + """ + self.processor = C.vision.processors.ResizeByShort( + target_size, interp, use_scale, max_hw + ) + + +class CenterCrop(Processor): + def __init__(self, width, height): + """Create a CenterCrop operation with the given parameters. + + :param width: Desired width of the cropped image + :param height: Desired height of the cropped image + """ + self.processor = C.vision.processors.CenterCrop(width, height) + + +class Pad(Processor): + def __init__(self, top: int, bottom: int, left: int, right: int, value=[]): + """Create a Pad operation with the given parameters. 
+ + :param top: The top padding + :param bottom: The bottom padding + :param left: The left padding + :param right: The right padding + :param value: the value that is used to pad on the input image + """ + self.processor = C.vision.processors.Pad(top, bottom, left, right, value) + + +class NormalizeAndPermute(Processor): + def __init__(self, mean=[], std=[], is_scale=True, min=[], max=[], swap_rb=False): + """Creae a Normalize and a Permute operation with the given parameters. + + :param mean: A list containing the mean of each channel + :param std: A list containing the standard deviation of each channel + :param is_scale: Specifies if the image are being scaled or not + :param min: A list containing the minimum value of each channel + :param max: A list containing the maximum value of each channel + """ + self.processor = C.vision.processors.NormalizeAndPermute( + mean, std, is_scale, min, max, swap_rb + ) + + +class Cast(Processor): + def __init__(self, dtype="float"): + """Creat a new cast opereaton with given dtype + + :param dtype: Target dtype of the output + """ + self.processor = C.vision.processors.Cast(dtype) + + +class HWC2CHW(Processor): + def __init__(self): + """Creat a new hwc2chw processor with default dtype. + + :return An instance of processor `HWC2CHW` + """ + self.processor = C.vision.processors.HWC2CHW() + + +class Normalize(Processor): + def __init__(self, mean, std, is_scale=True, min=[], max=[], swap_rb=False): + """Creat a new normalize opereator with given paremeters. + + :param mean: A list containing the mean of each channel + :param std: A list containing the standard deviation of each channel + :param is_scale: Specifies if the image are being scaled or not + :param min: A list containing the minimum value of each channel + :param max: A list containing the maximum value of each channel + """ + self.processor = C.vision.processors.Normalize( + mean, std, is_scale, min, max, swap_rb + ) + + +class PadToSize(Processor): + def __init__(self, width, height, value=[]): + """Create a new PadToSize opereator with given parameters. + + :param width: Desired width of the output image + :param height: Desired height of the output image + :param value: Values to pad with + """ + self.processor = C.vision.processors.PadToSize(width, height, value) + + +class Resize(Processor): + def __init__( + self, width, height, scale_w=-1.0, scale_h=-1.0, interp=1, use_scale=False + ): + """Create a Resize operation with the given parameters. + + :param width: Desired width of the output image + :param height: Desired height of the output image + :param scale_w: Scales the width in x-direction + :param scale_h: Scales the height in y-direction + :param interp: Optionally, the interpolation mode for resizing image + :param use_scale: Optionally, whether to scale image + """ + self.processor = C.vision.processors.Resize( + width, height, scale_w, scale_h, interp, use_scale + ) + + +class StridePad(Processor): + def __init__(self, stride, value=[]): + """Create a StridePad processor with given parameters. + + :param stride: Stride of the processor + :param value: Values to pad with + """ + self.processor = C.vision.processors.StridePad(stride, value) diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py new file mode 100755 index 0000000000..61a372258e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py @@ -0,0 +1,30 @@ +# copyright (c) 2024 PaddlePaddle Authors. 
All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.yolov7 import * +from .contrib.yolor import YOLOR +from .contrib.scaled_yolov4 import ScaledYOLOv4 +from .contrib.nanodet_plus import NanoDetPlus +from .contrib.yolox import YOLOX +from .contrib.yolov5 import * +from .contrib.yolov5seg import * +from .contrib.fastestdet import * +from .contrib.yolov5lite import YOLOv5Lite +from .contrib.yolov6 import YOLOv6 +from .contrib.yolov7end2end_trt import YOLOv7End2EndTRT +from .contrib.yolov7end2end_ort import YOLOv7End2EndORT +from .contrib.yolov8 import * +from .ppdet import * +from .contrib.rkyolo import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py new file mode 100755 index 0000000000..e10276afd7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py @@ -0,0 +1,157 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class FastestDetPreprocessor: + def __init__(self): + """Create a preprocessor for FastestDet""" + self._preprocessor = C.vision.detection.FastestDetPreprocessor() + + def run(self, input_ims): + """Preprocess input images for FastestDet + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [352, 352] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class FastestDetPostprocessor: + def __init__(self): + """Create a postprocessor for FastestDet""" + self._postprocessor = C.vision.detection.FastestDetPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for FastestDet + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.65 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.45 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class FastestDet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a FastestDet model exported by FastestDet. + + :param model_file: (str)Path of model file, e.g ./FastestDet.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(FastestDet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FastestDet only support model format of ModelFormat.ONNX now." + self._model = C.vision.detection.FastestDet( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "FastestDet initialize failed." 
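+
+    # Illustrative usage sketch (not part of the tested code path): the ONNX path and
+    # the test image are assumptions; thresholds are tuned through the postprocessor.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import FastestDet
+    #
+    #   model = FastestDet("FastestDet.onnx")
+    #   model.postprocessor.conf_threshold = 0.5
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im)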
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+        assert input_image is not None, "Input image is None."
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+        assert len(images) == 1, "FastestDet only supports 1 image in batch_predict"
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get FastestDetPreprocessor object of the loaded model
+
+        :return FastestDetPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get FastestDetPostprocessor object of the loaded model
+
+        :return FastestDetPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py
new file mode 100755
index 0000000000..cd82537536
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py
@@ -0,0 +1,135 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class NanoDetPlus(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a NanoDetPlus model exported by NanoDet.
+
+        :param model_file: (str)Path of model file, e.g. ./nanodet.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolox/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options.
+        # The initialized options are stored in self._runtime_option.
+        super(NanoDetPlus, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.NanoDetPlus(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model was initialized successfully.
+        assert self.initialized, "NanoDetPlus initialize failed."
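+
+    # Illustrative usage sketch (not part of the tested code path): the ONNX path and
+    # the test image are assumptions; thresholds are passed directly to predict here.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import NanoDetPlus
+    #
+    #   model = NanoDetPlus("nanodet.onnx")
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im, conf_threshold=0.35, nms_iou_threshold=0.5)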
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟NanoDetPlus模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [416, 416]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (320, 320) + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def keep_ratio(self): + # keep aspect ratio or not when perform resize operation. This option is set as false by default in NanoDet-Plus + return self._model.keep_ratio + + @property + def downsample_strides(self): + # downsample strides for NanoDet-Plus to generate anchors, will take (8, 16, 32, 64) as default values + return self._model.downsample_strides + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS, default 4096 + return self._model.max_wh + + @property + def reg_max(self): + """ + reg_max for GFL regression, default 7 + """ + return self._model.reg_max + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @keep_ratio.setter + def keep_ratio(self, value): + assert isinstance( + value, bool + ), "The value to set `keep_ratio` must be type of bool." + self._model.keep_ratio = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value + + @reg_max.setter + def reg_max(self, value): + assert isinstance(value, int), "The value to set `reg_max` must be type of int." + self._model.reg_max = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py new file mode 100755 index 0000000000..9b77f54ab8 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .rkyolov5 import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py new file mode 100755 index 0000000000..cdf0a715b1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py @@ -0,0 +1,315 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from ..... import UltraInferModel, ModelFormat +from ..... import c_lib_wrap as C + + +class RKYOLOPreprocessor: + def __init__(self): + """Create a preprocessor for RKYOLOV5""" + self._preprocessor = C.vision.detection.RKYOLOPreprocessor() + + def run(self, input_ims): + """Preprocess input images for RKYOLOV5 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._preprocessor.is_scale_up = value + + +class RKYOLOPostprocessor: + def __init__(self): + """Create a postprocessor for RKYOLOV5""" + self._postprocessor = C.vision.detection.RKYOLOPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for RKYOLOV5 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + def set_anchor(self, anchor): + self._postprocessor.set_anchor(anchor) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def class_num(self): + """ + class_num for postprocessing, default is 80 + """ + return self._postprocessor.class_num + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @class_num.setter + def class_num(self, class_num): + """ + class_num for postprocessing, default is 80 + """ + assert isinstance( + class_num, int + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.class_num = class_num + + +class RKYOLOV5(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOV5 model exported by RKYOLOV5. + + :param model_file: (str)Path of model file, e.g ./yolov5.rknn + :param params_file: (str)Path of parameters file, e.g , if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RKYOLOV5, self).__init__(runtime_option) + + self._model = C.vision.detection.RKYOLOV5( + model_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RKYOLOV5 initialize failed." 
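+
+    # Illustrative usage sketch (not part of the tested code path): the RKNN path and
+    # the test image are assumptions; any board-specific RuntimeOption setup is omitted.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import RKYOLOV5
+    #
+    #   model = RKYOLOV5("yolov5.rknn")
+    #   model.postprocessor.class_num = 80
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)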
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get RKYOLOV5Preprocessor object of the loaded model + + :return RKYOLOV5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get RKYOLOV5Postprocessor object of the loaded model + + :return RKYOLOV5Postprocessor + """ + return self._model.postprocessor + + +class RKYOLOX(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOX model exported by RKYOLOX. + + :param model_file: (str)Path of model file, e.g ./yolox.rknn + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RKYOLOX, self).__init__(runtime_option) + + self._model = C.vision.detection.RKYOLOX( + model_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RKYOLOV5 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get RKYOLOV5Preprocessor object of the loaded model + + :return RKYOLOV5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get RKYOLOV5Postprocessor object of the loaded model + + :return RKYOLOV5Postprocessor + """ + return self._model.postprocessor + + +class RKYOLOV7(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOX model exported by RKYOLOV7. 
+
+        :param model_file: (str)Path of model file, e.g. ./yolov7.rknn
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options.
+        # The initialized options are stored in self._runtime_option.
+        super(RKYOLOV7, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.RKYOLOV7(
+            model_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model was initialized successfully.
+        assert self.initialized, "RKYOLOV7 initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.25
+        :param nms_iou_threshold: iou threshold for NMS, default is 0.5
+        :return: DetectionResult
+        """
+
+        self.postprocessor.conf_threshold = conf_threshold
+        self.postprocessor.nms_threshold = nms_iou_threshold
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the RKYOLOPreprocessor object of the loaded model
+
+        :return RKYOLOPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the RKYOLOPostprocessor object of the loaded model
+
+        :return RKYOLOPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py
new file mode 100755
index 0000000000..84dad54fa5
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py
@@ -0,0 +1,146 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class ScaledYOLOv4(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a ScaledYOLOv4 model exported by ScaledYOLOv4.
+ + :param model_file: (str)Path of model file, e.g ./scaled_yolov4.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(ScaledYOLOv4, self).__init__(runtime_option) + + self._model = C.vision.detection.ScaledYOLOv4( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "ScaledYOLOv4 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟ScaledYOLOv4模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py new file mode 100755 index 0000000000..1c4cfae6d5 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py @@ -0,0 +1,145 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOR(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOR model exported by YOLOR + + :param model_file: (str)Path of model file, e.g ./yolor.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOR, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOR( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOR initialize failed." 
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOR模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py new file mode 100755 index 0000000000..1ec1da0d80 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py @@ -0,0 +1,227 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv5Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5""" + self._preprocessor = C.vision.detection.YOLOv5Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv5Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5""" + self._postprocessor = C.vision.detection.YOLOv5Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv5(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5 model exported by YOLOv5. + + :param model_file: (str)Path of model file, e.g ./yolov5.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv5, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv5 initialize failed." 
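+
+    # Usage sketch (illustrative only, not part of the tested code in this patch):
+    # assumes a locally exported "yolov5s.onnx" file, an image readable by OpenCV,
+    # and that YOLOv5 is re-exported from ultrainfer.vision.detection as the
+    # package layout suggests.
+    #
+    #   import cv2
+    #   import ultrainfer as ui
+    #
+    #   model = ui.vision.detection.YOLOv5("yolov5s.onnx")
+    #   im = cv2.imread("test.jpg")
+    #   result = model.predict(im, conf_threshold=0.3, nms_iou_threshold=0.5)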
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv5Preprocessor object of the loaded model + + :return YOLOv5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv5Postprocessor object of the loaded model + + :return YOLOv5Postprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py new file mode 100755 index 0000000000..2cd7d7d878 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py @@ -0,0 +1,191 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv5Lite(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Lite model exported by YOLOv5Lite. + + :param model_file: (str)Path of model file, e.g ./yolov5lite.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv5Lite, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5Lite( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv5Lite initialize failed." 
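+
+    # Configuration sketch (illustrative; the file name is hypothetical): the
+    # official YOLOv5Lite export script strips the decode module, so
+    # is_decode_exported (see the property below) defaults to False; set it to
+    # True only if the ONNX file was exported with the decode module included.
+    #
+    #   model = YOLOv5Lite("v5lite-s.onnx")
+    #   model.is_decode_exported = True   # only if the export kept the decode module
+    #   result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)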
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv5Lite模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @property + def is_decode_exported(self): + """ + whether the model_file was exported with decode module. + The official YOLOv5Lite/export.py script will export ONNX file without decode module. + Please set it 'true' manually if the model file was exported with decode module. + False : ONNX files without decode module. True : ONNX file with decode module. + default False + """ + return self._model.is_decode_exported + + @property + def anchor_config(self): + return self._model.anchor_config + + @property + def downsample_strides(self): + """ + downsample strides for YOLOv5Lite to generate anchors, will take (8,16,32) as default values, might have stride=64. + """ + return self._model.downsample_strides + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value + + @is_decode_exported.setter + def is_decode_exported(self, value): + assert isinstance( + value, bool + ), "The value to set `is_decode_exported` must be type of bool." + self._model.is_decode_exported = value + + @anchor_config.setter + def anchor_config(self, anchor_config_val): + assert isinstance( + anchor_config_val, list + ), "The value to set `anchor_config` must be type of tuple or list." + assert isinstance( + anchor_config_val[0], list + ), "The value to set `anchor_config` must be 2-dimensions tuple or list" + self._model.anchor_config = anchor_config_val + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py new file mode 100755 index 0000000000..0c304bd1d6 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py @@ -0,0 +1,222 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv5SegPreprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5Seg""" + self._preprocessor = C.vision.detection.YOLOv5SegPreprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5Seg + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv5SegPostprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5Seg""" + self._postprocessor = C.vision.detection.YOLOv5SegPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5Seg + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv5Seg(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Seg model exported by YOLOv5. + + :param model_file: (str)Path of model file, e.g ./yolov5s-seg.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv5Seg, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5Seg( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv5Seg initialize failed." 
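+
+    # Configuration sketch (illustrative; the file name is hypothetical):
+    # YOLOv5Seg.predict() takes no threshold arguments, so thresholds and the
+    # input size are set on the preprocessor/postprocessor exposed by this class.
+    #
+    #   model = YOLOv5Seg("yolov5s-seg.onnx")
+    #   model.preprocessor.size = [640, 640]        # [width, height]
+    #   model.postprocessor.conf_threshold = 0.3    # confidence filter
+    #   model.postprocessor.nms_threshold = 0.5     # NMS IoU threshold
+    #   result = model.predict(im)                  # im: HWC BGR numpy.ndarray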
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        The confidence and NMS IoU thresholds are configured through the
+        `postprocessor` property rather than passed to this method.
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return: list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get YOLOv5SegPreprocessor object of the loaded model
+
+        :return: YOLOv5SegPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get YOLOv5SegPostprocessor object of the loaded model
+
+        :return: YOLOv5SegPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py
new file mode 100755
index 0000000000..aed7e7f197
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOv6(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a YOLOv6 model exported by YOLOv6.
+
+        :param model_file: (str)Path of model file, e.g. ./yolov6.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolov6/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options;
+        # the initialized options are stored in self._runtime_option.
+        super(YOLOv6, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.YOLOv6(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model initialized successfully.
+        assert self.initialized, "YOLOv6 initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv6模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py new file mode 100755 index 0000000000..9b9c63a8ae --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py @@ -0,0 +1,187 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv7Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv7""" + self._preprocessor = C.vision.detection.YOLOv7Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv7 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + +class YOLOv7Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv7""" + self._postprocessor = C.vision.detection.YOLOv7Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv7 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." 
+ self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class YOLOv7(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7 model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv7Preprocessor object of the loaded model + + :return YOLOv7Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv7Postprocessor object of the loaded model + + :return YOLOv7Postprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py new file mode 100755 index 0000000000..708a1cd36c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py @@ -0,0 +1,132 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv7End2EndORT(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7End2EndORT model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7end2end_ort.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7End2EndORT, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7End2EndORT( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7End2End initialize failed." + + def predict(self, input_image, conf_threshold=0.25): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py new file mode 100755 index 0000000000..9b91f70f1f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py @@ -0,0 +1,132 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv7End2EndTRT(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7End2EndTRT model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7end2end_trt.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7End2EndTRT, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7End2EndTRT( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7End2EndTRT initialize failed." 
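+
+    # Deployment sketch (illustrative; assumes RuntimeOption exposes use_gpu() and
+    # use_trt_backend() in this package's runtime module — treat the exact method
+    # names as an assumption, not a guarantee):
+    #
+    #   from ultrainfer import RuntimeOption
+    #
+    #   option = RuntimeOption()
+    #   option.use_gpu(0)           # this end-to-end export targets GPU execution
+    #   option.use_trt_backend()    # the embedded NMS plugin requires the TensorRT backend
+    #   model = YOLOv7End2EndTRT("yolov7-end2end-trt.onnx", runtime_option=option)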
+ + def predict(self, input_image, conf_threshold=0.25): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py new file mode 100755 index 0000000000..e1ceb82fd4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py @@ -0,0 +1,222 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv8Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv8""" + self._preprocessor = C.vision.detection.YOLOv8Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv8 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv8Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv8""" + self._postprocessor = C.vision.detection.YOLOv8Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv8 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv8(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv8 model exported by YOLOv8. + + :param model_file: (str)Path of model file, e.g ./yolov8s.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv8, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv8( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv8 initialize failed." 
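+
+    # Batch-inference sketch (illustrative): YOLOv8.predict() takes no threshold
+    # arguments, so thresholds are configured on the postprocessor before calling
+    # predict() or batch_predict().
+    #
+    #   model.postprocessor.conf_threshold = 0.25
+    #   model.postprocessor.nms_threshold = 0.5
+    #   results = model.batch_predict([im1, im2])   # list of HWC BGR numpy arrays
+    #   # results is a list of DetectionResult, one per input image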
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        The confidence and NMS IoU thresholds are configured through the
+        `postprocessor` property rather than passed to this method.
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return: list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get YOLOv8Preprocessor object of the loaded model
+
+        :return: YOLOv8Preprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get YOLOv8Postprocessor object of the loaded model
+
+        :return: YOLOv8Postprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py
new file mode 100755
index 0000000000..09ac32981b
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOX(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a YOLOX model exported by YOLOX.
+
+        :param model_file: (str)Path of model file, e.g. ./yolox.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolox/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options;
+        # the initialized options are stored in self._runtime_option.
+        super(YOLOX, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.YOLOX(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model initialized successfully.
+        assert self.initialized, "YOLOX initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOX模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_decode_exported(self): + """ + whether the model_file was exported with decode module. + The official YOLOX/tools/export_onnx.py script will export ONNX file without decode module. + Please set it 'true' manually if the model file was exported with decode module. + Defalut False. + """ + return self._model.is_decode_exported + + @property + def downsample_strides(self): + """ + downsample strides for YOLOX to generate anchors, will take (8,16,32) as default values, might have stride=64. + """ + return self._model.downsample_strides + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_decode_exported.setter + def is_decode_exported(self, value): + assert isinstance( + value, bool + ), "The value to set `is_decode_exported` must be type of bool." + self._model.is_decode_exported = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py new file mode 100755 index 0000000000..5e33bee662 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py @@ -0,0 +1,990 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from typing import Union, List
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+from ...common import ProcessorManager
+
+
+class PaddleDetPreprocessor(ProcessorManager):
+    def __init__(self, config_file):
+        """Create a preprocessor for PaddleDetection Model from configuration file
+
+        :param config_file: (str)Path of configuration file, e.g. ppyoloe/infer_cfg.yml
+        """
+        self._manager = C.vision.detection.PaddleDetPreprocessor(config_file)
+
+    def disable_normalize(self):
+        """
+        This function will disable normalize in preprocessing step.
+        """
+        self._manager.disable_normalize()
+
+    def disable_permute(self):
+        """
+        This function will disable hwc2chw in preprocessing step.
+        """
+        self._manager.disable_permute()
+
+
+class NMSOption:
+    def __init__(self):
+        self.nms_option = C.vision.detection.NMSOption()
+
+    @property
+    def background_label(self):
+        return self.nms_option.background_label
+
+
+class NMSRotatedOption:
+    def __init__(self):
+        self.nms_rotated_option = C.vision.detection.NMSRotatedOption()
+
+    @property
+    def background_label(self):
+        return self.nms_rotated_option.background_label
+
+
+class PaddleDetPostprocessor:
+    def __init__(self):
+        """Create a postprocessor for PaddleDetection Model"""
+        self._postprocessor = C.vision.detection.PaddleDetPostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PaddleDetection Model
+
+        :param runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of DetectionResult (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+    def apply_nms(self):
+        self._postprocessor.apply_nms()
+
+    def set_nms_option(self, nms_option=None):
+        """This function will enable decode and nms in postprocess step."""
+        if nms_option is None:
+            nms_option = NMSOption()
+        self._postprocessor.set_nms_option(nms_option.nms_option)
+
+    def set_nms_rotated_option(self, nms_rotated_option=None):
+        """This function will enable decode and rotated nms in postprocess step."""
+        if nms_rotated_option is None:
+            nms_rotated_option = NMSRotatedOption()
+        self._postprocessor.set_nms_rotated_option(nms_rotated_option.nms_rotated_option)
+
+
+class PPYOLOE(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load a PPYOLOE model exported by PaddleDetection.
+
+        :param model_file: (str)Path of model file, e.g. ppyoloe/model.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. ppyoloe/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of configuration file for deployment, e.g. ppyoloe/infer_cfg.yml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference of this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        super(PPYOLOE, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.PPYOLOE(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PPYOLOE model initialize failed."
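+    # --- Illustrative usage sketch (hedged): the file paths below are hypothetical and
+    # the top-level import path `ultrainfer.vision.detection.PPYOLOE` is an assumption;
+    # this block only illustrates how an exported PaddleDetection model is typically
+    # loaded and run with this wrapper.
+    #
+    #   import cv2
+    #   import ultrainfer as ui
+    #
+    #   model = ui.vision.detection.PPYOLOE(
+    #       "ppyoloe/model.pdmodel",
+    #       "ppyoloe/model.pdiparams",
+    #       "ppyoloe/infer_cfg.yml",
+    #   )
+    #   result = model.predict(cv2.imread("test.jpg"))
+    #
+    # If the exported model does not bundle NMS, postprocessing NMS can be enabled via
+    # model.postprocessor.apply_nms() or set_nms_option() (see PaddleDetPostprocessor above).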
+ + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + def clone(self): + """Clone PPYOLOE object + + :return: a new PPYOLOE object + """ + + class PPYOLOEClone(PPYOLOE): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOEClone(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleDetPreprocessor object of the loaded model + + :return PaddleDetPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleDetPostprocessor object of the loaded model + + :return PaddleDetPostprocessor + """ + return self._model.postprocessor + + +class PPYOLO(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLO model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyolo/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyolo/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPYOLO model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PPYOLO( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPYOLO model initialize failed." + + def clone(self): + """Clone PPYOLO object + + :return: a new PPYOLO object + """ + + class PPYOLOClone(PPYOLO): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOClone(self._model.clone()) + return clone_model + + +class PaddleYOLOX(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOX model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolox/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOX model only support model format of ModelFormat.Paddle now." 
+ self._model = C.vision.detection.PaddleYOLOX( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOX model initialize failed." + + def clone(self): + """Clone PaddleYOLOX object + + :return: a new PaddleYOLOX object + """ + + class PaddleYOLOXClone(PaddleYOLOX): + def __init__(self, model): + self._model = model + + clone_model = PaddleYOLOXClone(self._model.clone()) + return clone_model + + +class PicoDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PicoDet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g picodet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g picodet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PicoDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PicoDet model initialize failed." + + def clone(self): + """Clone PicoDet object + + :return: a new PicoDet object + """ + + class PicoDetClone(PicoDet): + def __init__(self, model): + self._model = model + + clone_model = PicoDetClone(self._model.clone()) + return clone_model + + +class FasterRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a FasterRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fasterrcnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fasterrcnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "FasterRCNN model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.FasterRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "FasterRCNN model initialize failed." + + def clone(self): + """Clone FasterRCNN object + + :return: a new FasterRCNN object + """ + + class FasterRCNNClone(FasterRCNN): + def __init__(self, model): + self._model = model + + clone_model = FasterRCNNClone(self._model.clone()) + return clone_model + + +class YOLOv3(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv3 model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g yolov3/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov3/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "YOLOv3 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.YOLOv3( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv3 model initialize failed." + + def clone(self): + """Clone YOLOv3 object + + :return: a new YOLOv3 object + """ + + class YOLOv3Clone(YOLOv3): + def __init__(self, model): + self._model = model + + clone_model = YOLOv3Clone(self._model.clone()) + return clone_model + + +class SOLOv2(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SOLOv2 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g solov2/model.pdmodel + :param params_file: (str)Path of parameters file, e.g solov2/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g solov2/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "SOLOv2 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.SOLOv2( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "SOLOv2 model initialize failed." + + def clone(self): + """Clone SOLOv2 object + + :return: a new SOLOv2 object + """ + + class SOLOv2Clone(SOLOv2): + def __init__(self, model): + self._model = model + + clone_model = SOLOv2Clone(self._model.clone()) + return clone_model + + +class MaskRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a MaskRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fasterrcnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fasterrcnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "MaskRCNN model only support model format of ModelFormat.Paddle now." 
+ self._model = C.vision.detection.MaskRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "MaskRCNN model initialize failed." + + def batch_predict(self, images): + """Detect a batch of input image list, batch_predict is not supported for maskrcnn now. + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + raise Exception("batch_predict is not supported for MaskRCNN model now.") + + def clone(self): + """Clone MaskRCNN object + + :return: a new MaskRCNN object + """ + + class MaskRCNNClone(MaskRCNN): + def __init__(self, model): + self._model = model + + clone_model = MaskRCNNClone(self._model.clone()) + return clone_model + + +class SSD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SSD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ssd/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ssd/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "SSD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.SSD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "SSD model initialize failed." + + def clone(self): + """Clone SSD object + + :return: a new SSD object + """ + + class SSDClone(SSD): + def __init__(self, model): + self._model = model + + clone_model = SSDClone(self._model.clone()) + return clone_model + + +class PaddleYOLOv5(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv5 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov5/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov5/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv5 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv5( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv5 model initialize failed." + + +class PaddleYOLOv6(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv6 model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g yolov6/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov6/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv6 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv6( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv6 model initialize failed." + + +class PaddleYOLOv7(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv7 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov7/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov7/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv7 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv7( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv7 model initialize failed." + + +class PaddleYOLOv8(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv8 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov8/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov8/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g yolov8/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PaddleYOLOv8( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv8 model initialize failed." + + +class RTMDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a RTMDet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g rtmdet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g rtmdet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "RTMDet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.RTMDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "RTMDet model initialize failed." + + +class CascadeRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a CascadeRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g cascadercnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g cascadercnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "CascadeRCNN model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.CascadeRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "CascadeRCNN model initialize failed." + + +class PSSDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PSSDet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g pssdet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g pssdet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PSSDet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PSSDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PSSDet model initialize failed." + + +class RetinaNet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a RetinaNet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g retinanet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g retinanet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "RetinaNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.RetinaNet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "RetinaNet model initialize failed." + + +class PPYOLOESOD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLOESOD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyoloesod/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloesod/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPYOLOESOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PPYOLOESOD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPYOLOESOD model initialize failed." + + +class FCOS(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a FCOS model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fcos/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fcos/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "FCOS model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.FCOS( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "FCOS model initialize failed." + + +class TTFNet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a TTFNet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g ttfnet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ttfnet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "TTFNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TTFNet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "TTFNet model initialize failed." + + +class TOOD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a TOOD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g tood/model.pdmodel + :param params_file: (str)Path of parameters file, e.g tood/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "TOOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TOOD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "TOOD model initialize failed." + + +class GFL(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a GFL model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g gfl/model.pdmodel + :param params_file: (str)Path of parameters file, e.g gfl/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "GFL model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.GFL( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "GFL model initialize failed." + + +class PaddleDetectionModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PaddleDetectionModel model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g ppyoloe/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PaddleDetectionModel, self).__init__(runtime_option) + + self._model = C.vision.detection.PaddleDetectionModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleDetectionModel model initialize failed." + + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + def clone(self): + """Clone PPYOLOE object + + :return: a new PPYOLOE object + """ + + class PPYOLOEClone(PPYOLOE): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOEClone(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleDetPreprocessor object of the loaded model + + :return PaddleDetPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleDetPostprocessor object of the loaded model + + :return PaddleDetPostprocessor + """ + return self._model.postprocessor + + +class PPYOLOER(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLOER model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyoloe_r/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe_r/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe_r/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PPYOLOER( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PicoDet model initialize failed." 
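+    # --- Illustrative batch-inference sketch (hedged): the image file names below are
+    # hypothetical; this block only illustrates the batch_predict() and clone() methods
+    # defined for this family of wrappers, it is not part of the API added here.
+    #
+    #   import cv2
+    #   frames = [cv2.imread(p) for p in ("a.jpg", "b.jpg", "c.jpg")]
+    #   results = model.batch_predict(frames)  # one DetectionResult per image
+    #   for r in results:
+    #       # boxes, scores and label_ids describe the detections for one image
+    #       print(len(r.boxes), len(r.scores), len(r.label_ids))
+    #
+    # clone() returns a new wrapper around self._model.clone(); a common use (assumption)
+    # is giving each worker thread its own handle to the same loaded model.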
+ + def clone(self): + """Clone PPYOLOER object + + :return: a new PPYOLOER object + """ + + class PPYOLOERClone(PPYOLOER): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOERClone(self._model.clone()) + return clone_model diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py new file mode 100755 index 0000000000..3b2bbf6518 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py @@ -0,0 +1,17 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from .classify import eval_classify +from .detection import eval_detection +from .segmentation import eval_segmentation diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py new file mode 100755 index 0000000000..f39cdf5e59 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py @@ -0,0 +1,79 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
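+
+# --- Illustrative usage sketch (hedged): the dataset directory and label-file name
+# below are hypothetical; this block only shows how eval_classify defined in this
+# module is typically called.
+#
+#   from ultrainfer.vision.evaluation import eval_classify
+#
+#   # label file: one "<image_name> <label_id>" pair per line, as parsed below
+#   scores = eval_classify(model, "imagenet/val/", "imagenet/val_list.txt", topk=5)
+#   print(scores)  # OrderedDict with "topk5" and "topk5_average_inference_time(s)"
+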
+import numpy as np +import os +import re +import time +import collections + + +def topk_accuracy(topk_list, label_list): + match_array = np.logical_or.reduce(topk_list == label_list, axis=1) + topk_acc_score = match_array.sum() / match_array.shape[0] + return topk_acc_score + + +def eval_classify(model, image_file_path, label_file_path, topk=5): + from tqdm import trange + import cv2 + import math + + result_list = [] + label_list = [] + image_label_dict = {} + assert os.path.isdir( + image_file_path + ), "The image_file_path:{} is not a directory.".format(image_file_path) + assert os.path.isfile( + label_file_path + ), "The label_file_path:{} is not a file.".format(label_file_path) + assert isinstance(topk, int), "The tok:{} is not int type".format(topk) + + with open(label_file_path, "r") as file: + lines = file.readlines() + for line in lines: + items = line.strip().split() + image_name = items[0] + label = items[1] + image_label_dict[image_name] = int(label) + images_num = len(image_label_dict) + twenty_percent_images_num = math.ceil(images_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + scores = collections.OrderedDict() + for (image, label), i in zip( + image_label_dict.items(), trange(images_num, desc="Inference Progress") + ): + if i == twenty_percent_images_num: + start_time = time.time() + + label_list.append([label]) + image_path = os.path.join(image_file_path, image) + im = cv2.imread(image_path) + result = model.predict(im, topk) + result_list.append(result.label_ids) + if i == images_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (images_num - twenty_percent_images_num), 4 + ) + topk_acc_score = topk_accuracy(np.array(result_list), np.array(label_list)) + if topk == 1: + scores.update({"topk1": topk_acc_score}) + scores.update({"topk1_average_inference_time(s)": average_inference_time}) + elif topk == 5: + scores.update({"topk5": topk_acc_score}) + scores.update({"topk5_average_inference_time(s)": average_inference_time}) + return scores diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py new file mode 100755 index 0000000000..00f228762b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py @@ -0,0 +1,125 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
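+
+# --- Illustrative usage sketch (hedged): the dataset paths below are hypothetical;
+# this block only shows how eval_detection defined in this module is typically called.
+#
+#   from ultrainfer.vision.evaluation import eval_detection
+#
+#   # Evaluates a detector against a COCO-format annotation file. conf_threshold and
+#   # nms_iou_threshold must be given together or both left as None (see the asserts below).
+#   metrics = eval_detection(
+#       model,
+#       data_dir="coco/val2017",
+#       ann_file="coco/annotations/instances_val2017.json",
+#       conf_threshold=0.01,
+#       nms_iou_threshold=0.65,
+#   )
+#   print(metrics)  # includes "bbox_mmap" and "average_inference_time(s)"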
+ +import numpy as np +import copy +import collections +import math + + +def eval_detection( + model, + data_dir, + ann_file, + conf_threshold=None, + nms_iou_threshold=None, + plot=False, + batch_size=1, +): + from .utils import CocoDetection + from .utils import COCOMetric + import cv2 + from tqdm import trange + import time + + if conf_threshold is not None or nms_iou_threshold is not None: + assert ( + conf_threshold is not None and nms_iou_threshold is not None + ), "The conf_threshold and nms_iou_threshold should be setted at the same time" + assert isinstance( + conf_threshold, (float, int) + ), "The conf_threshold:{} need to be int or float".format(conf_threshold) + assert isinstance( + nms_iou_threshold, (float, int) + ), "The nms_iou_threshold:{} need to be int or float".format(nms_iou_threshold) + eval_dataset = CocoDetection(data_dir=data_dir, ann_file=ann_file, shuffle=False) + all_image_info = eval_dataset.file_list + image_num = eval_dataset.num_samples + eval_dataset.data_fields = { + "im_id", + "image_shape", + "image", + "gt_bbox", + "gt_class", + "is_crowd", + } + eval_metric = COCOMetric( + coco_gt=copy.deepcopy(eval_dataset.coco_gt), classwise=False + ) + scores = collections.OrderedDict() + twenty_percent_image_num = math.ceil(image_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + im_list = list() + im_id_list = list() + for image_info, i in zip( + all_image_info, trange(image_num, desc="Inference Progress") + ): + if i == twenty_percent_image_num: + start_time = time.time() + im = cv2.imread(image_info["image"]) + im_id = image_info["im_id"] + if batch_size == 1: + if conf_threshold is None and nms_iou_threshold is None: + result = model.predict(im.copy()) + else: + result = model.predict(im, conf_threshold, nms_iou_threshold) + pred = { + "bbox": [ + [c] + [s] + b + for b, s, c in zip(result.boxes, result.scores, result.label_ids) + ], + "bbox_num": len(result.boxes), + "im_id": im_id, + } + eval_metric.update(im_id, pred) + else: + im_list.append(im) + im_id_list.append(im_id) + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: + continue + if conf_threshold is None and nms_iou_threshold is None: + results = model.batch_predict(im_list) + else: + model.postprocessor.conf_threshold = conf_threshold + model.postprocessor.nms_threshold = nms_iou_threshold + results = model.batch_predict(im_list) + for k in range(len(im_list)): + pred = { + "bbox": [ + [c] + [s] + b + for b, s, c in zip( + results[k].boxes, results[k].scores, results[k].label_ids + ) + ], + "bbox_num": len(results[k].boxes), + "im_id": im_id_list[k], + } + eval_metric.update(im_id_list[k], pred) + im_list.clear() + im_id_list.clear() + + if i == image_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (image_num - twenty_percent_image_num), 4 + ) + eval_metric.accumulate() + eval_details = eval_metric.details + scores.update(eval_metric.get()) + scores.update({"average_inference_time(s)": average_inference_time}) + eval_metric.reset() + return scores diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py new file mode 100755 index 0000000000..7cff221dd4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from tqdm import trange +import numpy as np +import collections +import os +import math +import time + + +def eval_segmentation(model, data_dir, batch_size=1): + import cv2 + from .utils import Cityscapes + from .utils import f1_score, calculate_area, mean_iou, accuracy, kappa + + assert os.path.isdir(data_dir), "The image_file_path:{} is not a directory.".format( + data_dir + ) + eval_dataset = Cityscapes(dataset_root=data_dir, mode="val") + file_list = eval_dataset.file_list + image_num = eval_dataset.num_samples + num_classes = eval_dataset.num_classes + intersect_area_all = 0 + pred_area_all = 0 + label_area_all = 0 + conf_mat_all = [] + twenty_percent_image_num = math.ceil(image_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + im_list = [] + label_list = [] + for image_label_path, i in zip( + file_list, trange(image_num, desc="Inference Progress") + ): + if i == twenty_percent_image_num: + start_time = time.time() + im = cv2.imread(image_label_path[0]) + label = cv2.imread(image_label_path[1], cv2.IMREAD_GRAYSCALE) + label_list.append(label) + if batch_size == 1: + result = model.predict(im) + results = [result] + else: + im_list.append(im) + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: + continue + results = model.batch_predict(im_list) + if i == image_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (image_num - twenty_percent_image_num), 4 + ) + for result, label in zip(results, label_list): + pred = np.array(result.label_map).reshape(result.shape[0], result.shape[1]) + intersect_area, pred_area, label_area = calculate_area( + pred, label, num_classes + ) + intersect_area_all = intersect_area_all + intersect_area + pred_area_all = pred_area_all + pred_area + label_area_all = label_area_all + label_area + im_list.clear() + label_list.clear() + + class_iou, miou = mean_iou(intersect_area_all, pred_area_all, label_area_all) + class_acc, oacc = accuracy(intersect_area_all, pred_area_all) + kappa_res = kappa(intersect_area_all, pred_area_all, label_area_all) + category_f1score = f1_score(intersect_area_all, pred_area_all, label_area_all) + + eval_metrics = collections.OrderedDict( + zip( + [ + "miou", + "category_iou", + "oacc", + "category_acc", + "kappa", + "category_F1-score", + "average_inference_time(s)", + ], + [ + miou, + class_iou, + oacc, + class_acc, + kappa_res, + category_f1score, + average_inference_time, + ], + ) + ) + return eval_metrics diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py new file mode 100755 index 0000000000..7289bbe271 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py @@ -0,0 +1,23 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import fd_logging +from .util import * +from .coco_metrics import * +from .seg_metrics import * +from .json_results import * +from .map_utils import * +from .coco_utils import * +from .coco import * +from .cityscapes import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py new file mode 100755 index 0000000000..bd39335a87 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py @@ -0,0 +1,78 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +from . import fd_logging as logging + +# import fd_logging as logging + + +class Cityscapes(object): + """ + Cityscapes dataset `https://www.cityscapes-dataset.com/`. + The folder structure is as follow: + + cityscapes + | + |--leftImg8bit + | |--train + | |--val + | |--test + | + |--gtFine + | |--train + | |--val + | |--test + + Args: + dataset_root (str): Cityscapes dataset directory. + """ + + NUM_CLASSES = 19 + + def __init__(self, dataset_root, mode): + self.dataset_root = dataset_root + self.file_list = list() + mode = mode.lower() + self.mode = mode + self.num_classes = self.NUM_CLASSES + self.ignore_index = 255 + + img_dir = os.path.join(self.dataset_root, "leftImg8bit") + label_dir = os.path.join(self.dataset_root, "gtFine") + if ( + self.dataset_root is None + or not os.path.isdir(self.dataset_root) + or not os.path.isdir(img_dir) + or not os.path.isdir(label_dir) + ): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." + ) + + label_files = sorted( + glob.glob(os.path.join(label_dir, mode, "*", "*_gtFine_labelTrainIds.png")) + ) + img_files = sorted( + glob.glob(os.path.join(img_dir, mode, "*", "*_leftImg8bit.png")) + ) + + self.file_list = [ + [img_path, label_path] + for img_path, label_path in zip(img_files, label_files) + ] + + self.num_samples = len(self.file_list) + logging.info("{} samples in file {}".format(self.num_samples, img_dir)) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py new file mode 100755 index 0000000000..e4b702cb46 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py @@ -0,0 +1,176 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import copy +import os.path as osp +import sys +import numpy as np +from . import fd_logging as logging +from .util import is_pic, get_num_workers + + +class CocoDetection(object): + """读取MSCOCO格式的检测数据集,并对样本进行相应的处理,该格式的数据集同样可以应用到实例分割模型的训练中。 + + Args: + data_dir (str): 数据集所在的目录路径。 + ann_file (str): 数据集的标注文件,为一个独立的json格式文件。 + num_workers (int|str): 数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据 + 系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。 + shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。 + allow_empty (bool): 是否加载负样本。默认为False。 + empty_ratio (float): 用于指定负样本占总样本数的比例。如果小于0或大于等于1,则保留全部的负样本。默认为1。 + """ + + def __init__( + self, + data_dir, + ann_file, + num_workers="auto", + shuffle=False, + allow_empty=False, + empty_ratio=1.0, + ): + + from pycocotools.coco import COCO + + self.data_dir = data_dir + self.data_fields = None + self.num_max_boxes = 1000 + self.num_workers = get_num_workers(num_workers) + self.shuffle = shuffle + self.allow_empty = allow_empty + self.empty_ratio = empty_ratio + self.file_list = list() + neg_file_list = list() + self.labels = list() + + coco = COCO(ann_file) + self.coco_gt = coco + img_ids = sorted(coco.getImgIds()) + cat_ids = coco.getCatIds() + catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)}) + cname2clsid = dict( + { + coco.loadCats(catid)[0]["name"]: clsid + for catid, clsid in catid2clsid.items() + } + ) + for label, cid in sorted(cname2clsid.items(), key=lambda d: d[1]): + self.labels.append(label) + logging.info("Starting to read file list from dataset...") + + ct = 0 + for img_id in img_ids: + is_empty = False + img_anno = coco.loadImgs(img_id)[0] + im_fname = osp.join(data_dir, img_anno["file_name"]) + if not is_pic(im_fname): + continue + im_w = float(img_anno["width"]) + im_h = float(img_anno["height"]) + ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) + instances = coco.loadAnns(ins_anno_ids) + + bboxes = [] + for inst in instances: + x, y, box_w, box_h = inst["bbox"] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(im_w - 1, x1 + max(0, box_w)) + y2 = min(im_h - 1, y1 + max(0, box_h)) + if inst["area"] > 0 and x2 >= x1 and y2 >= y1: + inst["clean_bbox"] = [x1, y1, x2, y2] + bboxes.append(inst) + else: + logging.warning( + "Found an invalid bbox in annotations: " + "im_id: {}, area: {} x1: {}, y1: {}, x2: {}, y2: {}.".format( + img_id, float(inst["area"]), x1, y1, x2, y2 + ) + ) + num_bbox = len(bboxes) + if num_bbox == 0 and not self.allow_empty: + continue + elif num_bbox == 0: + is_empty = True + + gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) + gt_class = np.zeros((num_bbox, 1), dtype=np.int32) + gt_score = np.ones((num_bbox, 1), dtype=np.float32) + is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) + difficult = np.zeros((num_bbox, 1), dtype=np.int32) + gt_poly = [None] * num_bbox + + has_segmentation = False + for i, box in reversed(list(enumerate(bboxes))): + catid = box["category_id"] + gt_class[i][0] = 
catid2clsid[catid] + gt_bbox[i, :] = box["clean_bbox"] + is_crowd[i][0] = box["iscrowd"] + if "segmentation" in box and box["iscrowd"] == 1: + gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + elif "segmentation" in box and box["segmentation"]: + if ( + not np.array(box["segmentation"], dtype=object).size > 0 + and not self.allow_empty + ): + gt_poly.pop(i) + is_crowd = np.delete(is_crowd, i) + gt_class = np.delete(gt_class, i) + gt_bbox = np.delete(gt_bbox, i) + else: + gt_poly[i] = box["segmentation"] + has_segmentation = True + if has_segmentation and not any(gt_poly) and not self.allow_empty: + continue + + im_info = { + "im_id": np.array([img_id]).astype("int32"), + "image_shape": np.array([im_h, im_w]).astype("int32"), + } + label_info = { + "is_crowd": is_crowd, + "gt_class": gt_class, + "gt_bbox": gt_bbox, + "gt_score": gt_score, + "gt_poly": gt_poly, + "difficult": difficult, + } + + if is_empty: + neg_file_list.append({"image": im_fname, **im_info, **label_info}) + else: + self.file_list.append({"image": im_fname, **im_info, **label_info}) + ct += 1 + + self.num_max_boxes = max(self.num_max_boxes, len(instances)) + + if not ct: + logging.error("No coco record found in %s' % (ann_file)", exit=True) + self.pos_num = len(self.file_list) + if self.allow_empty and neg_file_list: + self.file_list += self._sample_empty(neg_file_list) + logging.info( + "{} samples in file {}, including {} positive samples and {} negative samples.".format( + len(self.file_list), + ann_file, + self.pos_num, + len(self.file_list) - self.pos_num, + ) + ) + self.num_samples = len(self.file_list) + + self._epoch = 0 diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py new file mode 100755 index 0000000000..18dfdc3bfa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
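+
+# --- Illustrative flow sketch (hedged): `predictions` below stands for the
+# {"bbox": ..., "bbox_num": ..., "im_id": ...} dict built by eval_detection; the loop
+# variable names are hypothetical and only illustrate how COCOMetric is driven.
+#
+#   metric = COCOMetric(coco_gt=eval_dataset.coco_gt)
+#   for im_id, predictions in per_image_results:  # one update per evaluated image
+#       metric.update(im_id, predictions)
+#   metric.accumulate()  # runs the COCO API evaluation over the collected results
+#   print(metric.get())  # e.g. {"bbox_mmap": ...}
+#   metric.reset()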
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import sys +from collections import OrderedDict +from .coco_utils import get_infer_results, cocoapi_eval + + +class COCOMetric(object): + def __init__(self, coco_gt, **kwargs): + self.clsid2catid = { + i: cat["id"] for i, cat in enumerate(coco_gt.loadCats(coco_gt.getCatIds())) + } + self.coco_gt = coco_gt + self.classwise = kwargs.get("classwise", False) + self.bias = 0 + self.reset() + + def reset(self): + # only bbox and mask evaluation support currently + self.details = { + "gt": copy.deepcopy(self.coco_gt.dataset), + "bbox": [], + "mask": [], + } + self.eval_stats = {} + + def update(self, im_id, outputs): + outs = {} + # outputs Tensor -> numpy.ndarray + for k, v in outputs.items(): + outs[k] = v + + outs["im_id"] = im_id + infer_results = get_infer_results(outs, self.clsid2catid, bias=self.bias) + self.details["bbox"] += infer_results["bbox"] if "bbox" in infer_results else [] + self.details["mask"] += infer_results["mask"] if "mask" in infer_results else [] + + def accumulate(self): + if len(self.details["bbox"]) > 0: + bbox_stats = cocoapi_eval( + copy.deepcopy(self.details["bbox"]), + "bbox", + coco_gt=self.coco_gt, + classwise=self.classwise, + ) + self.eval_stats["bbox"] = bbox_stats + sys.stdout.flush() + + if len(self.details["mask"]) > 0: + seg_stats = cocoapi_eval( + copy.deepcopy(self.details["mask"]), + "segm", + coco_gt=self.coco_gt, + classwise=self.classwise, + ) + self.eval_stats["mask"] = seg_stats + sys.stdout.flush() + + def log(self): + pass + + def get(self): + if "bbox" not in self.eval_stats: + return {"bbox_mmap": 0.0} + if "mask" in self.eval_stats: + return OrderedDict( + zip( + ["bbox_mmap", "segm_mmap"], + [self.eval_stats["bbox"][0], self.eval_stats["mask"][0]], + ) + ) + else: + return {"bbox_mmap": self.eval_stats["bbox"][0]} diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py new file mode 100755 index 0000000000..aeedac7500 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py @@ -0,0 +1,233 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import numpy as np +from .map_utils import draw_pr_curve +from .json_results import ( + get_det_res, + get_det_poly_res, + get_seg_res, + get_solov2_segm_res, +) +from . import fd_logging as logging +import copy + + +def loadRes(coco_obj, anns): + """ + Load result file and return a result api object. + :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + + # This function has the same functionality as pycocotools.COCO.loadRes, + # except that the input anns is list of results rather than a json file. 
+ # Refer to
+ # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/coco.py#L305.
+
+ # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
+ # or matplotlib.backends is imported for the first time
+ # pycocotools import matplotlib
+ import matplotlib
+
+ matplotlib.use("Agg")
+ from pycocotools.coco import COCO
+ import pycocotools.mask as maskUtils
+ import time
+
+ res = COCO()
+ res.dataset["images"] = [img for img in coco_obj.dataset["images"]]
+
+ tic = time.time()
+ assert isinstance(anns, list), "results is not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ assert set(annsImgIds) == (
+ set(annsImgIds) & set(coco_obj.getImgIds())
+ ), "Results do not correspond to current coco set"
+ if "caption" in anns[0]:
+ imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+ [ann["image_id"] for ann in anns]
+ )
+ res.dataset["images"] = [
+ img for img in res.dataset["images"] if img["id"] in imgIds
+ ]
+ for id, ann in enumerate(anns):
+ ann["id"] = id + 1
+ elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ bb = ann["bbox"]
+ x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+ if not "segmentation" in ann:
+ ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentation" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ # now only support compressed RLE format as segmentation results
+ ann["area"] = maskUtils.area(ann["segmentation"])
+ if not "bbox" in ann:
+ ann["bbox"] = maskUtils.toBbox(ann["segmentation"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "keypoints" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ s = ann["keypoints"]
+ x = s[0::3]
+ y = s[1::3]
+ x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+ ann["area"] = (x1 - x0) * (y1 - y0)
+ ann["id"] = id + 1
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+
+ res.dataset["annotations"] = anns
+ res.createIndex()
+ return res
+
+
+def get_infer_results(outs, catid, bias=0):
+ """
+ Get result at the stage of inference.
+ The output format is a dictionary containing bbox or mask results.
+
+ For example, bbox result is a list and each element contains
+ image_id, category_id, bbox and score.
+ """
+ if outs is None or len(outs) == 0:
+ raise ValueError(
+ "The number of valid detection results is zero. Please use a reasonable model and check the input data."
+ )
+
+ im_id = outs["im_id"]
+
+ infer_res = {}
+ if "bbox" in outs:
+ if len(outs["bbox"]) > 0 and len(outs["bbox"][0]) > 6:
+ infer_res["bbox"] = get_det_poly_res(
+ outs["bbox"], outs["bbox_num"], im_id, catid, bias=bias
+ )
+ else:
+ infer_res["bbox"] = get_det_res(
+ outs["bbox"], outs["bbox_num"], im_id, catid, bias=bias
+ )
+
+ if "mask" in outs:
+ # mask post process
+ infer_res["mask"] = get_seg_res(
+ outs["mask"], outs["bbox"], outs["bbox_num"], im_id, catid
+ )
+
+ if "segm" in outs:
+ infer_res["segm"] = get_solov2_segm_res(outs, im_id, catid)
+
+ return infer_res
+
+
+def cocoapi_eval(
+ anns,
+ style,
+ coco_gt=None,
+ anno_file=None,
+ max_dets=(100, 300, 1000),
+ classwise=False,
+):
+ """
+ Args:
+ anns: Evaluation result.
+ style (str): COCOeval style, can be `bbox`, `segm` and `proposal`.
+ coco_gt (str): Whether to load COCOAPI through anno_file, + eg: coco_gt = COCO(anno_file) + anno_file (str): COCO annotations file. + max_dets (tuple): COCO evaluation maxDets. + classwise (bool): Whether per-category AP and draw P-R Curve or not. + """ + assert coco_gt is not None or anno_file is not None + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + if coco_gt is None: + coco_gt = COCO(anno_file) + logging.info("Start evaluate...") + coco_dt = loadRes(coco_gt, anns) + if style == "proposal": + coco_eval = COCOeval(coco_gt, coco_dt, "bbox") + coco_eval.params.useCats = 0 + coco_eval.params.maxDets = list(max_dets) + else: + coco_eval = COCOeval(coco_gt, coco_dt, style) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + if classwise: + # Compute per-category AP and PR curve + try: + from terminaltables import AsciiTable + except Exception as e: + logging.error( + "terminaltables not found, plaese install terminaltables. " + "for example: `pip install terminaltables`." + ) + raise e + precisions = coco_eval.eval["precision"] + cat_ids = coco_gt.getCatIds() + # precision: (iou, recall, cls, area range, max dets) + assert len(cat_ids) == precisions.shape[2] + results_per_category = [] + for idx, catId in enumerate(cat_ids): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + nm = coco_gt.loadCats(catId)[0] + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + if precision.size: + ap = np.mean(precision) + else: + ap = float("nan") + results_per_category.append((str(nm["name"]), "{:0.3f}".format(float(ap)))) + pr_array = precisions[0, :, idx, 0, 2] + recall_array = np.arange(0.0, 1.01, 0.01) + draw_pr_curve( + pr_array, + recall_array, + out_dir=style + "_pr_curve", + file_name="{}_precision_recall_curve.jpg".format(nm["name"]), + ) + + num_columns = min(6, len(results_per_category) * 2) + + import itertools + + results_flatten = list(itertools.chain(*results_per_category)) + headers = ["category", "AP"] * (num_columns // 2) + results_2d = itertools.zip_longest( + *[results_flatten[i::num_columns] for i in range(num_columns)] + ) + table_data = [headers] + table_data += [result for result in results_2d] + table = AsciiTable(table_data) + logging.info("Per-category of {} AP: \n{}".format(style, table.table)) + logging.info( + "per-category PR curve has output to {} folder.".format(style + "_pr_curve") + ) + # flush coco evaluation result + sys.stdout.flush() + return coco_eval.stats diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py new file mode 100755 index 0000000000..7f8b27444f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py @@ -0,0 +1,61 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
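Taken together, coco_metrics.py and coco_utils.py support an evaluation loop along the lines of the sketch below. The annotation path is a placeholder, the detector call is faked with one dummy box, and the `outputs` layout (an N x 6 `bbox` array of `[class_id, score, x1, y1, x2, y2]` plus `bbox_num`, with a plain integer `im_id`) is an assumption inferred from `get_det_res`; adapt it to whatever the real model wrapper returns.

```python
import numpy as np
from pycocotools.coco import COCO
from ultrainfer.vision.evaluation.utils.coco_metrics import COCOMetric

coco_gt = COCO("annotations/instances_val2017.json")  # hypothetical annotation file
metric = COCOMetric(coco_gt, classwise=False)

for img_id in coco_gt.getImgIds()[:10]:  # small illustrative subset
    # Stand-in for real detector outputs converted to numpy:
    # one box of class 0 with score 0.9 in [x1, y1, x2, y2] order.
    outputs = {
        "bbox": np.array([[0, 0.9, 10.0, 20.0, 110.0, 220.0]], dtype="float32"),
        "bbox_num": 1,
    }
    metric.update(img_id, outputs)

metric.accumulate()
print(metric.get())  # e.g. {"bbox_mmap": ...}
```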
+ +import time +import os +import sys +import colorama +from colorama import init + +init(autoreset=True) +levels = {0: "ERROR", 1: "WARNING", 2: "INFO", 3: "DEBUG"} + + +def log(level=2, message="", use_color=False): + current_time = time.time() + time_array = time.localtime(current_time) + current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) + if use_color: + print( + "\033[1;31;40m{} [{}]\t{}\033[0m".format( + current_time, levels[level], message + ) + .encode("utf-8") + .decode("latin1") + ) + else: + print( + "{} [{}]\t{}".format(current_time, levels[level], message) + .encode("utf-8") + .decode("latin1") + ) + sys.stdout.flush() + + +def debug(message="", use_color=False): + log(level=3, message=message, use_color=use_color) + + +def info(message="", use_color=False): + log(level=2, message=message, use_color=use_color) + + +def warning(message="", use_color=True): + log(level=1, message=message, use_color=use_color) + + +def error(message="", use_color=True, exit=True): + log(level=0, message=message, use_color=use_color) + if exit: + sys.exit(-1) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py new file mode 100755 index 0000000000..3633cbaf08 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py @@ -0,0 +1,162 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
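The fd_logging helpers above are thin wrappers around print with a timestamp and a level tag; error() additionally calls sys.exit(-1) unless exit=False is passed. A quick usage sketch (the import path assumes the package layout introduced by this patch):

```python
from ultrainfer.vision.evaluation.utils import fd_logging as logging

logging.info("loading annotations")                # "<timestamp> [INFO]  loading annotations"
logging.warning("falling back to CPU")             # printed with ANSI color codes
logging.error("model file not found", exit=False)  # pass exit=False to keep the process alive
```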
+ +import six +import numpy as np + + +def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0): + det_res = [] + for i in range(bbox_nums): + cur_image_id = int(image_id) + dt = bboxes[i] + num_id, score, xmin, ymin, xmax, ymax = dt + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + bias + h = ymax - ymin + bias + bbox = [xmin, ymin, w, h] + dt_res = { + "image_id": cur_image_id, + "category_id": category_id, + "bbox": bbox, + "score": score, + } + det_res.append(dt_res) + return det_res + + +def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + rbox = [x1, y1, x2, y2, x3, y3, x4, y4] + dt_res = { + "image_id": cur_image_id, + "category_id": category_id, + "bbox": rbox, + "score": score, + } + det_res.append(dt_res) + return det_res + + +def strip_mask(mask): + row = mask[0, 0, :] + col = mask[0, :, 0] + im_h = len(col) - np.count_nonzero(col == -1) + im_w = len(row) - np.count_nonzero(row == -1) + return mask[:, :im_h, :im_w] + + +def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map): + import pycocotools.mask as mask_util + + seg_res = [] + k = 0 + for i in range(len(mask_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = mask_nums[i] + mask_i = masks[k : k + det_nums] + mask_i = strip_mask(mask_i) + for j in range(det_nums): + mask = mask_i[j].astype(np.uint8) + score = float(bboxes[k][1]) + label = int(bboxes[k][0]) + k = k + 1 + if label == -1: + continue + cat_id = label_to_cat_id_map[label] + rle = mask_util.encode( + np.array(mask[:, :, None], order="F", dtype="uint8") + )[0] + if six.PY3: + if "counts" in rle: + rle["counts"] = rle["counts"].decode("utf8") + sg_res = { + "image_id": cur_image_id, + "category_id": cat_id, + "segmentation": rle, + "score": score, + } + seg_res.append(sg_res) + return seg_res + + +def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map): + import pycocotools.mask as mask_util + + segm_res = [] + # for each batch + segms = results["segm"].astype(np.uint8) + clsid_labels = results["cate_label"] + clsid_scores = results["cate_score"] + lengths = segms.shape[0] + im_id = int(image_id[0][0]) + if lengths == 0 or segms is None: + return None + # for each sample + for i in range(lengths - 1): + clsid = int(clsid_labels[i]) + catid = num_id_to_cat_id_map[clsid] + score = float(clsid_scores[i]) + mask = segms[i] + segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order="F"))[0] + segm["counts"] = segm["counts"].decode("utf8") + coco_res = { + "image_id": im_id, + "category_id": catid, + "segmentation": segm, + "score": score, + } + segm_res.append(coco_res) + return segm_res + + +def get_keypoint_res(results, im_id): + anns = [] + preds = results["keypoint"] + for idx in range(im_id.shape[0]): + image_id = im_id[idx].item() + kpts, scores = preds[idx] + for kpt, score in zip(kpts, scores): + kpt = kpt.flatten() + ann = { + "image_id": image_id, + "category_id": 1, # XXX hard code + "keypoints": kpt.tolist(), + "score": float(score), + } + x = kpt[0::3] + y = kpt[1::3] + x0, x1, y0, y1 = ( + np.min(x).item(), + np.max(x).item(), + np.min(y).item(), + np.max(y).item(), + ) + ann["area"] = (x1 - x0) * (y1 - y0) 
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0] + anns.append(ann) + return anns diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py new file mode 100755 index 0000000000..28d9c56eeb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py @@ -0,0 +1,42 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import os + + +def draw_pr_curve( + precision, + recall, + iou=0.5, + out_dir="pr_curve", + file_name="precision_recall_curve.jpg", +): + if not os.path.exists(out_dir): + os.makedirs(out_dir) + output_path = os.path.join(out_dir, file_name) + try: + import matplotlib.pyplot as plt + except Exception as e: + # logger.error('Matplotlib not found, plaese install matplotlib.' + # 'for example: `pip install matplotlib`.') + raise e + plt.cla() + plt.figure("P-R Curve") + plt.title("Precision/Recall Curve(IoU={})".format(iou)) + plt.xlabel("Recall") + plt.ylabel("Precision") + plt.grid(True) + plt.plot(recall, precision) + plt.savefig(output_path) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py new file mode 100755 index 0000000000..8dbc2412ca --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py @@ -0,0 +1,144 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +def f1_score(intersect_area, pred_area, label_area): + class_f1_sco = [] + for i in range(len(intersect_area)): + if pred_area[i] + label_area[i] == 0: + f1_sco = 0 + elif pred_area[i] == 0: + f1_sco = 0 + else: + prec = intersect_area[i] / pred_area[i] + rec = intersect_area[i] / label_area[i] + f1_sco = 2 * prec * rec / (prec + rec) + class_f1_sco.append(f1_sco) + return np.array(class_f1_sco) + + +def calculate_area(pred, label, num_classes, ignore_index=255): + """ + Calculate intersect, prediction and label area + + Args: + pred (np.ndarray): The prediction by model. + label (np.ndarray): The ground truth of image. + num_classes (int): The unique number of target classes. + ignore_index (int): Specifies a target value that is ignored. Default: 255. + + Returns: + Numpy Array: The intersection area of prediction and the ground on all class. 
+ Numpy Array: The prediction area on all class. + Numpy Array: The ground truth area on all class + """ + if not pred.shape == label.shape: + raise ValueError( + "Shape of `pred` and `label should be equal, " + "but there are {} and {}.".format(pred.shape, label.shape) + ) + + mask = label != ignore_index + pred = pred + 1 + label = label + 1 + pred = pred * mask + label = label * mask + pred = np.eye(num_classes + 1)[pred] + label = np.eye(num_classes + 1)[label] + pred = pred[:, 1:] + label = label[:, 1:] + + pred_area = [] + label_area = [] + intersect_area = [] + + for i in range(num_classes): + pred_i = pred[:, :, i] + label_i = label[:, :, i] + pred_area_i = np.sum(pred_i) + label_area_i = np.sum(label_i) + intersect_area_i = np.sum(pred_i * label_i) + pred_area.append(pred_area_i) + label_area.append(label_area_i) + intersect_area.append(intersect_area_i) + return np.array(intersect_area), np.array(pred_area), np.array(label_area) + + +def mean_iou(intersect_area, pred_area, label_area): + """ + Calculate iou. + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes. + pred_area (np.ndarray): The prediction area on all classes. + label_area (np.ndarray): The ground truth area on all classes. + + Returns: + np.ndarray: iou on all classes. + float: mean iou of all classes. + """ + union = pred_area + label_area - intersect_area + class_iou = [] + for i in range(len(intersect_area)): + if union[i] == 0: + iou = 0 + else: + iou = intersect_area[i] / union[i] + class_iou.append(iou) + miou = np.mean(class_iou) + return np.array(class_iou), miou + + +def accuracy(intersect_area, pred_area): + """ + Calculate accuracy + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes.. + pred_area (np.ndarray): The prediction area on all classes. + + Returns: + np.ndarray: accuracy on all classes. + float: mean accuracy. + """ + class_acc = [] + for i in range(len(intersect_area)): + if pred_area[i] == 0: + acc = 0 + else: + acc = intersect_area[i] / pred_area[i] + class_acc.append(acc) + macc = np.sum(intersect_area) / np.sum(pred_area) + return np.array(class_acc), macc + + +def kappa(intersect_area, pred_area, label_area): + """ + Calculate kappa coefficient + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes.. + pred_area (np.ndarray): The prediction area on all classes. + label_area (np.ndarray): The ground truth area on all classes. + + Returns: + float: kappa coefficient. + """ + total_area = np.sum(label_area) + po = np.sum(intersect_area) / total_area + pe = np.sum(pred_area * label_area) / (total_area * total_area) + kappa = (po - pe) / (1 - pe) + return kappa diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py new file mode 100755 index 0000000000..8ecabb98cb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py @@ -0,0 +1,34 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import platform +import multiprocessing as mp + + +def is_pic(img_name): + valid_suffix = ["JPEG", "jpeg", "JPG", "jpg", "BMP", "bmp", "PNG", "png"] + suffix = img_name.split(".")[-1] + if suffix not in valid_suffix: + return False + return True + + +def get_num_workers(num_workers): + if not platform.system() == "Linux": + # Dataloader with multi-process model is not supported + # on MacOS and Windows currently. + return 0 + if num_workers == "auto": + num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 2 else 2 + return num_workers diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py new file mode 100755 index 0000000000..95fb7ec058 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.pfld import PFLD +from .contrib.pipnet import PIPNet +from .contrib.face_landmark_1000 import FaceLandmark1000 diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py new file mode 100755 index 0000000000..373847dd43 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class FaceLandmark1000(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by FaceLandmark1000. + + :param model_file: (str)Path of model file, e.g ./FaceLandmark1000.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(FaceLandmark1000, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FaceLandmark1000 only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.FaceLandmark1000( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "FaceLandmark1000 initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (128, 128) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (128, 128) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py new file mode 100755 index 0000000000..b11d272de9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PFLD(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by PFLD. + + :param model_file: (str)Path of model file, e.g pfld/pfld-106-v3.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(PFLD, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "PFLD only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.PFLD( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PFLD initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (112, 112) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (112, 112) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py new file mode 100755 index 0000000000..28e7d0b280 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py @@ -0,0 +1,118 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PIPNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by PIPNet. 
+ + :param model_file: (str)Path of model file, e.g ./PIPNet.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(PIPNet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "PIPNet only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.PIPNet( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PIPNet initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (256, 256) + """ + return self._model.size + + @property + def mean_vals(self): + """ + Returns the mean value of normlization, default mean_vals = [0.485f, 0.456f, 0.406f]; + """ + return self._model.mean_vals + + @property + def std_vals(self): + """ + Returns the std value of normlization, default std_vals = [0.229f, 0.224f, 0.225f]; + """ + return self._model.std_vals + + @property + def num_landmarks(self): + """ + Returns the number of landmarks + """ + return self._model.num_landmarks + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (256, 256) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @mean_vals.setter + def mean_vals(self, value): + assert isinstance( + value, list + ), "The value to set `mean_vals` must be type of list." + self._model.mean_vals = value + + @std_vals.setter + def std_vals(self, value): + assert isinstance( + value, list + ), "The value to set `std_vals` must be type of list." + self._model.std_vals = value + + @num_landmarks.setter + def num_landmarks(self, value): + assert isinstance( + value, int + ), "The value to set `std_vals` must be type of int." + self._model.num_landmarks = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py new file mode 100755 index 0000000000..d60a5ee076 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py @@ -0,0 +1,22 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
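As a usage sketch for the face-alignment wrappers above (PIPNet shown here): the model file name is a hypothetical ONNX export, and the landmark count must match whatever the exported network actually predicts.

```python
import cv2
from ultrainfer.vision.facealign import PIPNet

model = PIPNet("pipnet_resnet18_300w.onnx")  # hypothetical ONNX export
model.size = [256, 256]                      # preprocess size, (width, height)
model.num_landmarks = 68                     # must match the exported model

im = cv2.imread("face.jpg")
result = model.predict(im)                   # FaceAlignmentResult with the landmarks
print(result)
```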
+ +from __future__ import absolute_import +from .contrib.yolov5face import YOLOv5Face +from .contrib.yolov7face import * +from .contrib.centerface import * +from .contrib.blazeface import * +from .contrib.retinaface import RetinaFace +from .contrib.scrfd import SCRFD +from .contrib.ultraface import UltraFace diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py new file mode 100755 index 0000000000..00ce75b825 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py @@ -0,0 +1,146 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class BlazeFacePreprocessor: + def __init__(self): + """Create a preprocessor for BlazeFace""" + self._preprocessor = C.vision.facedet.BlazeFacePreprocessor() + + def run(self, input_ims): + """Preprocess input images for BlazeFace + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def is_scale_(self): + """ + is_scale_ for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_ + + @is_scale_.setter + def is_scale_(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_` must be type of bool." 
+ self._preprocessor.is_scale_ = value + + +class BlazeFacePostprocessor: + def __init__(self): + """Create a postprocessor for BlazeFace""" + self._postprocessor = C.vision.facedet.BlazeFacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for BlazeFace + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.3 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class BlazeFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + config_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a BlazeFace model exported by BlazeFace. + + :param model_file: (str)Path of model file, e.g ./Blazeface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(BlazeFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.BlazeFace( + model_file, params_file, config_file, self._runtime_option, model_format + ) + + assert self.initialized, "BlazeFace initialize failed." 
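Unlike the older contrib wrappers, BlazeFace follows the preprocessor/postprocessor pattern, so thresholds are set on the postprocessor rather than passed to predict(). A sketch with hypothetical Paddle inference files; the attribute names on the bound postprocessor are assumed to mirror the Python wrapper above.

```python
import cv2
from ultrainfer.vision.facedet import BlazeFace

model = BlazeFace(
    "blazeface/inference.pdmodel",    # hypothetical exported model
    "blazeface/inference.pdiparams",
    "blazeface/inference.yml",
)
model.postprocessor.conf_threshold = 0.6
model.postprocessor.nms_threshold = 0.3

im = cv2.imread("group_photo.jpg")
result = model.predict(im)            # FaceDetectionResult
```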
+ + def predict(self, input_image): + """Detect the location and key points of human faces from an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceDetectionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of FaceDetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get BlazefacePreprocessor object of the loaded model + + :return BlazefacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get BlazefacePostprocessor object of the loaded model + + :return BlazefacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py new file mode 100755 index 0000000000..6bda7fb666 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py @@ -0,0 +1,150 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class CenterFacePreprocessor: + def __init__(self): + """Create a preprocessor for CenterFace""" + self._preprocessor = C.vision.facedet.CenterFacePreprocessor() + + def run(self, input_ims): + """Preprocess input images for CenterFace + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." 
+ assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class CenterFacePostprocessor: + def __init__(self): + """Create a postprocessor for CenterFace""" + self._postprocessor = C.vision.facedet.CenterFacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for CenterFace + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.3 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class CenterFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a CenterFace model exported by CenterFace. + + :param model_file: (str)Path of model file, e.g ./CenterFace.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(CenterFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.CenterFace( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "CenterFace initialize failed." 
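CenterFace uses the same preprocessor/postprocessor split and also supports batched inference via batch_predict(). A sketch with a hypothetical ONNX export:

```python
import cv2
from ultrainfer.vision.facedet import CenterFace

model = CenterFace("CenterFace.onnx")   # hypothetical ONNX export
model.preprocessor.size = [640, 640]    # resize target, (width, height)

images = [cv2.imread("a.jpg"), cv2.imread("b.jpg")]
results = model.batch_predict(images)   # one FaceDetectionResult per image
for res in results:
    print(res)
```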
+ + def predict(self, input_image): + """Detect the location and key points of human faces from an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceDetectionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get CenterFacePreprocessor object of the loaded model + + :return CenterFacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CenterFacePostprocessor object of the loaded model + + :return CenterFacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py new file mode 100755 index 0000000000..f3e72cfb0d --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py @@ -0,0 +1,134 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class RetinaFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a RetinaFace model exported by RetinaFace. + + :param model_file: (str)Path of model file, e.g ./retinaface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RetinaFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.RetinaFace( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RetinaFace initialize failed." 
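All of these wrappers accept an optional runtime_option to choose the backend and device. The sketch below assumes ultrainfer.RuntimeOption keeps the FastDeploy-style use_cpu()/use_gpu() methods, which this patch excerpt does not confirm; check ultrainfer/runtime.py for the actual API.

```python
import ultrainfer as ui
from ultrainfer.vision.facedet import RetinaFace

option = ui.RuntimeOption()
option.use_cpu()   # assumed method name; see ultrainfer/runtime.py for the real API

model = RetinaFace(
    "Pytorch_RetinaFace_mobile0.25-640-640.onnx",  # hypothetical ONNX export
    runtime_option=option,
)
```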
+ + def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.7 + :param nms_iou_threshold: iou threashold for NMS, default is 0.3 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [640, 480]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (640, 640) + """ + return self._model.size + + @property + def variance(self): + """ + Argument for image postprocessing step, variance in RetinaFace's prior-box(anchor) generate process, default (0.1, 0.2) + """ + return self._model.variance + + @property + def downsample_strides(self): + """ + Argument for image postprocessing step, downsample strides (namely, steps) for RetinaFace to generate anchors, will take (8,16,32) as default values + """ + return self._model.downsample_strides + + @property + def min_sizes(self): + """ + Argument for image postprocessing step, min sizes, width and height for each anchor, default min_sizes = [[16, 32], [64, 128], [256, 512]] + """ + return self._model.min_sizes + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in RetinaFace + """ + return self._model.landmarks_per_face + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @variance.setter + def variance(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `variance` must be type of tuple or list." + assert ( + len(value) == 2 + ), "The value to set `variance` must contatins 2 elements".format(len(value)) + self._model.variance = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @min_sizes.setter + def min_sizes(self, value): + assert isinstance( + value, list + ), "The value to set `min_sizes` must be type of list." + self._model.min_sizes = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py new file mode 100755 index 0000000000..f6d39f40a4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py @@ -0,0 +1,216 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class SCRFD(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a SCRFD model exported by SCRFD. + + :param model_file: (str)Path of model file, e.g ./scrfd.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(SCRFD, self).__init__(runtime_option) + + self._model = C.vision.facedet.SCRFD( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "SCRFD initialize failed." + + def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.7 + :param nms_iou_threshold: iou threashold for NMS, default is 0.3 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._model.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._model.disable_permute() + + # 一些跟SCRFD模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [640, 640]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (640, 640) + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def downsample_strides(self): + """ + Argument for image postprocessing step, + downsample strides (namely, steps) for SCRFD to generate anchors, + will take (8,16,32) as default values + """ + return self._model.downsample_strides + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in SCRFD + """ + return self._model.landmarks_per_face + + @property + def use_kps(self): + """ + Argument for image postprocessing step, + the outputs of onnx file with key points features or not, default true + """ + return self._model.use_kps + + @property + def max_nms(self): + """ + Argument for image postprocessing step, the upperbond number of boxes processed by nms, default 30000 + """ + return self._model.max_nms + + @property + def num_anchors(self): + """ + Argument for image postprocessing step, anchor number of each stride, default 2 + """ + return self._model.num_anchors + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." 
+ self._model.downsample_strides = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value + + @use_kps.setter + def use_kps(self, value): + assert isinstance( + value, bool + ), "The value to set `use_kps` must be type of bool." + self._model.use_kps = value + + @max_nms.setter + def max_nms(self, value): + assert isinstance(value, int), "The value to set `max_nms` must be type of int." + self._model.max_nms = value + + @num_anchors.setter + def num_anchors(self, value): + assert isinstance( + value, int + ), "The value to set `num_anchors` must be type of int." + self._model.num_anchors = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py new file mode 100755 index 0000000000..48c4f9b034 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py @@ -0,0 +1,75 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class UltraFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a UltraFace model exported by UltraFace. + + :param model_file: (str)Path of model file, e.g ./ultraface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(UltraFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.UltraFace( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "UltraFace initialize failed." 
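For the older contrib detectors (RetinaFace, SCRFD, UltraFace), confidence and NMS thresholds are per-call arguments of predict() instead of postprocessor attributes. A minimal UltraFace sketch with a hypothetical model file:

```python
import cv2
from ultrainfer.vision.facedet import UltraFace

model = UltraFace("version-RFB-320.onnx")  # hypothetical ONNX export
model.size = [320, 240]                    # preprocess size, (width, height)

im = cv2.imread("face.jpg")
result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3)
```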
+
+    def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3):
+        """Detect the location and key points of human faces from an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.7
+        :param nms_iou_threshold: IoU threshold for NMS, default is 0.3
+        :return: FaceDetectionResult
+        """
+        return self._model.predict(input_image, conf_threshold, nms_iou_threshold)
+
+    # Wrappers around UltraFace-specific attributes.
+    # Most of them control preprocessing; for example, setting model.size = [640, 480]
+    # changes the resize target used during preprocessing (if the model supports it).
+    @property
+    def size(self):
+        """
+        Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (320, 240)
+        """
+        return self._model.size
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(
+            wh, (list, tuple)
+        ), "The value to set `size` must be type of tuple or list."
+        assert (
+            len(wh) == 2
+        ), "The value to set `size` must contain 2 elements [width, height], but now it contains {} elements.".format(
+            len(wh)
+        )
+        self._model.size = wh
diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py
new file mode 100755
index 0000000000..903e7fba1f
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py
@@ -0,0 +1,147 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOv5Face(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a face detection model exported by YOLOv5Face.
+
+        :param model_file: (str)Path of model file, e.g ./yolov5face.onnx
+        :param params_file: (str)Path of parameters file, e.g yolov5face/model.pdiparams, if the model_format is ModelFormat.ONNX, this param will be ignored, can be set as empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend option;
+        # the resulting option is stored in self._runtime_option.
+        super(YOLOv5Face, self).__init__(runtime_option)
+
+        self._model = C.vision.facedet.YOLOv5Face(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized reports whether the model was initialized successfully.
+        assert self.initialized, "YOLOv5Face initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv5Face模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640,640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in YOLOv5Face + """ + return self._model.landmarks_per_face + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py new file mode 100755 index 0000000000..0b75cc1bd1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py @@ -0,0 +1,193 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
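# Illustrative usage sketch for the YOLOv5Face wrapper above; the model path, the image
# path, and the `ultrainfer.vision.facedet` import path are assumptions made for the
# example and not guaranteed by this patch.
import cv2
import ultrainfer as ui

model = ui.vision.facedet.YOLOv5Face("yolov5s-face.onnx")  # hypothetical ONNX export

# Tune the letterbox-style preprocessing through the properties shown above.
model.size = [640, 640]   # resize target as [width, height]
model.is_mini_pad = True  # pad only to the nearest multiple of `stride`
model.stride = 32

im = cv2.imread("face.jpg")  # HWC, BGR, as predict() expects
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)  # FaceDetectionResult with boxes, scores and landmarks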
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class Yolov7FacePreprocessor: + def __init__(self): + """Create a preprocessor for Yolov7Face""" + self._preprocessor = C.vision.facedet.Yolov7Preprocessor() + + def run(self, input_ims): + """Preprocess input images for Yolov7Face + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_color_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_color_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_color_value.setter + def padding_color_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_color_value` must be type of list." + self._preprocessor.padding_color_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._preprocessor.is_scale_up = value + + +class Yolov7FacePostprocessor: + def __init__(self): + """Create a postprocessor for Yolov7Face""" + self._postprocessor = C.vision.facedet.Yolov7FacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for Yolov7Face + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.45 + """ + return self._postprocessor.nms_threshold + + @property + def landmarks_per_face(self): + """ + landmarks per face for postprocessing, default is 5 + """ + return self._postprocessor.landmarks_per_face + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @landmarks_per_face.setter + def landmarks_per_face(self, landmarks_per_face): + assert isinstance( + landmarks_per_face, int + ), "The value to set `landmarks_per_face` must be type of int." + self._postprocessor.landmarks_per_face = landmarks_per_face + + +class YOLOv7Face(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7Face model exported by YOLOv7Face. + + :param model_file: (str)Path of model file, e.g ./yolov7face.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv7Face, self).__init__(runtime_option) + + self._model = C.vision.facedet.YOLOv7Face( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "YOLOv7Face initialize failed." 
+
+    def predict(self, input_image):
+        """Detect the location and key points of human faces from an input image
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: FaceDetectionResult
+        """
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of FaceDetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the preprocessor object of the loaded YOLOv7Face model
+
+        :return Yolov7FacePreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the postprocessor object of the loaded YOLOv7Face model
+
+        :return Yolov7FacePostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py
new file mode 100755
index 0000000000..f5bde6daed
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py
@@ -0,0 +1,16 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .contrib import *
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py
new file mode 100755
index 0000000000..d18dd52211
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py
@@ -0,0 +1,17 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .insightface import *
+from .adaface import *
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py
new file mode 100755
index 0000000000..985f1111b8
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py
@@ -0,0 +1,109 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
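# Illustrative usage sketch for the YOLOv7Face wrapper above. Unlike YOLOv5Face, the
# thresholds are configured on the postprocessor rather than passed to predict(); this
# assumes the bound postprocessor exposes the same `conf_threshold`/`nms_threshold`
# attributes as the Yolov7FacePostprocessor wrapper defined above. File names and the
# import path are assumptions made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.facedet.YOLOv7Face("yolov7-face.onnx")  # hypothetical ONNX export
model.postprocessor.conf_threshold = 0.5   # defaults per the docstrings above
model.postprocessor.nms_threshold = 0.45

im = cv2.imread("group_photo.jpg")              # HWC, BGR
single_result = model.predict(im)               # FaceDetectionResult for one image
batch_results = model.batch_predict([im, im])   # one result per input image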
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from ..... import UltraInferModel, ModelFormat
+from ..... import c_lib_wrap as C
+
+
+class AdaFacePreprocessor:
+    def __init__(self):
+        """Create a preprocessor for AdaFace Model"""
+        self._preprocessor = C.vision.faceid.AdaFacePreprocessor()
+
+    def run(self, input_ims):
+        """Preprocess input images for AdaFace Model
+
+        :param: input_ims: (list of numpy.ndarray)The input image
+        :return: list of FDTensor, include image, scale_factor, im_shape
+        """
+        return self._preprocessor.run(input_ims)
+
+
+class AdaFacePostprocessor:
+    def __init__(self):
+        """Create a postprocessor for AdaFace Model"""
+        self._postprocessor = C.vision.faceid.AdaFacePostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for AdaFace Model
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of FaceRecognitionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+    @property
+    def l2_normalize(self):
+        """
+        Whether to apply L2 normalization to the output face embedding in the postprocessing step
+        """
+        return self._postprocessor.l2_normalize
+
+
+class AdaFace(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load an AdaFace model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g adaface/model.pdmodel
+        :param params_file: (str)Path of parameters file, e.g adaface/model.pdiparams, if the model_format is ModelFormat.ONNX, this param will be ignored, can be set as empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        super(AdaFace, self).__init__(runtime_option)
+        self._model = C.vision.faceid.AdaFace(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "AdaFace model initialize failed."
+
+    def predict(self, im):
+        """Predict the face recognition result for an input image
+
+        :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: FaceRecognitionResult
+        """
+
+        assert im is not None, "The input image data is None."
+ return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get AdaFacePreprocessor object of the loaded model + + :return AdaFacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get AdaFacePostprocessor object of the loaded model + + :return AdaFacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py new file mode 100755 index 0000000000..0aab2c78b8 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py @@ -0,0 +1,237 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from ..... import UltraInferModel, ModelFormat +from ..... import c_lib_wrap as C + + +class InsightFaceRecognitionPreprocessor: + def __init__(self): + """Create a preprocessor for InsightFaceRecognition Model""" + self._preprocessor = C.vision.faceid.InsightFaceRecognitionPreprocessor() + + def run(self, input_ims): + """Preprocess input images for InsightFaceRecognition Model + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor, include image, scale_factor, im_shape + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, tuple of (width, height), + decide the target size after resize, default (112, 112) + """ + return self._preprocessor.size + + @property + def alpha(self): + """ + Argument for image preprocessing step, alpha values for normalization, + default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + """ + return self._preprocessor.alpha + + @property + def beta(self): + """ + Argument for image preprocessing step, beta values for normalization, + default beta = {-1.f, -1.f, -1.f} + """ + return self._preprocessor.beta + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._preprocessor.disable_permute() + + +class InsightFaceRecognitionPostprocessor: + def __init__(self): + """Create a postprocessor for InsightFaceRecognition Model""" + self._postprocessor = C.vision.faceid.InsightFaceRecognitionPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for PaddleClas Model + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of FaceRecognitionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + @property + def l2_normalize(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.l2_normalize + + +class InsightFaceRecognitionBase(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a InsightFaceRecognitionBase model exported by PaddleClas. + + :param model_file: (str)Path of model file, e.g InsightFaceRecognitionBase/model.pdmodel + :param params_file: (str)Path of parameters file, e.g InsightFaceRecognitionBase/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + self._model = C.vision.faceid.InsightFaceRecognitionBase( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "InsightFaceRecognitionBase model initialize failed." + + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get InsightFaceRecognitionPreprocessor object of the loaded model + + :return InsightFaceRecognitionPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get InsightFaceRecognitionPostprocessor object of the loaded model + + :return InsightFaceRecognitionPostprocessor + """ + return self._model.postprocessor + + +class ArcFace(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a ArcFace model exported by PaddleClas. 
+ :param model_file: (str)Path of model file, e.g ArcFace/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ArcFace/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.ArcFace( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "ArcFace model initialize failed." + + +class CosFace(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a CosFace model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g CosFace/model.pdmodel + :param params_file: (str)Path of parameters file, e.g CosFace/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.CosFace( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "CosFace model initialize failed." + + +class PartialFC(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a PartialFC model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g PartialFC/model.pdmodel + :param params_file: (str)Path of parameters file, e.g PartialFC/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.PartialFC( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PartialFC model initialize failed." + + +class VPL(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a VPL model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g VPL/model.pdmodel + :param params_file: (str)Path of parameters file, e.g VPL/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.VPL( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "VPL model initialize failed." 
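# Illustrative usage sketch for the InsightFace-style recognition wrappers above (ArcFace
# shown; CosFace, PartialFC and VPL are constructed the same way). The model path, the
# `ultrainfer.vision.faceid` import path, and the `embedding` field on the returned
# FaceRecognitionResult are assumptions made for the example.
import cv2
import numpy as np
import ultrainfer as ui

model = ui.vision.faceid.ArcFace("arcface_r50.onnx")  # hypothetical ONNX export

face_a = cv2.imread("face_a.jpg")  # aligned face crops, HWC, BGR
face_b = cv2.imread("face_b.jpg")

emb_a = np.asarray(model.predict(face_a).embedding)
emb_b = np.asarray(model.predict(face_b).embedding)

# Cosine similarity between the two embeddings; larger values mean the two faces are
# more likely to belong to the same person.
cos_sim = float(emb_a @ emb_b / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b)))
print(cos_sim)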
diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py new file mode 100755 index 0000000000..6829f3fce7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.anemigan import AnimeGAN diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py new file mode 100755 index 0000000000..d75a9b4929 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py @@ -0,0 +1,103 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class AnimeGANPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for AnimeGAN.""" + self._preprocessor = C.vision.generation.AnimeGANPreprocessor() + + def run(self, input_ims): + """Preprocess input images for AnimeGAN. 
+ + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class AnimeGANPostprocessor: + def __init__(self): + """Create a postprocessor for AnimeGAN.""" + self._postprocessor = C.vision.generation.AnimeGANPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for AnimeGAN + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: results: (list) Final results + """ + return self._postprocessor.run(runtime_results) + + +class AnimeGAN(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a AnimeGAN model. + + :param model_file: (str)Path of model file, e.g ./model.pdmodel + :param params_file: (str)Path of parameters file, e.g ./model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # call super constructor to initialize self._runtime_option + super(AnimeGAN, self).__init__(runtime_option) + + self._model = C.vision.generation.AnimeGAN( + model_file, params_file, self._runtime_option, model_format + ) + # assert self.initialized to confirm initialization successfully. + assert self.initialized, "AnimeGAN initialize failed." + + def predict(self, input_image): + """Predict the style transfer result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: style transfer result + """ + return self._model.predict(input_image) + + def batch_predict(self, input_images): + """Predict the style transfer result for multiple input images + + :param input_images: (list of numpy.ndarray)The list of input image data, each image is a 3-D array with layout HWC, BGR format + :return: a list of style transfer results + """ + return self._model.batch_predict(input_images) + + @property + def preprocessor(self): + """Get AnimeGANPreprocessor object of the loaded model + + :return AnimeGANPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get AnimeGANPostprocessor object of the loaded model + + :return AnimeGANPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py new file mode 100755 index 0000000000..9205fcd814 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
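# Illustrative usage sketch for the AnimeGAN wrapper above. AnimeGAN defaults to
# ModelFormat.PADDLE, so both the model and params files are passed; the file paths and
# the `ultrainfer.vision.generation` import path are assumptions made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.generation.AnimeGAN(
    "animegan/model.pdmodel",    # hypothetical exported Paddle model
    "animegan/model.pdiparams",
)

im = cv2.imread("photo.jpg")                   # HWC, BGR
styled = model.predict(im)                     # style-transfer result for one image
styled_batch = model.batch_predict([im, im])   # list of results, one per input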
+ +from __future__ import absolute_import +from .contrib.fsanet import FSANet diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py new file mode 100755 index 0000000000..373b3d62bc --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class FSANet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a headpose model exported by FSANet. + + :param model_file: (str)Path of model file, e.g fsanet/fsanet-var.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(FSANet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FSANet only support model format of ModelFormat.ONNX now." + self._model = C.vision.headpose.FSANet( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "FSANet initialize failed." 
+ + def predict(self, input_image): + """Predict an input image headpose + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: HeadPoseResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (64, 64) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (64, 64) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py new file mode 100755 index 0000000000..36159c84d1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .pptinypose import PPTinyPose diff --git a/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py new file mode 100755 index 0000000000..ef5b6a6ca4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPTinyPose(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """load a PPTinyPose model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g pptinypose/model.pdmodel + :param params_file: (str)Path of parameters file, e.g pptinypose/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g pptinypose/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPTinyPose, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPTinyPose model only support model format of ModelFormat.Paddle now." + self._model = C.vision.keypointdetection.PPTinyPose( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPTinyPose model initialize failed." + + def predict(self, input_image, detection_result=None): + """Detect keypoints in an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param detection_result: (DetectionResult)Pre-detected boxes result, default is None + :return: KeyPointDetectionResult + """ + assert input_image is not None, "The input image data is None." + if detection_result: + return self._model.predict(input_image, detection_result) + else: + return self._model.predict(input_image) + + @property + def use_dark(self): + """Atrribute of PPTinyPose model. Stating whether using Distribution-Aware Coordinate Representation for Human Pose Estimation(DARK for short) in postprocess, default is True + + :return: value of use_dark(bool) + """ + return self._model.use_dark + + @use_dark.setter + def use_dark(self, value): + """Set attribute use_dark of PPTinyPose model. + + :param value: (bool)The value to set use_dark + """ + assert isinstance( + value, bool + ), "The value to set `use_dark` must be type of bool." + self._model.use_dark = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._model.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._model.disable_permute() diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py new file mode 100755 index 0000000000..17961245b2 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
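# Illustrative usage sketch for the PPTinyPose wrapper above. The export paths follow the
# examples in its docstring; the `ultrainfer.vision.keypointdetection` import path is an
# assumption made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.keypointdetection.PPTinyPose(
    "pptinypose/model.pdmodel",     # hypothetical PaddleDetection export
    "pptinypose/model.pdiparams",
    "pptinypose/infer_cfg.yml",
)
model.use_dark = True  # keep DARK decoding enabled in postprocessing (the default)

im = cv2.imread("person.jpg")  # HWC, BGR

# Whole-image mode: estimate keypoints directly on the input image.
keypoints = model.predict(im)

# Top-down mode: pass a pre-computed DetectionResult so keypoints are estimated inside
# each detected person box, e.g. keypoints = model.predict(im, detection_result=det_res)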
+ +from __future__ import absolute_import +from .contrib.modnet import MODNet +from .contrib.rvm import RobustVideoMatting +from .ppmatting import PPMatting diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py new file mode 100755 index 0000000000..b719ea8734 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py @@ -0,0 +1,125 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class MODNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a MODNet model exported by MODNet. + + :param model_file: (str)Path of model file, e.g ./modnet.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(MODNet, self).__init__(runtime_option) + + self._model = C.vision.matting.MODNet( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "MODNet initialize failed." 
+ + def predict(self, input_image): + """Predict the matting result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + return self._model.predict(input_image) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [256, 256]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [256,256] + """ + return self._model.size + + @property + def alpha(self): + """ + Argument for image preprocessing step, alpha value for normalization, default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f} + """ + return self._model.alpha + + @property + def beta(self): + """ + Argument for image preprocessing step, beta value for normalization, default beta = {-1.f, -1.f, -1.f} + """ + return self._model.beta + + @property + def swap_rb(self): + """ + Argument for image preprocessing step, whether to swap the B and R channel, such as BGR->RGB, default True. + """ + return self._model.swap_rb + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @alpha.setter + def alpha(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `alpha` must be type of tuple or list." + assert ( + len(value) == 3 + ), "The value to set `alpha` must contatins 3 elements for each channels, but now it contains {} elements.".format( + len(value) + ) + self._model.alpha = value + + @beta.setter + def beta(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `beta` must be type of tuple or list." + assert ( + len(value) == 3 + ), "The value to set `beta` must contatins 3 elements for each channels, but now it contains {} elements.".format( + len(value) + ) + self._model.beta = value + + @swap_rb.setter + def swap_rb(self, value): + assert isinstance( + value, bool + ), "The value to set `swap_rb` must be type of bool." + self._model.swap_rb = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py new file mode 100755 index 0000000000..f00793bef4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class RobustVideoMatting(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a video matting model exported by RobustVideoMatting. 
+ + :param model_file: (str)Path of model file, e.g rvm/rvm_mobilenetv3_fp32.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + super(RobustVideoMatting, self).__init__(runtime_option) + + self._model = C.vision.matting.RobustVideoMatting( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "RobustVideoMatting initialize failed." + + def predict(self, input_image): + """Matting an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size + """ + return self._model.size + + @property + def video_mode(self): + """ + Whether to open the video mode, if there are some irrelevant pictures, set it to fasle, the default is true + """ + return self._model.video_mode + + @property + def swap_rb(self): + """ + Whether convert to RGB, Set to false if you have converted YUV format images to RGB outside the model, dafault true + """ + return self._model.swap_rb + + @size.setter + def size(self, wh): + """ + Set the preprocess image size + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @video_mode.setter + def video_mode(self, value): + """ + Set video_mode property, the default is true + """ + assert isinstance( + value, bool + ), "The value to set `video_mode` must be type of bool." + self._model.video_mode = value + + @swap_rb.setter + def swap_rb(self, value): + """ + Set swap_rb property, the default is true + """ + assert isinstance( + value, bool + ), "The value to set `swap_rb` must be type of bool." + self._model.swap_rb = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py new file mode 100755 index 0000000000..536d3b331e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py @@ -0,0 +1,55 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PPMatting(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPMatting model exported by PaddleSeg. + + :param model_file: (str)Path of model file, e.g PPMatting-512/model.pdmodel + :param params_file: (str)Path of parameters file, e.g PPMatting-512/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g PPMatting-512/deploy.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMatting, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPMatting model only support model format of ModelFormat.Paddle now." + self._model = C.vision.matting.PPMatting( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPMatting model initialize failed." + + def predict(self, input_image): + """Predict the matting result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + assert input_image is not None, "The input image data is None." + return self._model.predict(input_image) diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py new file mode 100755 index 0000000000..e41e77900a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppocr import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py new file mode 100755 index 0000000000..2582f92f34 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py @@ -0,0 +1,1928 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
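# Illustrative usage sketch for the PPMatting wrapper above, reusing the example paths
# from its docstring; the `ultrainfer.vision.matting` import path is an assumption made
# for the example.
import cv2
import ultrainfer as ui

model = ui.vision.matting.PPMatting(
    "PPMatting-512/model.pdmodel",
    "PPMatting-512/model.pdiparams",
    "PPMatting-512/deploy.yml",
)

im = cv2.imread("portrait.jpg")      # HWC, BGR
matting_result = model.predict(im)   # MattingResult with the predicted alpha matte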
+ +from __future__ import absolute_import +import logging +import math +import os +import re +import tempfile + +from dataclasses import dataclass +from tokenizers import Tokenizer as TokenizerFast + +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config +from .utils.ser_vi_layoutxlm.vqa_utils import * +from .utils.ser_vi_layoutxlm.transforms import * +from .utils.ser_vi_layoutxlm.operators import * + + +def sort_boxes(boxes): + return C.vision.ocr.sort_boxes(boxes) + + +class UVDocPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for UVDoc Model""" + super(UVDocPreprocessor, self).__init__() + self._manager = C.vision.ocr.UVDocPreprocessor() + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + +class UVDocPostprocessor: + def __init__(self): + """Create a postprocessor for UVDoc Model""" + self._postprocessor = C.vision.ocr.UVDocPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for UVDoc Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class UVDocWarpper(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR recognition model provided by PaddleOCR + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_rec_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(UVDocWarpper, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.UVDocWarpper() + self._runnable = False + else: + self._model = C.vision.ocr.UVDocWarpper( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "UVDocWarpper initialize failed." 
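+            # Illustrative usage sketch (paths are placeholders): UVDocWarpper
+            # rectifies a distorted document photo from a BGR numpy array.
+            #
+            #     model = UVDocWarpper("uvdoc_infer/inference.pdmodel",
+            #                          "uvdoc_infer/inference.pdiparams")
+            #     unwarped = model.predict(cv2.imread("warped_page.jpg"))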
+ self._runnable = True + + def clone(self): + """Clone OCR recognition model object + :return: a new OCR recognition model object + """ + + class UVDocWarpperClone(UVDocWarpper): + def __init__(self, model): + self._model = model + + clone_model = UVDocWarpperClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: rec_text, rec_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of rec_text, list of rec_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class DBDetectorPreprocessor(ProcessorManager): + def __init__(self): + """ + Create a preprocessor for DBDetectorModel + """ + super(DBDetectorPreprocessor, self).__init__() + self._manager = C.vision.ocr.DBDetectorPreprocessor() + + @property + def max_side_len(self): + """Get max_side_len value.""" + return self._manager.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + """Set max_side_len value. + :param: value: (int) max_side_len value + """ + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._manager.max_side_len = value + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def static_shape_infer(self): + return self._manager.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." + self._manager.static_shape_infer = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._manager.disable_permute() + + +class DBDetectorPostprocessor: + def __init__(self): + """ + Create a postprocessor for DBDetectorModel + """ + self._postprocessor = C.vision.ocr.DBDetectorPostprocessor() + + def run(self, runtime_results, batch_det_img_info): + """Postprocess the runtime results for DBDetectorModel + + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :param: batch_det_img_info: (list of std::array)The output of det_preprocessor + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, batch_det_img_info) + + @property + def det_db_thresh(self): + """ + Return the det_db_thresh of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + """Set the det_db_thresh for DBDetectorPostprocessor + + :param: value : the det_db_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + """ + Return the det_db_box_thresh of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + """Set the det_db_box_thresh for DBDetectorPostprocessor + + :param: value : the det_db_box_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + """ + Return the det_db_unclip_ratio of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + """Set the det_db_unclip_ratio for DBDetectorPostprocessor + + :param: value : the det_db_unclip_ratio value + """ + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._postprocessor.det_db_unclip_ratio = value + + @property + def det_db_score_mode(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_score_mode + + @property + def det_db_box_type(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._postprocessor.det_db_box_type = value + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." 
+ self._postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + """ + Return the use_dilation of DBDetectorPostprocessor + """ + return self._postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + """Set the use_dilation for DBDetectorPostprocessor + + :param: value : the use_dilation value + """ + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._postprocessor.use_dilation = value + + +class DBCURVEDetectorPostprocessor: + def __init__(self): + """ + Create a postprocessor for DBDetectorModel + """ + self._postprocessor = C.vision.ocr.DBCURVEDetectorPostprocessor() + + def run(self, runtime_results, batch_det_img_info): + """Postprocess the runtime results for DBDetectorModel + + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :param: batch_det_img_info: (list of std::array)The output of det_preprocessor + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, batch_det_img_info) + + @property + def det_db_thresh(self): + """ + Return the det_db_thresh of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + """Set the det_db_thresh for DBCURVEDetectorPostprocessor + + :param: value : the det_db_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + """ + Return the det_db_box_thresh of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + """Set the det_db_box_thresh for DBCURVEDetectorPostprocessor + + :param: value : the det_db_box_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + """ + Return the det_db_unclip_ratio of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + """Set the det_db_unclip_ratio for DBCURVEDetectorPostprocessor + + :param: value : the det_db_unclip_ratio value + """ + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._postprocessor.det_db_unclip_ratio = value + + @property + def det_db_score_mode(self): + """ + Return the det_db_score_mode of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_score_mode + + @property + def det_db_box_type(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." 
+ self._postprocessor.det_db_box_type = value + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + """ + Return the use_dilation of DBDetectorPostprocessor + """ + return self._postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + """Set the use_dilation for DBDetectorPostprocessor + + :param: value : the use_dilation value + """ + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._postprocessor.use_dilation = value + + +class DBDetector(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR detection model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_det_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(DBDetector, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.DBDetector() + self._runnable = False + else: + self._model = C.vision.ocr.DBDetector( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "DBDetector initialize failed." + self._runnable = True + + def clone(self): + """Clone OCR detection model object + + :return: a new OCR detection model object + """ + + class DBDetectorClone(DBDetector): + def __init__(self, model): + self._model = model + + clone_model = DBDetectorClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: boxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: batch_boxes + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @property + def postprocessor(self): + return self._model.postprocessor + + # Det Preprocessor Property + @property + def max_side_len(self): + return self._model.preprocessor.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._model.preprocessor.max_side_len = value + + # Det Ppstprocessor Property + @property + def det_db_thresh(self): + return self._model.postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." 
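+        # Illustrative tuning sketch: these properties mirror PaddleOCR's DB
+        # postprocessing parameters; commonly used values are det_db_thresh=0.3,
+        # det_db_box_thresh=0.6 and det_db_unclip_ratio=1.5. Paths below are
+        # placeholders.
+        #
+        #     det = DBDetector("det_infer/inference.pdmodel",
+        #                      "det_infer/inference.pdiparams")
+        #     det.det_db_box_thresh = 0.6
+        #     det.det_db_unclip_ratio = 1.5
+        #     boxes = det.predict(cv2.imread("page.jpg"))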
+ self._model.postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + return self._model.postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._model.postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + return self._model.postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._model.postprocessor.det_db_unclip_ratio = value + + @property + def det_db_box_type(self): + return self._model.postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_box_type = value + + @property + def det_db_score_mode(self): + return self._model.postprocessor.det_db_score_mode + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + return self._model.postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._model.postprocessor.use_dilation = value + + +class DBCURVEDetector(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR detection model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_det_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(DBCURVEDetector, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.DBCURVEDetector() + self._runnable = False + else: + self._model = C.vision.ocr.DBCURVEDetector( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "DBCURVEDetector initialize failed." 
+ self._runnable = True + + def clone(self): + """Clone OCR detection model object + + :return: a new OCR detection model object + """ + + class DBCURVEDetectorClone(DBCURVEDetector): + def __init__(self, model): + self._model = model + + clone_model = DBCURVEDetectorClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: boxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: batch_boxes + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @property + def postprocessor(self): + return self._model.postprocessor + + # Det Preprocessor Property + @property + def max_side_len(self): + return self._model.preprocessor.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._model.preprocessor.max_side_len = value + + # Det Ppstprocessor Property + @property + def det_db_thresh(self): + return self._model.postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._model.postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + return self._model.postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._model.postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + return self._model.postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._model.postprocessor.det_db_unclip_ratio = value + + @property + def det_db_box_type(self): + return self._model.postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_box_type = value + + @property + def det_db_score_mode(self): + return self._model.postprocessor.det_db_score_mode + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + return self._model.postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." 
+ self._model.postprocessor.use_dilation = value + + +class ClassifierPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for ClassifierModel""" + super(ClassifierPreprocessor, self).__init__() + self._manager = C.vision.ocr.ClassifierPreprocessor() + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def cls_image_shape(self): + return self._manager.cls_image_shape + + @cls_image_shape.setter + def cls_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `cls_image_shape` must be type of list." + self._manager.cls_image_shape = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + +class ClassifierPostprocessor: + def __init__(self): + """Create a postprocessor for ClassifierModel""" + self._postprocessor = C.vision.ocr.ClassifierPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for ClassifierModel + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + @property + def cls_thresh(self): + """ + Return the cls_thresh of ClassifierPostprocessor + """ + return self._postprocessor.cls_thresh + + @cls_thresh.setter + def cls_thresh(self, value): + """Set the cls_thresh for ClassifierPostprocessor + + :param: value: the value of cls_thresh + """ + assert isinstance( + value, float + ), "The value to set `cls_thresh` must be type of float." + self._postprocessor.cls_thresh = value + + +class Classifier(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR classification model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(Classifier, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.Classifier() + self._runnable = False + else: + self._model = C.vision.ocr.Classifier( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "Classifier initialize failed." 
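+            # Illustrative usage sketch (paths are placeholders): the classifier
+            # decides whether a cropped text line is rotated by 180 degrees and is
+            # usually fed the crops cut out of DBDetector boxes (BGR numpy arrays).
+            #
+            #     cls = Classifier("cls_infer/inference.pdmodel",
+            #                      "cls_infer/inference.pdiparams")
+            #     cls_label, cls_score = cls.predict(cropped_text_image)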
+ self._runnable = True + + def clone(self): + """Clone OCR classification model object + :return: a new OCR classification model object + """ + + class ClassifierClone(Classifier): + def __init__(self, model): + self._model = model + + clone_model = ClassifierClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: cls_label, cls_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of cls_label, list of cls_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + @property + def cls_image_shape(self): + return self._model.preprocessor.cls_image_shape + + @cls_image_shape.setter + def cls_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `cls_image_shape` must be type of list." + self._model.preprocessor.cls_image_shape = value + + # Cls Postprocessor Property + @property + def cls_thresh(self): + return self._model.postprocessor.cls_thresh + + @cls_thresh.setter + def cls_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `cls_thresh` must be type of float." + self._model.postprocessor.cls_thresh = value + + +class RecognizerPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for RecognizerModel""" + super(RecognizerPreprocessor, self).__init__() + self._manager = C.vision.ocr.RecognizerPreprocessor() + + @property + def static_shape_infer(self): + return self._manager.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." + self._manager.static_shape_infer = value + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def rec_image_shape(self): + return self._manager.rec_image_shape + + @rec_image_shape.setter + def rec_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `rec_image_shape` must be type of list." + self._manager.rec_image_shape = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._manager.disable_permute() + + +class RecognizerPostprocessor: + def __init__(self, label_path): + """Create a postprocessor for RecognizerModel + :param label_path: (str)Path of label file + """ + self._postprocessor = C.vision.ocr.RecognizerPostprocessor(label_path) + + def run(self, runtime_results): + """Postprocess the runtime results for RecognizerModel + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Recognizer(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + label_path="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR recognition model provided by PaddleOCR + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_rec_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param label_path: (str)Path of label file used by OCR recognition model. e.g ./ppocr_keys_v1.txt + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(Recognizer, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.Recognizer() + self._runnable = False + else: + self._model = C.vision.ocr.Recognizer( + model_file, params_file, label_path, self._runtime_option, model_format + ) + assert self.initialized, "Recognizer initialize failed." + self._runnable = True + + def clone(self): + """Clone OCR recognition model object + :return: a new OCR recognition model object + """ + + class RecognizerClone(Recognizer): + def __init__(self, model): + self._model = model + + clone_model = RecognizerClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: rec_text, rec_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of rec_text, list of rec_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + @property + def static_shape_infer(self): + return self._model.preprocessor.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." 
+ self._model.preprocessor.static_shape_infer = value + + @property + def rec_image_shape(self): + return self._model.preprocessor.rec_image_shape + + @rec_image_shape.setter + def rec_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `rec_image_shape` must be type of list." + self._model.preprocessor.rec_image_shape = value + + +class StructureV2TablePreprocessor: + def __init__(self): + """Create a preprocessor for StructureV2Table Model""" + self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor() + + def run(self, input_ims): + """Preprocess input images for StructureV2TableModel + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class StructureV2TablePostprocessor: + def __init__(self, dict_path): + """Create a postprocessor for StructureV2Table Model""" + self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor(dict_path) + + def run(self, runtime_results): + """Postprocess the runtime results for StructureV2Table Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class StructureV2Table(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + table_char_dict_path="", + box_shape="ori", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load StructureV2Table model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param table_char_dict_path: (str)Path of table_char_dict file, e.g ../ppocr/utils/dict/table_structure_dict_ch.txt + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2Table, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.StructureV2Table() + self._runnable = False + else: + self._model = C.vision.ocr.StructureV2Table( + model_file, + params_file, + table_char_dict_path, + box_shape, + self._runtime_option, + model_format, + ) + assert self.initialized, "Classifier initialize failed." 
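+            # Illustrative usage sketch (paths are placeholders): StructureV2Table
+            # predicts table cell boxes and the HTML structure tokens of a table image.
+            #
+            #     table = StructureV2Table(
+            #         "table_infer/inference.pdmodel",
+            #         "table_infer/inference.pdiparams",
+            #         table_char_dict_path="table_structure_dict_ch.txt")
+            #     bbox, structure = table.predict(cv2.imread("table.jpg"))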
+ self._runnable = True + + def clone(self): + """Clone StructureV2Table model object + :return: a new StructureV2Table model object + """ + + class StructureV2TableClone(StructureV2Table): + def __init__(self, model): + self._model = model + + clone_model = StructureV2TableClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: bbox, structure + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of bbox list, list of structure + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class StructureV2LayoutPreprocessor: + def __init__(self): + """Create a preprocessor for StructureV2Layout Model""" + self._preprocessor = C.vision.ocr.StructureV2LayoutPreprocessor() + + def run(self, input_ims): + """Preprocess input images for StructureV2Layout Model + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class StructureV2LayoutPostprocessor: + def __init__(self): + """Create a postprocessor for StructureV2Layout Model""" + self._postprocessor = C.vision.ocr.StructureV2LayoutPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for StructureV2Layout Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class StructureV2Layout(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load StructureV2Layout model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2Layout, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.StructureV2Layout() + self._runnable = False + else: + self._model = C.vision.ocr.StructureV2Layout( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "StructureV2Layout model initialize failed." 
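+            # Illustrative usage sketch (paths are placeholders): StructureV2Layout
+            # detects layout regions such as text blocks, tables and figures on a page.
+            #
+            #     layout = StructureV2Layout("layout_infer/inference.pdmodel",
+            #                                "layout_infer/inference.pdiparams")
+            #     bboxes = layout.predict(cv2.imread("page.jpg"))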
+ self._runnable = True + + def clone(self): + """Clone StructureV2Layout model object + :return: a new StructureV2Table model object + """ + + class StructureV2LayoutClone(StructureV2Layout): + def __init__(self, model): + self._model = model + + clone_model = StructureV2LayoutClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: bboxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of bboxes list + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class PPOCRv4(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + + self.det_model = det_model + self.rec_model = rec_model + self.cls_model = cls_model + + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv4(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv4( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv4 pipeline object + :return: a new PPOCRv4 pipeline object + """ + + class PPOCRv4Clone(PPOCRv4): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv4Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." 
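+        # Note (sketch): cls_batch_size and rec_batch_size control how many detected
+        # text regions are grouped into one classifier / recognizer inference call,
+        # e.g. `ocr.rec_batch_size = 6` spends memory to gain throughput.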
+ self.system_.rec_batch_size = value + + +class PPOCRSystemv4(PPOCRv4): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv4 is deprecated, " + "please use fd.vision.ocr.PPOCRv4 instead." + ) + super(PPOCRSystemv4, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv4, self).predict(input_image) + + +class PPOCRv3(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv3(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv3( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv3 pipeline object + :return: a new PPOCRv3 pipeline object + """ + + class PPOCRv3Clone(PPOCRv3): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv3Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." + self.system_.rec_batch_size = value + + +class PPOCRSystemv3(PPOCRv3): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv3 is deprecated, " + "please use fd.vision.ocr.PPOCRv3 instead." + ) + super(PPOCRSystemv3, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv3, self).predict(input_image) + + +class PPOCRv2(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. 
+ """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv2(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv2( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv3 pipeline object + :return: a new PPOCRv3 pipeline object + """ + + class PPOCRv2Clone(PPOCRv2): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv2Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." + self.system_.rec_batch_size = value + + +class PPOCRSystemv2(PPOCRv2): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv2 is deprecated, " + "please use fd.vision.ocr.PPOCRv2 instead." + ) + super(PPOCRSystemv2, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv2, self).predict(input_image) + + +class PPStructureV2Table(UltraInferModel): + def __init__(self, det_model=None, rec_model=None, table_model=None): + """Consruct a pipeline with text detector, text recognizer and table recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + :param table_model: (UltraInferModel) The table recognition model object created by ultrainfer.vision.ocr.Table. + """ + assert ( + det_model is not None and rec_model is not None and table_model is not None + ), "The det_model, rec_model and table_model cannot be None." 
+ self.system_ = C.vision.ocr.PPStructureV2Table( + det_model._model, + rec_model._model, + table_model._model, + ) + + def clone(self): + """Clone PPStructureV2Table pipeline object + :return: a new PPStructureV2Table pipeline object + """ + + class PPStructureV2TableClone(PPStructureV2Table): + def __init__(self, system): + self.system_ = system + + clone_model = PPStructureV2TableClone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + + return self.system_.batch_predict(images) + + +class PPStructureV2TableSystem(PPStructureV2Table): + def __init__(self, det_model=None, rec_model=None, table_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPStructureV2TableSystem is deprecated, " + "please use fd.vision.ocr.PPStructureV2Table instead." + ) + super(PPStructureV2TableSystem, self).__init__( + det_model, rec_model, table_model + ) + + def predict(self, input_image): + return super(PPStructureV2TableSystem, self).predict(input_image) + + +class StructureV2SERViLayoutXLMModelPreprocessor: + def __init__(self, ser_dict_path, use_gpu=True): + """Create a preprocessor for Ser-Vi-LayoutXLM model. + :param: ser_dict_path: (str) class file path + :param: use_gpu: (bool) whether use gpu to OCR process + """ + self._manager = None + from paddleocr import PaddleOCR + + self.ocr_engine = PaddleOCR( + use_angle_cls=False, + det_model_dir=None, + rec_model_dir=None, + show_log=False, + use_gpu=use_gpu, + ) + + pre_process_list = [ + { + "VQATokenLabelEncode": { + "class_path": ser_dict_path, + "contains_re": False, + "ocr_engine": self.ocr_engine, + "order_method": "tb-yx", + } + }, + {"VQATokenPad": {"max_seq_len": 512, "return_attention_mask": True}}, + {"VQASerTokenChunk": {"max_seq_len": 512, "return_attention_mask": True}}, + {"Resize": {"size": [224, 224]}}, + { + "NormalizeImage": { + "std": [58.395, 57.12, 57.375], + "mean": [123.675, 116.28, 103.53], + "scale": "1", + "order": "hwc", + } + }, + {"ToCHWImage": None}, + { + "KeepKeys": { + "keep_keys": [ + "input_ids", + "bbox", + "attention_mask", + "token_type_ids", + "image", + "labels", + "segment_offset_id", + "ocr_info", + "entities", + ] + } + }, + ] + + self.preprocess_op = create_operators(pre_process_list, {"infer_mode": True}) + + def _transform(self, data, ops=None): + """transform""" + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + def run(self, input_im): + """Run preprocess of Ser-Vi-LayoutXLM model + :param: input_ims: (numpy.ndarray) input image + """ + ori_im = input_im.copy() + data = {"image": input_im} + data = transform(data, self.preprocess_op) + + for idx in range(len(data)): + if isinstance(data[idx], np.ndarray): + data[idx] = np.expand_dims(data[idx], axis=0) + else: + data[idx] = [data[idx]] + + return data + + +class StructureV2SERViLayoutXLMModelPostprocessor: + def __init__(self, class_path): + """Create a postprocessor for Ser-Vi-LayoutXLM model. 
+ :param: class_path: (string) class file path + """ + self.postprocessor_op = VQASerTokenLayoutLMPostProcess(class_path) + + def run(self, preds, batch=None, *args, **kwargs): + """Run postprocess of Ser-Vi-LayoutXLM model. + :param: preds: (list) results of infering + """ + return self.postprocessor_op(preds, batch, *args, **kwargs) + + +class StructureV2SERViLayoutXLMModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + ser_dict_path, + class_path, + config_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load SERViLayoutXLM model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./ser_vi_layout_xlm/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ser_vi_layout_xlm/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param ser_dict_path: (str) class file path + :param class_path: (str) class file path + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2SERViLayoutXLMModel, self).__init__(runtime_option) + + assert ( + self._runtime_option.backend != 0 + ), "Runtime Option required backend setting." + self._model = C.vision.ocr.StructureV2SERViLayoutXLMModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + + assert self.initialized, "SERViLayoutXLM model initialize failed." + + self.preprocessor = StructureV2SERViLayoutXLMModelPreprocessor(ser_dict_path) + self.postprocesser = StructureV2SERViLayoutXLMModelPostprocessor(class_path) + + self.input_name_0 = self._model.get_input_info(0).name + self.input_name_1 = self._model.get_input_info(1).name + self.input_name_2 = self._model.get_input_info(2).name + self.input_name_3 = self._model.get_input_info(3).name + + def predict(self, image): + assert isinstance(image, np.ndarray), "predict recives numpy.ndarray(BGR)" + + data = self.preprocessor.run(image) + infer_input = { + self.input_name_0: data[0], + self.input_name_1: data[1], + self.input_name_2: data[2], + self.input_name_3: data[3], + } + + infer_result = self._model.infer(infer_input) + infer_result = infer_result[0] + + post_result = self.postprocesser.run( + infer_result, segment_offset_ids=data[6], ocr_infos=data[7] + ) + + return post_result + + def batch_predict(self, image_list): + assert isinstance(image_list, list) and isinstance( + image_list[0], np.ndarray + ), "batch_predict recives list of numpy.ndarray(BGR)" + + # reading and preprocessing images + datas = None + for image in image_list: + data = self.preprocessor.run(image) + + # concatenate data to batch + if datas == None: + datas = data + else: + for idx in range(len(data)): + if isinstance(data[idx], np.ndarray): + datas[idx] = np.concatenate((datas[idx], data[idx]), axis=0) + else: + datas[idx].extend(data[idx]) + + # infer + infer_inputs = { + self.input_name_0: datas[0], + self.input_name_1: datas[1], + self.input_name_2: datas[2], + self.input_name_3: datas[3], + } + + infer_results = self._model.infer(infer_inputs) + infer_results = infer_results[0] + + # postprocessing + post_results = self.postprocesser.run( + infer_results, segment_offset_ids=datas[6], ocr_infos=datas[7] + ) + + return post_results + + +class PyOnlyFormulaRecognitionModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + 
model_format=ModelFormat.PADDLE,
+    ):
+        self._model_file = model_file
+        self._params_file = params_file
+        self._model_format = model_format
+        super().__init__(runtime_option)
+        self._config = load_config(config_file)
+        self._preprocessor = _PyOnlyFormulaRecognitionPreprocessor()
+        self._postprocessor = _PyOnlyFormulaRecognitionPostprocessor(
+            **self._config["PostProcess"]
+        )
+
+    def model_name(self):
+        return "PyOnlyFormulaRecognitionModel"
+
+    def batch_predict(self, imgs):
+        data_list = []
+        for img in imgs:
+            data = {"img": img}
+            data = self._preprocessor.run(data)
+            data_list.append(data)
+
+        input_name = self._runtime.get_input_info(0).name
+        imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32)
+        imgs = np.ascontiguousarray(imgs)
+        output_arrs = self._runtime.infer({input_name: imgs})
+
+        results = []
+        for score_map in output_arrs[0]:
+            data = {"score_map": score_map}
+            result = self._postprocessor.run(data)
+            results.append(result)
+        return results
+
+    def _update_option(self):
+        self._option.set_model_path(
+            self._model_file, self._params_file, self._model_format
+        )
+
+
+class _PyOnlyFormulaRecognitionPreprocessor(object):
+    def __init__(self):
+        super().__init__()
+        processors = self._build_processors()
+        self._processor_chain = PyOnlyProcessorChain(processors)
+
+    def run(self, data):
+        return self._processor_chain(data)
+
+    def _build_processors(self):
+        processors = []
+        processors.append(P.LaTeXOCRReisizeNormImg())
+        return processors
+
+
+class _PyOnlyFormulaRecognitionPostprocessor(object):
+    def __init__(self, **kwargs):
+        super().__init__()
+        if kwargs.get("name") == "LaTeXOCRDecode":
+            self.op = LaTeXOCRDecode(
+                character_list=kwargs.get("character_dict"),
+            )
+        else:
+            raise Exception(
+                "Unsupported postprocessing operator: {}".format(kwargs.get("name"))
+            )
+
+    def run(self, data):
+        rec_text = self.op.apply(data)
+        rec_text = rec_text["rec_text"]
+        result = _PyOnlyFormulaRecognitionResult(rec_text=rec_text)
+        return result
+
+
+@dataclass
+class _PyOnlyFormulaRecognitionResult(object):
+    rec_text: str
+
+
+class LaTeXOCRDecode(object):
+    def __init__(self, character_list=None):
+        super().__init__()
+        temp_path = tempfile.gettempdir()
+        rec_char_dict_path = os.path.join(temp_path, "latexocr_tokenizer.json")
+        try:
+            with open(rec_char_dict_path, "w") as f:
+                json.dump(character_list, f)
+        except Exception as e:
+            print(f"Failed to create latexocr_tokenizer.json, reason: {str(e)}")
+        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
+
+    def post_process(self, s):
+        text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*?
{.*?})" + letter = "[a-zA-Z]" + noletter = "[\W_^\d]" + names = [x[0].replace(" ", "") for x in re.findall(text_reg, s)] + s = re.sub(text_reg, lambda match: str(names.pop(0)), s) + news = s + while True: + s = news + news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, noletter), r"\1\2", s) + news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, letter), r"\1\2", news) + news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news) + if news == s: + break + return s + + def decode(self, tokens): + if len(tokens.shape) == 1: + tokens = tokens[None, :] + + dec = [self.tokenizer.decode(tok) for tok in tokens] + dec_str_list = [ + "".join(detok.split(" ")) + .replace("Ġ", " ") + .replace("[EOS]", "") + .replace("[BOS]", "") + .replace("[PAD]", "") + .strip() + for detok in dec + ] + return [str(self.post_process(dec_str)) for dec_str in dec_str_list] + + def apply(self, pred): + key = next(iter(pred)) + preds = np.array(pred[key]) + text = self.decode(preds) + return {"rec_text": text[0]} diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py new file mode 100755 index 0000000000..7d42cf3a5c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py @@ -0,0 +1,104 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np + + +class Resize(object): + def __init__(self, size=(640, 640), **kwargs): + self.size = size + + def resize_image(self, img): + resize_h, resize_w = self.size + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + return img, [ratio_h, ratio_w] + + def __call__(self, data): + img = data["image"] + if "polys" in data: + text_polys = data["polys"] + + img_resize, [ratio_h, ratio_w] = self.resize_image(img) + if "polys" in data: + new_boxes = [] + for box in text_polys: + new_box = [] + for cord in box: + new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) + new_boxes.append(new_box) + data["polys"] = np.array(new_boxes, dtype=np.float32) + data["image"] = img_resize + return data + + +class NormalizeImage(object): + """normalize image such as substract mean, divide std""" + + def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs): + if isinstance(scale, str): + scale = eval(scale) + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if order == "chw" else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype("float32") + self.std = np.array(std).reshape(shape).astype("float32") + + def __call__(self, data): + img = data["image"] + from PIL import Image + + if isinstance(img, Image.Image): + img = np.array(img) + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + data["image"] = (img.astype("float32") * self.scale - self.mean) / self.std + return data + + +class ToCHWImage(object): + """convert hwc image to chw image""" + + def __init__(self, **kwargs): + pass + + def __call__(self, data): + img = data["image"] + from PIL import Image + + if isinstance(img, Image.Image): + img = np.array(img) + data["image"] = img.transpose((2, 0, 1)) + return data + + +class KeepKeys(object): + def __init__(self, keep_keys, **kwargs): + self.keep_keys = keep_keys + + def __call__(self, data): + data_list = [] + for key in self.keep_keys: + data_list.append(data[key]) + return data_list diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py new file mode 100755 index 0000000000..32ec94cf80 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py @@ -0,0 +1,47 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .vqa_utils import * +from .operators import * + + +def transform(data, ops=None): + """transform""" + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + +def create_operators(op_param_list, global_config=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(op_param_list, list), "operator config should be a list" + ops = [] + for operator in op_param_list: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + if global_config is not None: + param.update(global_config) + op = eval(op_name)(**param) + ops.append(op) + return ops diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py new file mode 100755 index 0000000000..4e9627301e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py @@ -0,0 +1,624 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
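# [Editor's illustrative sketch -- not part of the patch] The `create_operators` and
# `transform` helpers in transforms.py above instantiate the operator classes from
# operators.py by name and run them in sequence. A minimal, hypothetical pipeline
# (import path, image path, and config values are assumptions) might look like:

import cv2

from ultrainfer.vision.ocr.ppocr.utils.ser_vi_layoutxlm.transforms import (
    create_operators,
    transform,
)

ops = create_operators(
    [
        {"Resize": {"size": [640, 640]}},
        {"NormalizeImage": {"order": "hwc"}},
        {"ToCHWImage": None},
        {"KeepKeys": {"keep_keys": ["image"]}},
    ]
)
data = {"image": cv2.imread("doc_image.png")}  # hypothetical input, HWC BGR
(chw_image,) = transform(data, ops)  # float32 CHW array, resized and normalized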
+ +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy +import numpy as np +import json +import copy +from copy import deepcopy + +from collections import defaultdict + + +def order_by_tbyx(ocr_info): + res = sorted(ocr_info, key=lambda r: (r["bbox"][1], r["bbox"][0])) + for i in range(len(res) - 1): + for j in range(i, 0, -1): + if abs(res[j + 1]["bbox"][1] - res[j]["bbox"][1]) < 20 and ( + res[j + 1]["bbox"][0] < res[j]["bbox"][0] + ): + tmp = deepcopy(res[j]) + res[j] = deepcopy(res[j + 1]) + res[j + 1] = deepcopy(tmp) + else: + break + return res + + +def load_vqa_bio_label_maps(label_map_path): + with open(label_map_path, "r", encoding="utf-8") as fin: + lines = fin.readlines() + old_lines = [line.strip() for line in lines] + lines = ["O"] + for line in old_lines: + # "O" has already been in lines + if line.upper() in ["OTHER", "OTHERS", "IGNORE"]: + continue + lines.append(line) + labels = ["O"] + for line in lines[1:]: + labels.append("B-" + line) + labels.append("I-" + line) + label2id_map = {label.upper(): idx for idx, label in enumerate(labels)} + id2label_map = {idx: label.upper() for idx, label in enumerate(labels)} + return label2id_map, id2label_map + + +class VQATokenLabelEncode(object): + """ + Label encode for NLP VQA methods + """ + + def __init__( + self, + class_path, + contains_re=False, + add_special_ids=False, + algorithm="LayoutXLM", + use_textline_bbox_info=True, + order_method=None, + infer_mode=False, + ocr_engine=None, + **kwargs + ): + super(VQATokenLabelEncode, self).__init__() + from paddlenlp.transformers import ( + LayoutXLMTokenizer, + LayoutLMTokenizer, + LayoutLMv2Tokenizer, + ) + + tokenizer_dict = { + "LayoutXLM": { + "class": LayoutXLMTokenizer, + "pretrained_model": "layoutxlm-base-uncased", + }, + "LayoutLM": { + "class": LayoutLMTokenizer, + "pretrained_model": "layoutlm-base-uncased", + }, + "LayoutLMv2": { + "class": LayoutLMv2Tokenizer, + "pretrained_model": "layoutlmv2-base-uncased", + }, + } + self.contains_re = contains_re + tokenizer_config = tokenizer_dict[algorithm] + self.tokenizer = tokenizer_config["class"].from_pretrained( + tokenizer_config["pretrained_model"] + ) + self.label2id_map, id2label_map = load_vqa_bio_label_maps(class_path) + self.add_special_ids = add_special_ids + self.infer_mode = infer_mode + self.ocr_engine = ocr_engine + self.use_textline_bbox_info = use_textline_bbox_info + self.order_method = order_method + assert self.order_method in [None, "tb-yx"] + + def split_bbox(self, bbox, text, tokenizer): + words = text.split() + token_bboxes = [] + curr_word_idx = 0 + x1, y1, x2, y2 = bbox + unit_w = (x2 - x1) / len(text) + for idx, word in enumerate(words): + curr_w = len(word) * unit_w + word_bbox = [x1, y1, x1 + curr_w, y2] + token_bboxes.extend([word_bbox] * len(tokenizer.tokenize(word))) + x1 += (len(word) + 1) * unit_w + return token_bboxes + + def filter_empty_contents(self, ocr_info): + """ + find out the empty texts and remove the links + """ + new_ocr_info = [] + empty_index = [] + for idx, info in enumerate(ocr_info): + if len(info["transcription"]) > 0: + new_ocr_info.append(copy.deepcopy(info)) + else: + empty_index.append(info["id"]) + + for idx, info in enumerate(new_ocr_info): + new_link = [] + for link in info["linking"]: + if link[0] in empty_index or link[1] in empty_index: + continue + new_link.append(link) + new_ocr_info[idx]["linking"] = new_link + return new_ocr_info + + def __call__(self, data): + # load bbox and label info 
+ ocr_info = self._load_ocr_info(data) + + for idx in range(len(ocr_info)): + if "bbox" not in ocr_info[idx]: + ocr_info[idx]["bbox"] = self.trans_poly_to_bbox(ocr_info[idx]["points"]) + + if self.order_method == "tb-yx": + ocr_info = order_by_tbyx(ocr_info) + + # for re + train_re = self.contains_re and not self.infer_mode + if train_re: + ocr_info = self.filter_empty_contents(ocr_info) + + height, width, _ = data["image"].shape + + words_list = [] + bbox_list = [] + input_ids_list = [] + token_type_ids_list = [] + segment_offset_id = [] + gt_label_list = [] + + entities = [] + + if train_re: + relations = [] + id2label = {} + entity_id_to_index_map = {} + empty_entity = set() + + data["ocr_info"] = copy.deepcopy(ocr_info) + + for info in ocr_info: + text = info["transcription"] + if len(text) <= 0: + continue + if train_re: + # for re + if len(text) == 0: + empty_entity.add(info["id"]) + continue + id2label[info["id"]] = info["label"] + relations.extend([tuple(sorted(l)) for l in info["linking"]]) + # smooth_box + info["bbox"] = self.trans_poly_to_bbox(info["points"]) + + encode_res = self.tokenizer.encode( + text, + pad_to_max_seq_len=False, + return_attention_mask=True, + return_token_type_ids=True, + ) + + if not self.add_special_ids: + # TODO: use tok.all_special_ids to remove + encode_res["input_ids"] = encode_res["input_ids"][1:-1] + encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1] + encode_res["attention_mask"] = encode_res["attention_mask"][1:-1] + + if self.use_textline_bbox_info: + bbox = [info["bbox"]] * len(encode_res["input_ids"]) + else: + bbox = self.split_bbox( + info["bbox"], info["transcription"], self.tokenizer + ) + if len(bbox) <= 0: + continue + bbox = self._smooth_box(bbox, height, width) + if self.add_special_ids: + bbox.insert(0, [0, 0, 0, 0]) + bbox.append([0, 0, 0, 0]) + + # parse label + if not self.infer_mode: + label = info["label"] + gt_label = self._parse_label(label, encode_res) + + # construct entities for re + if train_re: + if gt_label[0] != self.label2id_map["O"]: + entity_id_to_index_map[info["id"]] = len(entities) + label = label.upper() + entities.append( + { + "start": len(input_ids_list), + "end": len(input_ids_list) + len(encode_res["input_ids"]), + "label": label.upper(), + } + ) + else: + entities.append( + { + "start": len(input_ids_list), + "end": len(input_ids_list) + len(encode_res["input_ids"]), + "label": "O", + } + ) + input_ids_list.extend(encode_res["input_ids"]) + token_type_ids_list.extend(encode_res["token_type_ids"]) + bbox_list.extend(bbox) + words_list.append(text) + segment_offset_id.append(len(input_ids_list)) + if not self.infer_mode: + gt_label_list.extend(gt_label) + + data["input_ids"] = input_ids_list + data["token_type_ids"] = token_type_ids_list + data["bbox"] = bbox_list + data["attention_mask"] = [1] * len(input_ids_list) + data["labels"] = gt_label_list + data["segment_offset_id"] = segment_offset_id + data["tokenizer_params"] = dict( + padding_side=self.tokenizer.padding_side, + pad_token_type_id=self.tokenizer.pad_token_type_id, + pad_token_id=self.tokenizer.pad_token_id, + ) + data["entities"] = entities + + if train_re: + data["relations"] = relations + data["id2label"] = id2label + data["empty_entity"] = empty_entity + data["entity_id_to_index_map"] = entity_id_to_index_map + return data + + def trans_poly_to_bbox(self, poly): + x1 = int(np.min([p[0] for p in poly])) + x2 = int(np.max([p[0] for p in poly])) + y1 = int(np.min([p[1] for p in poly])) + y2 = int(np.max([p[1] for p in poly])) + 
return [x1, y1, x2, y2] + + def _load_ocr_info(self, data): + if self.infer_mode: + ocr_result = self.ocr_engine.ocr(data["image"], cls=False)[0] + ocr_info = [] + for res in ocr_result: + ocr_info.append( + { + "transcription": res[1][0], + "bbox": self.trans_poly_to_bbox(res[0]), + "points": res[0], + } + ) + return ocr_info + else: + info = data["label"] + # read text info + info_dict = json.loads(info) + return info_dict + + def _smooth_box(self, bboxes, height, width): + bboxes = np.array(bboxes) + bboxes[:, 0] = bboxes[:, 0] * 1000 / width + bboxes[:, 2] = bboxes[:, 2] * 1000 / width + bboxes[:, 1] = bboxes[:, 1] * 1000 / height + bboxes[:, 3] = bboxes[:, 3] * 1000 / height + bboxes = bboxes.astype("int64").tolist() + return bboxes + + def _parse_label(self, label, encode_res): + gt_label = [] + if label.lower() in ["other", "others", "ignore"]: + gt_label.extend([0] * len(encode_res["input_ids"])) + else: + gt_label.append(self.label2id_map[("b-" + label).upper()]) + gt_label.extend( + [self.label2id_map[("i-" + label).upper()]] + * (len(encode_res["input_ids"]) - 1) + ) + return gt_label + + +class VQATokenPad(object): + def __init__( + self, + max_seq_len=512, + pad_to_max_seq_len=True, + return_attention_mask=True, + return_token_type_ids=True, + truncation_strategy="longest_first", + return_overflowing_tokens=False, + return_special_tokens_mask=False, + infer_mode=False, + **kwargs + ): + + self.max_seq_len = max_seq_len + self.pad_to_max_seq_len = max_seq_len + self.return_attention_mask = return_attention_mask + self.return_token_type_ids = return_token_type_ids + self.truncation_strategy = truncation_strategy + self.return_overflowing_tokens = return_overflowing_tokens + self.return_special_tokens_mask = return_special_tokens_mask + self.infer_mode = infer_mode + + def __call__(self, data): + import paddle + + self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index + needs_to_be_padded = ( + self.pad_to_max_seq_len and len(data["input_ids"]) < self.max_seq_len + ) + + if needs_to_be_padded: + if "tokenizer_params" in data: + tokenizer_params = data.pop("tokenizer_params") + else: + tokenizer_params = dict( + padding_side="right", pad_token_type_id=0, pad_token_id=1 + ) + + difference = self.max_seq_len - len(data["input_ids"]) + if tokenizer_params["padding_side"] == "right": + if self.return_attention_mask: + data["attention_mask"] = [1] * len(data["input_ids"]) + [ + 0 + ] * difference + if self.return_token_type_ids: + data["token_type_ids"] = ( + data["token_type_ids"] + + [tokenizer_params["pad_token_type_id"]] * difference + ) + if self.return_special_tokens_mask: + data["special_tokens_mask"] = ( + data["special_tokens_mask"] + [1] * difference + ) + data["input_ids"] = ( + data["input_ids"] + [tokenizer_params["pad_token_id"]] * difference + ) + if not self.infer_mode: + data["labels"] = ( + data["labels"] + [self.pad_token_label_id] * difference + ) + data["bbox"] = data["bbox"] + [[0, 0, 0, 0]] * difference + elif tokenizer_params["padding_side"] == "left": + if self.return_attention_mask: + data["attention_mask"] = [0] * difference + [1] * len( + data["input_ids"] + ) + if self.return_token_type_ids: + data["token_type_ids"] = [ + tokenizer_params["pad_token_type_id"] + ] * difference + data["token_type_ids"] + if self.return_special_tokens_mask: + data["special_tokens_mask"] = [1] * difference + data[ + "special_tokens_mask" + ] + data["input_ids"] = [ + tokenizer_params["pad_token_id"] + ] * difference + data["input_ids"] + if not self.infer_mode: + 
data["labels"] = [self.pad_token_label_id] * difference + data[ + "labels" + ] + data["bbox"] = [[0, 0, 0, 0]] * difference + data["bbox"] + else: + if self.return_attention_mask: + data["attention_mask"] = [1] * len(data["input_ids"]) + + for key in data: + if key in [ + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode: + if key != "labels": + length = min(len(data[key]), self.max_seq_len) + data[key] = data[key][:length] + else: + continue + data[key] = np.array(data[key], dtype="int64") + return data + + +class VQASerTokenChunk(object): + def __init__(self, max_seq_len=512, infer_mode=False, **kwargs): + self.max_seq_len = max_seq_len + self.infer_mode = infer_mode + + def __call__(self, data): + encoded_inputs_all = [] + seq_len = len(data["input_ids"]) + for index in range(0, seq_len, self.max_seq_len): + chunk_beg = index + chunk_end = min(index + self.max_seq_len, seq_len) + encoded_inputs_example = {} + for key in data: + if key in [ + "label", + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode and key == "labels": + encoded_inputs_example[key] = data[key] + else: + encoded_inputs_example[key] = data[key][chunk_beg:chunk_end] + else: + encoded_inputs_example[key] = data[key] + + encoded_inputs_all.append(encoded_inputs_example) + if len(encoded_inputs_all) == 0: + return None + return encoded_inputs_all[0] + + +class VQAReTokenChunk(object): + def __init__( + self, max_seq_len=512, entities_labels=None, infer_mode=False, **kwargs + ): + self.max_seq_len = max_seq_len + self.entities_labels = ( + {"HEADER": 0, "QUESTION": 1, "ANSWER": 2} + if entities_labels is None + else entities_labels + ) + self.infer_mode = infer_mode + + def __call__(self, data): + # prepare data + entities = data.pop("entities") + relations = data.pop("relations") + encoded_inputs_all = [] + for index in range(0, len(data["input_ids"]), self.max_seq_len): + item = {} + for key in data: + if key in [ + "label", + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode and key == "labels": + item[key] = data[key] + else: + item[key] = data[key][index : index + self.max_seq_len] + else: + item[key] = data[key] + # select entity in current chunk + entities_in_this_span = [] + global_to_local_map = {} # + for entity_id, entity in enumerate(entities): + if ( + index <= entity["start"] < index + self.max_seq_len + and index <= entity["end"] < index + self.max_seq_len + ): + entity["start"] = entity["start"] - index + entity["end"] = entity["end"] - index + global_to_local_map[entity_id] = len(entities_in_this_span) + entities_in_this_span.append(entity) + + # select relations in current chunk + relations_in_this_span = [] + for relation in relations: + if ( + index <= relation["start_index"] < index + self.max_seq_len + and index <= relation["end_index"] < index + self.max_seq_len + ): + relations_in_this_span.append( + { + "head": global_to_local_map[relation["head"]], + "tail": global_to_local_map[relation["tail"]], + "start_index": relation["start_index"] - index, + "end_index": relation["end_index"] - index, + } + ) + item.update( + { + "entities": self.reformat(entities_in_this_span), + "relations": self.reformat(relations_in_this_span), + } + ) + if len(item["entities"]) > 0: + item["entities"]["label"] = [ + self.entities_labels[x] for x in item["entities"]["label"] + ] + encoded_inputs_all.append(item) + if len(encoded_inputs_all) == 0: + return None + return 
encoded_inputs_all[0] + + def reformat(self, data): + new_data = defaultdict(list) + for item in data: + for k, v in item.items(): + new_data[k].append(v) + return new_data + + +class VQASerTokenLayoutLMPostProcess(object): + """Convert between text-label and text-index""" + + def __init__(self, class_path, **kwargs): + super(VQASerTokenLayoutLMPostProcess, self).__init__() + label2id_map, self.id2label_map = load_vqa_bio_label_maps(class_path) + + self.label2id_map_for_draw = dict() + for key in label2id_map: + if key.startswith("I-"): + self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] + else: + self.label2id_map_for_draw[key] = label2id_map[key] + + self.id2label_map_for_show = dict() + for key in self.label2id_map_for_draw: + val = self.label2id_map_for_draw[key] + if key == "O": + self.id2label_map_for_show[val] = key + if key.startswith("B-") or key.startswith("I-"): + self.id2label_map_for_show[val] = key[2:] + else: + self.id2label_map_for_show[val] = key + + def __call__(self, preds, batch=None, *args, **kwargs): + import paddle + + if isinstance(preds, tuple): + preds = preds[0] + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + + if batch is not None: + return self._metric(preds, batch[5]) + else: + return self._infer(preds, **kwargs) + + def _metric(self, preds, label): + pred_idxs = preds.argmax(axis=2) + decode_out_list = [[] for _ in range(pred_idxs.shape[0])] + label_decode_out_list = [[] for _ in range(pred_idxs.shape[0])] + + for i in range(pred_idxs.shape[0]): + for j in range(pred_idxs.shape[1]): + if label[i, j] != -100: + label_decode_out_list[i].append(self.id2label_map[label[i, j]]) + decode_out_list[i].append(self.id2label_map[pred_idxs[i, j]]) + return decode_out_list, label_decode_out_list + + def _infer(self, preds, segment_offset_ids, ocr_infos): + results = [] + + for pred, segment_offset_id, ocr_info in zip( + preds, segment_offset_ids, ocr_infos + ): + pred = np.argmax(pred, axis=1) + pred = [self.id2label_map[idx] for idx in pred] + + for idx in range(len(segment_offset_id)): + if idx == 0: + start_id = 0 + else: + start_id = segment_offset_id[idx - 1] + + end_id = segment_offset_id[idx] + + curr_pred = pred[start_id:end_id] + curr_pred = [self.label2id_map_for_draw[p] for p in curr_pred] + + if len(curr_pred) <= 0: + pred_id = 0 + else: + counts = np.bincount(curr_pred) + pred_id = np.argmax(counts) + ocr_info[idx]["pred_id"] = int(pred_id) + ocr_info[idx]["pred"] = self.id2label_map_for_show[int(pred_id)] + results.append(ocr_info) + return results diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py new file mode 100755 index 0000000000..56f3b12e70 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py @@ -0,0 +1,19 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
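# [Editor's illustrative sketch -- not part of the patch] `load_vqa_bio_label_maps`
# from vqa_utils.py above expands a plain class list into BIO label maps; the class
# names, temporary file, and import path below are assumptions used for illustration.

import tempfile

from ultrainfer.vision.ocr.ppocr.utils.ser_vi_layoutxlm.vqa_utils import (
    load_vqa_bio_label_maps,
)

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write("OTHER\nHEADER\nQUESTION\nANSWER\n")  # "OTHER" is folded into the "O" label
    class_path = f.name

label2id, id2label = load_vqa_bio_label_maps(class_path)
# label2id -> {"O": 0, "B-HEADER": 1, "I-HEADER": 2, "B-QUESTION": 3,
#              "I-QUESTION": 4, "B-ANSWER": 5, "I-ANSWER": 6}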
+ +from __future__ import absolute_import +from .paddle3d.smoke import * +from .paddle3d.petr import * +from .paddle3d.centerpoint import * +from .paddle3d.caddn import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py new file mode 100755 index 0000000000..0f746f45d9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py @@ -0,0 +1,108 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class CaddnPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Caddn""" + self._preprocessor = C.vision.perception.CaddnPreprocessor(config_file) + + def run(self, input_ims, cam_data, lidar_data): + """Preprocess input images for Caddn + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims, cam_data, lidar_data) + + +class CaddnPostprocessor: + def __init__(self): + """Create a postprocessor for Caddn""" + self._postprocessor = C.vision.perception.CaddnPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Caddn + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Caddn(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a Caddn model exported by Caddn. 
+ + :param model_file: (str)Path of model file, e.g ./Caddn.pdmodel + :param params_file: (str)Path of parameters file, e.g ./Caddn.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Caddn, self).__init__(runtime_option) + + self._model = C.vision.perception.Caddn( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Caddn initialize failed." + + def predict(self, input_image, cam_data, lidar_data): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param: cam_data: (list)The input camera data + :param: lidar_data: (list)The input lidar data + :return: PerceptionResult + """ + return self._model.predict(input_image, cam_data, lidar_data) + + def batch_predict(self, images, cam_data, lidar_data): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :param: cam_data: (list)The input camera data + :param: lidar_data: (list)The input lidar data + :return list of PerceptionResult + """ + + return self._model.batch_predict(images, cam_data, lidar_data) + + @property + def preprocessor(self): + """Get CaddnPreprocessor object of the loaded model + + :return CaddnPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CaddnPostprocessor object of the loaded model + + :return CaddnPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py new file mode 100755 index 0000000000..07f7422be3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py @@ -0,0 +1,92 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class CenterpointPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Centerpoint""" + self._preprocessor = C.vision.perception.CenterpointPreprocessor(config_file) + + def run(self, point_dirs, num_point_dim, with_timelag): + """Preprocess input images for Centerpoint + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(point_dirs, num_point_dim, with_timelag) + + +class Centerpoint(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a Centerpoint model exported by Centerpoint. + + :param model_file: (str)Path of model file, e.g ./Centerpoint.pdmodel + :param params_file: (str)Path of parameters file, e.g ./Centerpoint.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Centerpoint, self).__init__(runtime_option) + + self._model = C.vision.perception.Centerpoint( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Centerpoint initialize failed." + + def predict(self, point_dir): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(point_dir) + + def batch_predict(self, points_dir): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(points_dir) + + @property + def preprocessor(self): + """Get CenterpointPreprocessor object of the loaded model + + :return CenterpointPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CenterpointPostprocessor object of the loaded model + + :return CenterpointPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py new file mode 100755 index 0000000000..8ae25c5c47 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py @@ -0,0 +1,106 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PetrPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Petr""" + self._preprocessor = C.vision.perception.PetrPreprocessor(config_file) + + def run(self, input_ims): + """Preprocess input images for Petr + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class PetrPostprocessor: + def __init__(self): + """Create a postprocessor for Petr""" + self._postprocessor = C.vision.perception.PetrPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Petr + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Petr(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SMoke model exported by Petr. + + :param model_file: (str)Path of model file, e.g ./petr.pdmodel + :param params_file: (str)Path of parameters file, e.g ./petr.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Petr, self).__init__(runtime_option) + + self._model = C.vision.perception.Petr( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Petr initialize failed." + + def predict(self, input_image): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PetrPreprocessor object of the loaded model + + :return PetrPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PetrPostprocessor object of the loaded model + + :return PetrPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py new file mode 100755 index 0000000000..d31159cb0f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py @@ -0,0 +1,106 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class SmokePreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Smoke""" + self._preprocessor = C.vision.perception.SmokePreprocessor(config_file) + + def run(self, input_ims): + """Preprocess input images for Smoke + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class SmokePostprocessor: + def __init__(self): + """Create a postprocessor for Smoke""" + self._postprocessor = C.vision.perception.SmokePostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Smoke + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Smoke(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SMoke model exported by Smoke. + + :param model_file: (str)Path of model file, e.g ./smoke.pdmodel + :param params_file: (str)Path of parameters file, e.g ./smoke.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Smoke, self).__init__(runtime_option) + + self._model = C.vision.perception.Smoke( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Smoke initialize failed." 
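# [Editor's note -- illustrative only, not part of the patch] Once constructed, the
# Smoke model above is typically driven as sketched below; the file paths, the input
# image, and the `ui.vision.perception.Smoke` import path are assumptions.
#
#   import cv2
#   import ultrainfer as ui
#
#   model = ui.vision.perception.Smoke(
#       "smoke/smoke.pdmodel",
#       "smoke/smoke.pdiparams",
#       "smoke/infer_cfg.yaml",
#       runtime_option=ui.RuntimeOption(),
#   )
#   im = cv2.imread("street.jpg")        # HWC, BGR
#   result = model.predict(im)           # -> PerceptionResult
#   results = model.batch_predict([im])  # -> list of PerceptionResult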
+ + def predict(self, input_image): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get SmokePreprocessor object of the loaded model + + :return SmokePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get SmokePostprocessor object of the loaded model + + :return SmokePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py new file mode 100755 index 0000000000..9e54a8c5f3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppseg import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py new file mode 100755 index 0000000000..e0a36b960c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py @@ -0,0 +1,321 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from dataclasses import dataclass +from typing import List + +import numpy as np +from skimage import morphology + +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config + + +class PaddleSegModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a image segmentation model exported by PaddleSeg. + + :param model_file: (str)Path of model file, e.g unet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g unet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str) Path of configuration file for deploy, e.g unet/deploy.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PaddleSegModel, self).__init__(runtime_option) + + # assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now." + self._model = C.vision.segmentation.PaddleSegModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleSeg model initialize failed." + + def predict(self, image): + """Predict the segmentation result for an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: SegmentationResult + """ + return self._model.predict(image) + + def batch_predict(self, image_list): + """Predict the segmentation results for a batch of input images + + :param image_list: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of SegmentationResult + """ + return self._model.batch_predict(image_list) + + def clone(self): + """Clone PaddleSegModel object + + :return: a new PaddleSegModel object + """ + + class PaddleSegCloneModel(PaddleSegModel): + def __init__(self, model): + self._model = model + + clone_model = PaddleSegCloneModel(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleSegPreprocessor object of the loaded model + + :return: PaddleSegPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleSegPostprocessor object of the loaded model + + :return: PaddleSegPostprocessor + """ + return self._model.postprocessor + + +class PaddleSegPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PaddleSegModel from configuration file + + :param config_file: (str)Path of configuration file, e.g ppliteseg/deploy.yaml + """ + self._manager = C.vision.segmentation.PaddleSegPreprocessor(config_file) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + @property + def is_vertical_screen(self): + """Atrribute of PP-HumanSeg model. 
Stating Whether the input image is vertical image(height > width), default value is False + + :return: value of is_vertical_screen(bool) + """ + return self._manager.is_vertical_screen + + @is_vertical_screen.setter + def is_vertical_screen(self, value): + """Set attribute is_vertical_screen of PP-HumanSeg model. + + :param value: (bool)The value to set is_vertical_screen + """ + assert isinstance( + value, bool + ), "The value to set `is_vertical_screen` must be type of bool." + self._manager.is_vertical_screen = value + + +class PaddleSegPostprocessor: + def __init__(self, config_file): + """Create a postprocessor for PaddleSegModel from configuration file + + :param config_file: (str)Path of configuration file, e.g ppliteseg/deploy.yaml + """ + self._postprocessor = C.vision.segmentation.PaddleSegPostprocessor(config_file) + + def run(self, runtime_results, imgs_info): + """Postprocess the runtime results for PaddleSegModel + + :param runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param imgs_info: The original input images shape info map, key is "shape_info", value is [[image_height, image_width]] + :return: list of SegmentationResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, imgs_info) + + @property + def apply_softmax(self): + """Atrribute of PaddleSeg model. Stating Whether applying softmax operator in the postprocess, default value is False + + :return: value of apply_softmax(bool) + """ + return self._postprocessor.apply_softmax + + @apply_softmax.setter + def apply_softmax(self, value): + """Set attribute apply_softmax of PaddleSeg model. + + :param value: (bool)The value to set apply_softmax + """ + assert isinstance( + value, bool + ), "The value to set `apply_softmax` must be type of bool." + self._postprocessor.apply_softmax = value + + @property + def store_score_map(self): + """Atrribute of PaddleSeg model. Stating Whether storing score map in the SegmentationResult, default value is False + + :return: value of store_score_map(bool) + """ + return self._postprocessor.store_score_map + + @store_score_map.setter + def store_score_map(self, value): + """Set attribute store_score_map of PaddleSeg model. + + :param value: (bool)The value to set store_score_map + """ + assert isinstance( + value, bool + ), "The value to set `store_score_map` must be type of bool." 
+ self._postprocessor.store_score_map = value + + +class PyOnlyAnomalyDetectionModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + self._config = load_config(config_file) + self._preprocessor = _PyOnlyAnomalyDetectionPreprocessor( + self._config["Deploy"]["transforms"] + ) + self._postprocessor = _PyOnlyAnomalyDetectionPostprocessor() + + def model_name(): + return "PyOnlyImageAnomalyDetectionModel" + + def batch_predict(self, imgs): + data_list = [] + for img in imgs: + data = {"img": img} + data = self._preprocessor.run(data) + data_list.append(data) + + input_name = self._runtime.get_input_info(0).name + imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32) + imgs = np.ascontiguousarray(imgs) + output_arrs = self._runtime.infer({input_name: imgs}) + + results = [] + for score_map in output_arrs[0]: + data = {"score_map": score_map} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyAnomalyDetectionPreprocessor(object): + def __init__(self, config): + super().__init__() + processors = self._build_processors(config) + processors.append(P.ToCHWImage()) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + for item in config: + tf_type = item["type"] + args = {k: v for k, v in item.items() if k != "type"} + if tf_type == "Resize": + if args.keys() - { + "target_size", + "keep_ratio", + "size_divisor", + "interp", + }: + raise ValueError + args.setdefault("keep_ratio", False) + args.setdefault("size_divisor", None) + args.setdefault("interp", "LINEAR") + processor = P.Resize( + target_size=args["target_size"], + keep_ratio=args["keep_ratio"], + size_divisor=args["size_divisor"], + interp=args["interp"], + ) + elif tf_type == "ResizeByLong": + if args.keys() - {"long_size"}: + raise ValueError + args.setdefault("size_divisor", None) + args.setdefault("interp", "LINEAR") + processor = P.ResizeByLong(target_long_edge=args["long_size"]) + elif tf_type == "ResizeByShort": + if args.keys() - {"short_size"}: + raise ValueError + processor = P.ResizeByShort(target_short_edge=args["short_size"]) + elif tf_type == "Normalize": + if args.keys() - {"mean", "std"}: + raise ValueError + args.setdefault("mean", 0.5) + args.setdefault("std", 0.5) + processor = P.Normalize(mean=args["mean"], std=args["std"]) + else: + raise ValueError("Unknown transform type") + processors.append(processor) + return processors + + +class _PyOnlyAnomalyDetectionPostprocessor(object): + def run(self, data): + score_map = data["score_map"] + + thred = 0.01 + mask = score_map[0] + mask[mask > thred] = 255 + mask[mask <= thred] = 0 + kernel = morphology.disk(4) + mask = morphology.opening(mask, kernel) + mask = mask.astype(np.uint8) + + result = _PyOnlyAnomalyDetectionResult( + label_map=mask.reshape((-1)).tolist(), shape=list(mask.shape) + ) + return result + + +@dataclass +class _PyOnlyAnomalyDetectionResult(object): + label_map: List[int] + shape: List[int] diff --git a/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py 
b/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py new file mode 100755 index 0000000000..39cd8d3776 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from .ppsr import PPMSVSR, EDVR, BasicVSR diff --git a/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py new file mode 100755 index 0000000000..23474e57e0 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py @@ -0,0 +1,122 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPMSVSR(UltraInferModel): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a VSR model exported by PaddleGAN. + + :param model_file: (str)Path of model file, e.g PPMSVSR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g PPMSVSR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPMSVSR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.PPMSVSR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PPMSVSR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) + + +class EDVR(PPMSVSR): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a EDVR model exported by PaddleGAN. 
+ + :param model_file: (str)Path of model file, e.g EDVR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g EDVR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "EDVR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.EDVR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "EDVR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) + + +class BasicVSR(PPMSVSR): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a EDVR model exported by PaddleGAN. + + :param model_file: (str)Path of model file, e.g BasicVSR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g BasicVSR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "BasicVSR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.BasicVSR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "BasicVSR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) diff --git a/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py new file mode 100755 index 0000000000..3ebcf61aeb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py @@ -0,0 +1,21 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from ... 
import c_lib_wrap as C +from .pptracking import PPTracking + +try: + TrailRecorder = C.vision.tracking.TrailRecorder +except: + pass diff --git a/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py new file mode 100755 index 0000000000..9724d4cb84 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py @@ -0,0 +1,69 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPTracking(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPTracking model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g pptracking/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe/model.pdiparams + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPTracking, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPTracking model only support model format of ModelFormat.Paddle now." + self._model = C.vision.tracking.PPTracking( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPTracking model initialize failed." + + def predict(self, input_image): + """Predict the MOT result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MOTResult + """ + assert input_image is not None, "The input image data is None." + return self._model.predict(input_image) + + def bind_recorder(self, val): + """Binding tracking trail + + :param val: (TrailRecorder) trail recorder, which is contained object's id and center point sequence + :return: None + """ + self._model.bind_recorder(val) + + def unbind_recorder(self): + """cancel binding of tracking trail + + :return: + """ + self._model.unbind_recorder() diff --git a/libs/ultrainfer/python/ultrainfer/vision/utils.py b/libs/ultrainfer/python/ultrainfer/vision/utils.py new file mode 100755 index 0000000000..b33e443743 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/utils.py @@ -0,0 +1,290 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
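
A sketch of driving the PPTracking wrapper above frame by frame and binding a TrailRecorder so the C++ side accumulates per-object center trails; the model directory and video path are placeholders, and the no-argument TrailRecorder constructor is an assumption based on the guarded export in tracking/__init__.py.

# Hedged frame-by-frame PPTracking sketch (paths and constructor are assumptions).
import cv2
import ultrainfer as ui

tracker = ui.vision.tracking.PPTracking(
    "pptracking/model.pdmodel",
    "pptracking/model.pdiparams",
    "pptracking/infer_cfg.yml",
)

recorder = ui.vision.tracking.TrailRecorder()  # only present if the native export succeeded
tracker.bind_recorder(recorder)

cap = cv2.VideoCapture("street.mp4")           # assumed input video
while True:
    ok, frame = cap.read()
    if not ok:
        break
    mot_result = tracker.predict(frame)        # MOTResult: boxes, ids, scores, class_ids
cap.release()
tracker.unbind_recorder()
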
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import json +from .. import c_lib_wrap as C + + +def mask_to_json(result): + r_json = { + "data": result.data, + "shape": result.shape, + } + return json.dumps(r_json) + + +def detection_to_json(result): + masks = [] + for mask in result.masks: + masks.append(mask_to_json(mask)) + r_json = { + "boxes": result.boxes, + "scores": result.scores, + "label_ids": result.label_ids, + "masks": masks, + "contain_masks": result.contain_masks, + } + return json.dumps(r_json) + + +def perception_to_json(result): + r_json = { + "scores": result.scores, + "label_ids": result.label_ids, + "boxes": result.boxes, + "center": result.center, + "observation_angle": result.observation_angle, + "yaw_angle": result.yaw_angle, + "velocity": result.velocity, + } + return json.dumps(r_json) + + +def classify_to_json(result): + r_json = { + "label_ids": result.label_ids, + "scores": result.scores, + } + return json.dumps(r_json) + + +def keypoint_to_json(result): + r_json = { + "keypoints": result.keypoints, + "scores": result.scores, + "num_joints": result.num_joints, + } + return json.dumps(r_json) + + +def ocr_to_json(result): + r_json = { + "boxes": result.boxes, + "text": result.text, + "rec_scores": result.rec_scores, + "cls_scores": result.cls_scores, + "cls_labels": result.cls_labels, + } + return json.dumps(r_json) + + +def mot_to_json(result): + r_json = { + "boxes": result.boxes, + "ids": result.ids, + "scores": result.scores, + "class_ids": result.class_ids, + } + return json.dumps(r_json) + + +def face_detection_to_json(result): + r_json = { + "boxes": result.boxes, + "landmarks": result.landmarks, + "scores": result.scores, + "landmarks_per_face": result.landmarks_per_face, + } + return json.dumps(r_json) + + +def face_alignment_to_json(result): + r_json = { + "landmarks": result.landmarks, + } + return json.dumps(r_json) + + +def face_recognition_to_json(result): + r_json = { + "embedding": result.embedding, + } + return json.dumps(r_json) + + +def segmentation_to_json(result): + r_json = { + "label_map": result.label_map, + "score_map": result.score_map, + "shape": result.shape, + "contain_score_map": result.contain_score_map, + } + return json.dumps(r_json) + + +def matting_to_json(result): + r_json = { + "alpha": result.alpha, + "foreground": result.foreground, + "shape": result.shape, + "contain_foreground": result.contain_foreground, + } + return json.dumps(r_json) + + +def head_pose_to_json(result): + r_json = { + "euler_angles": result.euler_angles, + } + return json.dumps(r_json) + + +def fd_result_to_json(result): + if isinstance(result, list): + r_list = [] + for r in result: + r_list.append(fd_result_to_json(r)) + return r_list + elif isinstance(result, C.vision.DetectionResult): + return detection_to_json(result) + elif isinstance(result, C.vision.Mask): + return mask_to_json(result) + elif isinstance(result, C.vision.ClassifyResult): + return classify_to_json(result) + elif isinstance(result, C.vision.KeyPointDetectionResult): + return keypoint_to_json(result) + elif isinstance(result, C.vision.OCRResult): + return ocr_to_json(result) + 
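
The *_to_json helpers above flatten each C++ result object into a JSON string, and the json_to_* functions later in this module rebuild the objects. A small round-trip sketch for a detection result, assuming it was produced by any model that returns a DetectionResult:

# Hedged round-trip sketch using the helpers defined in this module.
import json
from ultrainfer.vision import utils as vision_utils

def save_detection(det_result, path):
    # detection_to_json returns a JSON string; nested masks are themselves JSON strings.
    with open(path, "w") as f:
        f.write(vision_utils.detection_to_json(det_result))

def load_detection(path):
    with open(path) as f:
        data = json.load(f)
    # json_to_detection (defined further down) rebuilds a C.vision.DetectionResult.
    return vision_utils.json_to_detection(data)
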
elif isinstance(result, C.vision.MOTResult): + return mot_to_json(result) + elif isinstance(result, C.vision.FaceDetectionResult): + return face_detection_to_json(result) + elif isinstance(result, C.vision.FaceAlignmentResult): + return face_alignment_to_json(result) + elif isinstance(result, C.vision.FaceRecognitionResult): + return face_recognition_to_json(result) + elif isinstance(result, C.vision.SegmentationResult): + return segmentation_to_json(result) + elif isinstance(result, C.vision.MattingResult): + return matting_to_json(result) + elif isinstance(result, C.vision.HeadPoseResult): + return head_pose_to_json(result) + elif isinstance(result, C.vision.PerceptionResult): + return perception_to_json(result) + else: + assert False, "{} Conversion to JSON format is not supported".format( + type(result) + ) + return {} + + +def json_to_mask(result): + mask = C.vision.Mask() + mask.data = result["data"] + mask.shape = result["shape"] + return mask + + +def json_to_detection(result): + masks = [] + for mask in result["masks"]: + masks.append(json_to_mask(json.loads(mask))) + det_result = C.vision.DetectionResult() + det_result.boxes = result["boxes"] + det_result.scores = result["scores"] + det_result.label_ids = result["label_ids"] + det_result.masks = masks + det_result.contain_masks = result["contain_masks"] + return det_result + + +def json_to_perception(result): + perception_result = C.vision.PerceptionResult() + perception_result.scores = result["scores"] + perception_result.label_ids = result["label_ids"] + perception_result.boxes = result["boxes"] + perception_result.center = result["center"] + perception_result.observation_angle = result["observation_angle"] + perception_result.yaw_angle = result["yaw_angle"] + perception_result.velocity = result["velocity"] + return perception_result + + +def json_to_classify(result): + cls_result = C.vision.ClassifyResult() + cls_result.label_ids = result["label_ids"] + cls_result.scores = result["scores"] + return cls_result + + +def json_to_keypoint(result): + kp_result = C.vision.KeyPointDetectionResult() + kp_result.keypoints = result["keypoints"] + kp_result.scores = result["scores"] + kp_result.num_joints = result["num_joints"] + return kp_result + + +def json_to_ocr(result): + ocr_result = C.vision.OCRResult() + ocr_result.boxes = result["boxes"] + ocr_result.text = result["text"] + ocr_result.rec_scores = result["rec_scores"] + ocr_result.cls_scores = result["cls_scores"] + ocr_result.cls_labels = result["cls_labels"] + return ocr_result + + +def json_to_mot(result): + mot_result = C.vision.MOTResult() + mot_result.boxes = result["boxes"] + mot_result.ids = result["ids"] + mot_result.scores = result["scores"] + mot_result.class_ids = result["class_ids"] + return mot_result + + +def json_to_face_detection(result): + face_result = C.vision.FaceDetectionResult() + face_result.boxes = result["boxes"] + face_result.landmarks = result["landmarks"] + face_result.scores = result["scores"] + face_result.landmarks_per_face = result["landmarks_per_face"] + return face_result + + +def json_to_face_alignment(result): + face_result = C.vision.FaceAlignmentResult() + face_result.landmarks = result["landmarks"] + return face_result + + +def json_to_face_recognition(result): + face_result = C.vision.FaceRecognitionResult() + face_result.embedding = result["embedding"] + return face_result + + +def json_to_segmentation(result): + seg_result = C.vision.SegmentationResult() + seg_result.label_map = result["label_map"] + seg_result.score_map = 
result["score_map"] + seg_result.shape = result["shape"] + seg_result.contain_score_map = result["contain_score_map"] + return seg_result + + +def json_to_matting(result): + matting_result = C.vision.MattingResult() + matting_result.alpha = result["alpha"] + matting_result.foreground = result["foreground"] + matting_result.shape = result["shape"] + matting_result.contain_foreground = result["contain_foreground"] + return matting_result + + +def json_to_head_pose(result): + hp_result = C.vision.HeadPoseResult() + hp_result.euler_angles = result["euler_angles"] + return hp_result diff --git a/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py new file mode 100755 index 0000000000..15730ddeaa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py @@ -0,0 +1,229 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from ... import c_lib_wrap as C +import cv2 + + +def vis_detection( + im_data, + det_result, + labels=[], + score_threshold=0.0, + line_size=1, + font_size=0.5, + font_color=[255, 255, 255], + font_thickness=1, +): + """Show the visualized results for detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :param labels: (list of str) the visualized result will show the bounding box contain class label + :param score_threshold: (float) score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :param font_color: (list of int) font_color for text + :param font_thickness: (int) font_thickness for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_detection( + im_data, + det_result, + labels, + score_threshold, + line_size, + font_size, + font_color, + font_thickness, + ) + + +def vis_perception( + im_data, det_result, config_file, score_threshold=0.0, line_size=1, font_size=0.5 +): + """Show the visualized results for 3d detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :param config_file: the config file for detection and visualization + :param score_threshold: (float) score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_perception( + im_data, det_result, config_file, score_threshold, line_size, font_size + ) + + +def vis_keypoint_detection(im_data, 
keypoint_det_result, conf_threshold=0.5): + """Show the visualized results for keypoint detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param keypoint_det_result: the result produced by model + :param conf_threshold: (float) conf_threshold threshold for result scores, the bounding box will not be shown if the score is less than conf_threshold + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.Visualize.vis_keypoint_detection( + im_data, keypoint_det_result, conf_threshold + ) + + +def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5): + """Show the visualized results for face detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param face_det_result: the result produced by model + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_face_detection(im_data, face_det_result, line_size, font_size) + + +def vis_face_alignment(im_data, face_align_result, line_size=1): + """Show the visualized results for face alignment models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param face_align_result: the result produced by model + :param line_size: (float)line_size line size for circle point + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_face_alignment(im_data, face_align_result, line_size) + + +def vis_segmentation(im_data, seg_result, weight=0.5): + """Show the visualized results for segmentation models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param seg_result: the result produced by model + :param weight: (float)transparent weight of visualized result image + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_segmentation(im_data, seg_result, weight) + + +def vis_matting_alpha(im_data, matting_result, remove_small_connected_area=False): + logging.warning( + "DEPRECATED: ultrainfer.vision.vis_matting_alpha is deprecated, please use ultrainfer.vision.vis_matting function instead." 
+ ) + return C.vision.vis_matting(im_data, matting_result, remove_small_connected_area) + + +def vis_matting( + im_data, + matting_result, + transparent_background=False, + transparent_threshold=0.99, + remove_small_connected_area=False, +): + """Show the visualized results for matting models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param matting_result: the result produced by model + :param transparent_background: whether visulizing matting result with transparent background + :param transparent_threshold: since the alpha value in MattringResult is a float between [0, 1], transparent_threshold is used to filter background pixel + :param remove_small_connected_area: (bool) if remove_small_connected_area==True, the visualized result will not include the small connected areas + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_matting( + im_data, + matting_result, + transparent_background, + transparent_threshold, + remove_small_connected_area, + ) + + +def swap_background_matting( + im_data, background, result, remove_small_connected_area=False +): + logging.warning( + "DEPRECATED: ultrainfer.vision.swap_background_matting is deprecated, please use ultrainfer.vision.swap_background function instead." + ) + assert isinstance( + result, C.vision.MattingResult + ), "The result must be MattingResult type" + return C.vision.Visualize.swap_background_matting( + im_data, background, result, remove_small_connected_area + ) + + +def swap_background_segmentation(im_data, background, background_label, result): + logging.warning( + "DEPRECATED: ultrainfer.vision.swap_background_segmentation is deprecated, please use ultrainfer.vision.swap_background function instead." + ) + assert isinstance( + result, C.vision.SegmentationResult + ), "The result must be SegmentaitonResult type" + return C.vision.Visualize.swap_background_segmentation( + im_data, background, background_label, result + ) + + +def swap_background( + im_data, background, result, remove_small_connected_area=False, background_label=0 +): + """Swap the image background with MattingResult or SegmentationResult + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param background: (numpy.ndarray)The background image data, 3-D array with layout HWC, BGR format + :param result: The result produced by model, MattingResult or SegmentationResult + :param remove_small_connected_area: (bool) If remove_small_connected_area==True, the visualized result will not include the small connected areas + :param background_label: (int)The background label number in SegmentationResult + :return: (numpy.ndarray) image with visualized results + """ + if isinstance(result, C.vision.MattingResult): + return C.vision.swap_background( + im_data, background, result, remove_small_connected_area + ) + elif isinstance(result, C.vision.SegmentationResult): + return C.vision.swap_background(im_data, background, result, background_label) + else: + raise Exception( + "Only support result type of MattingResult or SegmentationResult, but now the data type is {}.".format( + type(result) + ) + ) + + +def vis_ppocr(im_data, det_result): + """Show the visualized results for ocr models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_ppocr(im_data, det_result) + + +def 
vis_ppocr_curve(im_data, det_result): + """Show the visualized results for ocr models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_ppocr_curve(im_data, det_result) + + +def vis_mot(im_data, mot_result, score_threshold=0.0, records=None): + return C.vision.vis_mot(im_data, mot_result, score_threshold, records) + + +def vis_headpose(im_data, headpose_result, size=50, line_size=1): + return C.vision.vis_headpose(im_data, headpose_result, size, line_size) diff --git a/libs/ultrainfer/scripts/__init__.py b/libs/ultrainfer/scripts/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/scripts/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/scripts/ascend_init.sh b/libs/ultrainfer/scripts/ascend_init.sh new file mode 100755 index 0000000000..e88150d3de --- /dev/null +++ b/libs/ultrainfer/scripts/ascend_init.sh @@ -0,0 +1,13 @@ +# Set huawei ascend toolkit correctly. +HUAWEI_ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest" +HUAWEI_ASCEND_DRIVER_PATH="/usr/local/Ascend/driver" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HUAWEI_ASCEND_DRIVER_PATH/lib64/driver:$HUAWEI_ASCEND_DRIVER_PATH/lib64:$HUAWEI_ASCEND_DRIVER_PATH/lib64/stub:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/opp/op_proto/built-in +export PYTHONPATH=$PYTHONPATH:$HUAWEI_ASCEND_TOOLKIT_HOME/fwkacllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/pyACL/python/site-packages/acl +export PATH=$PATH:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/ccec_compiler/bin:${HUAWEI_ASCEND_TOOLKIT_HOME}/acllib/bin:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/bin +export ASCEND_AICPU_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME +export ASCEND_OPP_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME/opp +export TOOLCHAIN_HOME=$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +echo "===== Finish Initializing Environment for Ascend Deployment =====" diff --git a/libs/ultrainfer/scripts/build_bcloud_lib.py b/libs/ultrainfer/scripts/build_bcloud_lib.py new file mode 100755 index 0000000000..e896a3c563 --- /dev/null +++ b/libs/ultrainfer/scripts/build_bcloud_lib.py @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
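
The visualization wrappers above are thin pass-throughs to the C++ drawing routines. A hedged example of overlaying detection results on an image; the image path, the label list, and the use of PP-YOLOE (standing in for any model that returns a DetectionResult) are assumptions for illustration:

# Hedged vis_detection sketch (model choice, paths, and labels are assumptions).
import cv2
import ultrainfer as ui

im = cv2.imread("test.jpg")
detector = ui.vision.detection.PPYOLOE(        # any DetectionResult-producing model works
    "ppyoloe/model.pdmodel", "ppyoloe/model.pdiparams", "ppyoloe/infer_cfg.yml"
)
det_result = detector.predict(im)

vis_im = ui.vision.vis_detection(
    im,
    det_result,
    labels=["person", "car"],                  # hypothetical class names
    score_threshold=0.5,                       # hide low-confidence boxes
    line_size=2,
    font_size=0.6,
)
cv2.imwrite("vis_result.jpg", vis_im)
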
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import shutil + +dirname = sys.argv[1] +bc_dirname = sys.argv[2] + +if os.path.exists(bc_dirname): + raise Exception("Path {} is already exists.".format(bc_dirname)) + +os.makedirs(bc_dirname) + +# copy include files +shutil.copytree(os.path.join(dirname, "include"), os.path.join(bc_dirname, "include")) + +# copy libraries +shutil.copytree(os.path.join(dirname, "lib"), os.path.join(bc_dirname, "lib")) + +third_libs = os.path.join(dirname, "third_libs") + +for root, dirs, files in os.walk(third_libs): + for f in files: + if f.strip().count(".so") > 0 or f.strip() == "plugins.xml": + full_path = os.path.join(root, f) + shutil.copy( + full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False + ) diff --git a/libs/ultrainfer/scripts/clean_sdk.sh b/libs/ultrainfer/scripts/clean_sdk.sh new file mode 100755 index 0000000000..d01730df0d --- /dev/null +++ b/libs/ultrainfer/scripts/clean_sdk.sh @@ -0,0 +1,7 @@ +origin_install_dir=$1 + +rm -rf $origin_install_dir/include/onnx $origin_install_dir/include/paddle2onnx +mv $origin_install_dir/lib $origin_install_dir/lib_bak +mkdir $origin_install_dir/lib +cp $origin_install_dir/lib_bak/*ultrainfer* $origin_install_dir/lib +rm -rf $origin_install_dir/lib_bak diff --git a/libs/ultrainfer/scripts/copy_directory.py b/libs/ultrainfer/scripts/copy_directory.py new file mode 100755 index 0000000000..f4363ba4a8 --- /dev/null +++ b/libs/ultrainfer/scripts/copy_directory.py @@ -0,0 +1,32 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
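
build_bcloud_lib.py above flattens an installed SDK into a single include/ + lib/ layout, pulling every third-party shared object (plus OpenVINO's plugins.xml) into lib/. The pathlib sketch below restates only that collection step to make the filter explicit; the directory names are assumptions:

# Illustrative restatement of the third_libs collection done by build_bcloud_lib.py.
import shutil
from pathlib import Path

def collect_third_party_libs(sdk_dir, out_lib_dir):
    out_lib_dir = Path(out_lib_dir)
    out_lib_dir.mkdir(parents=True, exist_ok=True)
    for path in Path(sdk_dir, "third_libs").rglob("*"):
        # Same rule as the os.walk loop above: shared objects and plugins.xml only.
        if path.is_file() and (".so" in path.name or path.name == "plugins.xml"):
            shutil.copy(path, out_lib_dir, follow_symlinks=False)

# collect_third_party_libs("ultrainfer_install", "bcloud_pkg/lib")  # assumed paths
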
+ +import shutil +import os +import sys + + +def copy_directory(src, dst): + if os.path.exists(dst): + raise Exception("Destination {} is already exist.".format(dst)) + if not os.path.exists(src): + raise Exception("Source {} is not exist.".format(src)) + try: + shutil.copytree(src, dst, symlinks=True) + except: + raise Exception("Copy {} to {} failed.".format(src, dst)) + + +if __name__ == "__main__": + copy_directory(sys.argv[1], sys.argv[2]) diff --git a/libs/ultrainfer/scripts/linux/_build_cpp.sh b/libs/ultrainfer/scripts/linux/_build_cpp.sh new file mode 100755 index 0000000000..8c5a819cdc --- /dev/null +++ b/libs/ultrainfer/scripts/linux/_build_cpp.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -e + +TRT_VERSION='8.5.2.2' +CUDA_VERSION='11.8' +CUDNN_VERSION='8.6' + +# deal cmd input +while [[ "$#" -gt 0 ]]; do + case "$1" in + --with-gpu) WITH_GPU="$2"; shift ;; + --enable-benchmark) ENABLE_BENCHMARK="$2"; shift ;; + --paddleinference-url) PADDLEINFERENCE_URL="$2"; shift ;; + --paddleinference-version) PADDLEINFERENCE_VERSION="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +export DEBIAN_FRONTEND='noninteractive' +export TZ='Asia/Shanghai' +export CC=/usr/local/gcc-8.2/bin/gcc +export CXX=/usr/local/gcc-8.2/bin/g++ + +cd /workspace/ultrainfer + +wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo +yum clean all +yum makecache + +yum install -y patchelf rapidjson-devel + +ln -sf /opt/_internal/cpython-3.10.0/bin/python3.10 /usr/bin/python +ln -sf /opt/_internal/cpython-3.10.0/bin/pip3.10 /usr/bin/pip + +export LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib:${LD_LIBRARY_PATH} +export PATH=/opt/_internal/cpython-3.10.0/bin:${PATH} + +rm -rf "TensorRT-${TRT_VERSION}" "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +http_proxy= https_proxy= wget "https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +tar -xzvf "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" + +( + cd /workspace/ultrainfer + rm -rf build && mkdir build && cd build + unset http_proxy https_proxy + cmake \ + -DLIBRARY_NAME='ultrainfer_runtime' \ + -DCMAKE_INSTALL_PREFIX="${PWD}/ultrainfer_install" \ + -DWITH_GPU="${WITH_GPU}" \ + -DENABLE_TRT_BACKEND="${WITH_GPU}" \ + -DTRT_DIRECTORY="/workspace/ultrainfer/TensorRT-${TRT_VERSION}" \ + -DENABLE_ORT_BACKEND=ON \ + -DENABLE_PADDLE_BACKEND=ON \ + -DPADDLEINFERENCE_URL="${PADDLEINFERENCE_URL}" \ + -DPADDLEINFERENCE_VERSION="${PADDLEINFERENCE_VERSION}" \ + -DENABLE_OPENVINO_BACKEND=ON \ + -DENABLE_VISION=ON \ + -DENABLE_TEXT=ON \ + -DBUILD_ULTRAINFER_PYTHON=OFF \ + -DBUILD_FD_TRITON_BACKEND=ON \ + -DENABLE_BENCHMARK="${ENABLE_BENCHMARK}" \ + .. 
+ make -j"$(nproc)" + make install +) diff --git a/libs/ultrainfer/scripts/linux/_build_py.sh b/libs/ultrainfer/scripts/linux/_build_py.sh new file mode 100755 index 0000000000..de06b7949c --- /dev/null +++ b/libs/ultrainfer/scripts/linux/_build_py.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +set -e + +TRT_VERSION='8.5.2.2' +CUDA_VERSION='11.8' +CUDNN_VERSION='8.6' + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --with-gpu) WITH_GPU="$2"; shift ;; + --enable-benchmark) ENABLE_BENCHMARK="$2"; shift ;; + --python) PYTHON_VERSION="$2"; shift ;; + --paddleinference-url) PADDLEINFERENCE_URL="$2"; shift ;; + --paddleinference-version) PADDLEINFERENCE_VERSION="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +export DEBIAN_FRONTEND='noninteractive' +export TZ='Asia/Shanghai' + +cd /workspace + +wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo +yum clean all +yum makecache + +yum --disablerepo=epel -y update ca-certificates +yum install -y wget bzip2 +yum install -y epel-release +yum install -y patchelf rapidjson-devel + +PYTHON_DIR="/opt/_internal/cpython-${PYTHON_VERSION}" +if [ -d "$PYTHON_DIR" ]; then + ln -sf "${PYTHON_DIR}/bin/python${PYTHON_VERSION}" /usr/bin/python + ln -sf "${PYTHON_DIR}/bin/pip${PYTHON_VERSION}" /usr/bin/pip + export LD_LIBRARY_PATH="${PYTHON_DIR}/lib:${LD_LIBRARY_PATH}" + export PATH="${PYTHON_DIR}/bin:${PATH}" +else + echo "Python version ${PYTHON_VERSION} not found in ${PYTHON_DIR}." + exit 1 +fi + +python -m pip install numpy pandas + +cd /workspace/ultrainfer + +rm -rf "TensorRT-${TRT_VERSION}" "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +http_proxy= https_proxy= wget "https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +tar -xzvf "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" + +export WITH_GPU="${WITH_GPU}" +export ENABLE_TRT_BACKEND="${WITH_GPU}" +export TRT_DIRECTORY="/workspace/ultrainfer/TensorRT-${TRT_VERSION}" +export ENABLE_ORT_BACKEND=ON +export ENABLE_PADDLE_BACKEND=ON +export PADDLEINFERENCE_URL="${PADDLEINFERENCE_URL}" +export PADDLEINFERENCE_VERSION="${PADDLEINFERENCE_VERSION}" +export ENABLE_OPENVINO_BACKEND=ON +export ENABLE_VISION=ON +export ENABLE_TEXT=ON +export ENABLE_BENCHMARK="${ENABLE_BENCHMARK}" +export CC=/usr/local/gcc-8.2/bin/gcc +export CXX=/usr/local/gcc-8.2/bin/g++ + +cd /workspace/ultrainfer/python +python -m pip install wheel +unset http_proxy https_proxy + +rm -rf .setuptools-cmake-build build ultrainfer/libs/third_libs dist +python setup.py build +# HACK +patchelf \ + --set-rpath 
'$ORIGIN/libs/third_libs/onnxruntime/lib:$ORIGIN/libs/third_libs/paddle2onnx/lib:$ORIGIN/libs/third_libs/paddle_inference/paddle/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/cryptopp/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/mklml/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/glog/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/protobuf/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/utf8proc/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/xxhash/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/gflags/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/onednn/lib:$ORIGIN/libs/third_libs/tensorrt/lib:$ORIGIN/libs/third_libs/opencv/lib64:$ORIGIN/libs/third_libs/openvino/runtime/lib:$ORIGIN/libs/third_libs/openvino/runtime/3rdparty/omp/lib' \ + build/lib.*/ultrainfer/ultrainfer_main*.so +python setup.py bdist_wheel diff --git a/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh new file mode 100755 index 0000000000..e091e8aab4 --- /dev/null +++ b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# input +CONTAINER_NAME="${CONTAINER_NAME:-build_fd}" +WITH_GPU="${WITH_GPU:-ON}" +ENABLE_BENCHMARK="${ENABLE_BENCHMARK:-OFF}" +DEBUG="${DEBUG:-OFF}" + +DOCKER_IMAGE="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle_manylinux_devel:cuda11.8-cudnn8.6-trt8.5-gcc8.2" + +if [[ -z "$PADDLEINFERENCE_URL" ]]; then + echo "Error: PADDLEINFERENCE_URL is not set." + exit 1 +fi + +if [[ -z "$PADDLEINFERENCE_VERSION" ]]; then + echo "Error: PADDLEINFERENCE_VERSION is not set." + exit 1 +fi + +# Set variables +CMAKE_CXX_COMPILER="/usr/local/gcc-8.2/bin/g++" + +# Get the current script directory and compute the directory to mount +SCRIPT_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" +ULTRAINFER_DIR="$(realpath "$SCRIPT_DIR/../../../")" + +# Set the Docker startup command +if [ "$WITH_GPU" = "ON" ]; then + DOCKER_CMD=$(cat << EOF +docker run --gpus all -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +ldconfig && \ +cd /workspace && \ +./ultrainfer/scripts/linux/_build_cpp.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +else + DOCKER_CMD=$(cat << EOF +docker run -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +./ultrainfer/scripts/linux/_build_cpp.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +fi + +# If in debug mode, replace --rm with -it and keep the container running +if [ "$DEBUG" = "OFF" ]; then + DOCKER_CMD="${DOCKER_CMD/-it/--rm}" + DOCKER_CMD="${DOCKER_CMD/ && tail -f \/dev\/null/}" +fi + +# Check if a Docker container with the same name already exists +if docker ps -a --format '{{.Names}}' | grep -Eq 
"^${CONTAINER_NAME}\$"; then + echo "Error: A Docker container with the name '${CONTAINER_NAME}' already exists." + echo "Please remove the existing container or choose a different container name." + exit 1 +fi + +echo "Starting Docker container..." +eval "$DOCKER_CMD" diff --git a/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh new file mode 100755 index 0000000000..bec7f6028e --- /dev/null +++ b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +# input +CONTAINER_NAME="${CONTAINER_NAME:-build_fd}" +WITH_GPU="${WITH_GPU:-ON}" +ENABLE_BENCHMARK="${ENABLE_BENCHMARK:-OFF}" +DEBUG="${DEBUG:-OFF}" +PYTHON_VERSION="${PYTHON_VERSION:-3.10.0}" + +DOCKER_IMAGE="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle_manylinux_devel:cuda11.8-cudnn8.6-trt8.5-gcc8.2" + +if [[ -z "$PADDLEINFERENCE_URL" ]]; then + echo "Error: PADDLEINFERENCE_URL is not set." + exit 1 +fi + +if [[ -z "$PADDLEINFERENCE_VERSION" ]]; then + echo "Error: PADDLEINFERENCE_VERSION is not set." + exit 1 +fi + +# Set variables +CMAKE_CXX_COMPILER="/usr/local/gcc-8.2/bin/g++" + +# Get the current script directory and compute the directory to mount +SCRIPT_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" +ULTRAINFER_DIR="$(realpath "$SCRIPT_DIR/../../../")" + +# Set the Docker startup command +if [ "$WITH_GPU" = "ON" ]; then + DOCKER_CMD=$(cat << EOF +docker run --gpus all -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +ldconfig && \ +./ultrainfer/scripts/linux/_build_py.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --python "${PYTHON_VERSION}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +else + DOCKER_CMD=$(cat << EOF +docker run -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +./ultrainfer/scripts/linux/_build_py.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --python "${PYTHON_VERSION}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +fi + +# If in debug mode, replace --rm with -it and keep the container running +if [ "$DEBUG" = "OFF" ]; then + DOCKER_CMD="${DOCKER_CMD/-it/--rm}" + DOCKER_CMD="${DOCKER_CMD/ && tail -f \/dev\/null/}" +fi + +# Check if a Docker container with the same name already exists +if docker ps -a --format '{{.Names}}' | grep -Eq "^${CONTAINER_NAME}\$"; then + echo "Error: A Docker container with the name '${CONTAINER_NAME}' already exists." + echo "Please remove the existing container or choose a different container name." + exit 1 +fi + +echo "Starting Docker container..." +eval "$DOCKER_CMD" diff --git a/libs/ultrainfer/scripts/patch_lib.sh b/libs/ultrainfer/scripts/patch_lib.sh new file mode 100755 index 0000000000..952c838d8e --- /dev/null +++ b/libs/ultrainfer/scripts/patch_lib.sh @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +patchelf --set-rpath '${ORIGIN}' $1/*.so* diff --git a/libs/ultrainfer/scripts/patch_paddle_inference.py b/libs/ultrainfer/scripts/patch_paddle_inference.py new file mode 100755 index 0000000000..1fae19e707 --- /dev/null +++ b/libs/ultrainfer/scripts/patch_paddle_inference.py @@ -0,0 +1,52 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil +import subprocess +import platform +import sys + + +def process_paddle_inference(paddle_inference_so_file, paddle_inference_version): + if platform.system().lower() != "linux": + return + rpaths = [ + "$ORIGIN", + "$ORIGIN/../../third_party/install/mklml/lib/", + "$ORIGIN/../../third_party/install/xpu/lib/", + "$ORIGIN/../../third_party/install/fdmodel/lib/", + "$ORIGIN/../../../tensorrt/lib/", + ] + version_major = int(paddle_inference_version.split(".")[0]) + if paddle_inference_version != "0.0.0" and version_major < 2: + raise ValueError("Invalid Paddle Inference version") + if version_major == 2: + rpaths.append("$ORIGIN/../../third_party/install/mkldnn/lib/") + else: + rpaths.append("$ORIGIN/../../third_party/install/onednn/lib/") + + patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf") + command = "{} --force-rpath --set-rpath '{}' {}".format( + patchelf_exe, ":".join(rpaths), paddle_inference_so_file + ) + if platform.machine() != "sw_64" and platform.machine() != "mips64": + assert os.system(command) == 0, "patchelf {} failed, the command: {}".format( + paddle_inference_so_file, command + ) + + +if __name__ == "__main__": + process_paddle_inference(sys.argv[1], sys.argv[2]) diff --git a/libs/ultrainfer/scripts/patch_paddle_lite.py b/libs/ultrainfer/scripts/patch_paddle_lite.py new file mode 100755 index 0000000000..39ad9abcf0 --- /dev/null +++ b/libs/ultrainfer/scripts/patch_paddle_lite.py @@ -0,0 +1,44 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
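
patch_paddle_inference.py above switches one RPATH entry on the Paddle Inference major version: 2.x builds keep the oneDNN libraries under third_party/install/mkldnn, while 0.0.0 (nightly) and newer major versions use the onednn directory. The snippet below only prints the command the script would assemble for a hypothetical 2.x library, to make the resulting RPATH visible; the .so path is a placeholder:

# Illustration of the patchelf command assembled for a hypothetical 2.x Paddle Inference build.
rpaths = [
    "$ORIGIN",
    "$ORIGIN/../../third_party/install/mklml/lib/",
    "$ORIGIN/../../third_party/install/xpu/lib/",
    "$ORIGIN/../../third_party/install/fdmodel/lib/",
    "$ORIGIN/../../../tensorrt/lib/",
    "$ORIGIN/../../third_party/install/mkldnn/lib/",  # onednn/ on 0.0.0 and 3.x builds
]
print(
    "patchelf --force-rpath --set-rpath '{}' {}".format(
        ":".join(rpaths), "paddle_inference/paddle/lib/libpaddle_inference.so"
    )
)
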
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil +import subprocess +import platform +import sys + + +def process_paddle_lite(paddle_lite_so_path): + if platform.system().lower() != "linux": + return + rpaths = ["$ORIGIN", "$ORIGIN/mklml/lib/"] + patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf") + for root, dirs, files in os.walk(paddle_lite_so_path): + for lib in files: + if ".so" in lib: + paddle_lite_so_file = os.path.join(root, lib) + command = "{} --set-rpath '{}' {}".format( + patchelf_exe, ":".join(rpaths), paddle_lite_so_file + ) + if platform.machine() != "sw_64" and platform.machine() != "mips64": + assert ( + os.system(command) == 0 + ), "patchelf {} failed, the command: {}".format( + paddle_lite_so_file, command + ) + + +if __name__ == "__main__": + process_paddle_lite(sys.argv[1]) diff --git a/libs/ultrainfer/scripts/ultrainfer_init.bat b/libs/ultrainfer/scripts/ultrainfer_init.bat new file mode 100755 index 0000000000..845070b201 --- /dev/null +++ b/libs/ultrainfer/scripts/ultrainfer_init.bat @@ -0,0 +1,167 @@ +@echo off + +set __script_action_type=%1 +set __ultrainfer_sdk_dir=%2 +set __another_target_dir=%3 +set __install_infos_flag=%4 + +@rem help +if "%__script_action_type%" == "help" ( + call:__print_long_line + echo [1] [help] print help information: ultrainfer_init.bat help + echo [2] [show] show all dlls/libs/include paths: ultrainfer_init.bat show ultrainfer-sdk-dir + echo [3] [init] init all dlls paths for current terminal: ultrainfer_init.bat init ultrainfer-sdk-dir [WARNING: need copy onnxruntime.dll manually] + echo [4] [setup] setup path env for current terminal: ultrainfer_init.bat setup ultrainfer-sdk-dir [WARNING: need copy onnxruntime.dll manually] + echo [5] [install] install all dlls to a specific dir: ultrainfer_init.bat install ultrainfer-sdk-dir another-dir-to-install-dlls **[RECOMMEND]** + echo [6] [install] install all dlls with logging infos: ultrainfer_init.bat install ultrainfer-sdk-dir another-dir-to-install-dlls info + call:__print_long_line + goto:eof +) + +@rem show dlls and libs +if "%__script_action_type%" == "show" ( + + call:__print_long_line + echo [SDK] %__ultrainfer_sdk_dir% + + call:__print_long_line + set __have_openvino_flag=false + set __ultrainfer_lib_dir=%__ultrainfer_sdk_dir%\lib + + @setlocal enabledelayedexpansion + echo [DLL] !__ultrainfer_lib_dir!\ultrainfer.dll **[NEEDED]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.dll ^| findstr /v "vc14\\bin\\opencv"') do ( + set __3rd_dll_file=%%a && set __3rd_needed_flag=true + echo !__3rd_dll_file! | findstr "openvino">nul && set __have_openvino_flag=true + echo !__3rd_dll_file! | findstr d\.dll>nul && set __3rd_needed_flag=false + if "!__3rd_needed_flag!"=="false" (echo !__3rd_dll_file! | findstr /v opencv>nul && set __3rd_needed_flag=true) + echo !__3rd_dll_file! | findstr debug\.dll>nul && set __3rd_needed_flag=false + if "!__3rd_needed_flag!"=="true" (echo [DLL] !__3rd_dll_file! **[NEEDED]**) else (echo [DLL] !__3rd_dll_file!) + ) + + call:__print_long_line + echo [Lib] !__ultrainfer_lib_dir!\ultrainfer.lib **[NEEDED][ultrainfer]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.lib ^| findstr /v "vc14\\lib\\opencv"') do ( + set __3rd_lib_file=%%a && set __3rd_needed_flag=false && set __api_tag=ultrainfer + echo !__3rd_lib_file! 
| findstr "opencv">nul && set __3rd_needed_flag=true + echo !__3rd_lib_file! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision + if "!__3rd_needed_flag!"=="true" (echo !__3rd_lib_file! | findstr d\.lib>nul && set __3rd_needed_flag=false) + echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true + echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text + if "!__3rd_needed_flag!"=="true" (echo [Lib] !__3rd_lib_file! **[NEEDED][!__api_tag!]**) else (echo [Lib] !__3rd_lib_file!) + ) + + call:__print_long_line + set __ultrainfer_include_dir=%__ultrainfer_sdk_dir%\include + echo [Include] !__ultrainfer_include_dir! **[NEEDED][ultrainfer]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e include ^| findstr /v "vc14\\bin\\opencv"') do ( + set __3rd_include_dir=%%a && set __3rd_needed_flag=false && set __api_tag=ultrainfer + echo !__3rd_include_dir! | findstr "opencv">nul && set __3rd_needed_flag=true + echo !__3rd_include_dir! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision + echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true + echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text + if "!__3rd_needed_flag!"=="true" (echo [Include] !__3rd_include_dir! **[NEEDED][!__api_tag!]**) else (echo [Include] !__3rd_include_dir!) + ) + + call:__print_long_line + if "!__have_openvino_flag!"=="true" ( + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.xml ^| findstr "openvino"') do ( + set __openvino_plugin_xml=%%a + echo [XML] !__openvino_plugin_xml! **[NEEDED]** + ) + call:__print_long_line + ) + @setlocal disabledelayedexpansion + goto:eof +) + +@rem init all paths for dlls +if "%__script_action_type%" == "init" ( + @setlocal enabledelayedexpansion + set /p yes_or_no=Init dll paths for UltraInfer in current terminal: [y/n] + if "!yes_or_no!"=="y" (echo YES.) else (echo NO. && pause && goto:eof) + @setlocal disabledelayedexpansion + if exist bin.txt (del /Q bin.txt) + if exist lib.txt (del /Q lib.txt) + for /f "delims= " %%a in ('dir /s /b /A:D %__ultrainfer_sdk_dir% ^| findstr /v include ^| findstr /e bin ^| findstr /v "vc14\\bin"') do (>>bin.txt set /p=%%a;>lib.txt set /p=%%a;nul && set __have_openvino_flag=true + echo !__3rd_or_fd_dll_file! | findstr d\.dll>nul && set __3rd_or_fd_needed_flag=false + if "!__3rd_or_fd_needed_flag!"=="false" ( echo !__3rd_or_fd_dll_file! | findstr /v opencv>nul && set __3rd_or_fd_needed_flag=true) + echo !__3rd_or_fd_dll_file! | findstr debug\.dll>nul && set __3rd_or_fd_needed_flag=false + if "!__3rd_or_fd_needed_flag!"=="true" ( + copy /Y !__3rd_or_fd_dll_file! %__another_target_dir% + if "!__install_infos_flag!"=="info" ( echo [Installed][DLL] !__3rd_or_fd_dll_file! "--->" %__another_target_dir%) + ) + ) + if "!__have_openvino_flag!"=="true" ( + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir% ^| findstr /e \.xml ^| findstr "openvino"') do ( + set __openvino_plugin_xml=%%a + copy /Y !__openvino_plugin_xml! %__another_target_dir% + if "!__install_infos_flag!"=="info" ( echo [Installed][XML] !__openvino_plugin_xml! 
"--->" %__another_target_dir% ) + ) + ) + @setlocal disabledelayedexpansion + goto:eof +) +goto:eof + +@rem helpers +:__print_long_line +echo ------------------------------------------------------------------------------------------------------------------------------------------------------------ +goto:eof +@rem end + +@echo on diff --git a/libs/ultrainfer/scripts/ultrainfer_init.sh b/libs/ultrainfer/scripts/ultrainfer_init.sh new file mode 100755 index 0000000000..f004298c72 --- /dev/null +++ b/libs/ultrainfer/scripts/ultrainfer_init.sh @@ -0,0 +1,61 @@ +# source this file to import libraries + +PLATFORM=`uname` +ULTRAINFER_LIBRARY_PATH=${BASH_SOURCE:-$0} +if [[ "$PLATFORM" = "Linux" ]];then + ULTRAINFER_LIBRARY_PATH=`readlink -f ${ULTRAINFER_LIBRARY_PATH}` +fi +ULTRAINFER_LIBRARY_PATH=$(cd `dirname ${ULTRAINFER_LIBRARY_PATH}`; pwd) + +echo "=============== Information ======================" +echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" +echo "Platform: $PLATFORM" +echo "==================================================" + +# Find all the .so files' path +if [[ "$(ps -a $$)" =~ "zsh" ]]; then + ALL_SO_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"`) + ALL_DYLIB_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"`) +else + ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` + ALL_DYLIB_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"` +fi + +for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECTORIES+=(${SO_FILE%/*}) +done + +# Find all the .dylib files' path +# ALL_DYLIB_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"`) +for DYLIB_FILE in $ALL_DYLIB_FILES;do + LIBS_DIRECTORIES+=(${DYLIB_FILE%/*}) +done + +# Remove the dumplicate directories +LIBS_DIRECTORIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECTORIES[@]})) + +# Print the dynamic library location and output the configuration file +IMPORT_PATH="" +output_file=${ULTRAINFER_LIBRARY_PATH}/ultrainfer_libs.conf +rm -rf $output_file +for LIB_DIR in ${LIBS_DIRECTORIES[@]};do + echo "Find Library Directory: $LIB_DIR" + echo "$LIB_DIR" >> $output_file + IMPORT_PATH=${LIB_DIR}":"$IMPORT_PATH +done + +if [ -f "ascend_init.sh" ] +then + source ascend_init.sh +fi + +echo "[Execute] Will try to export all the library directories to environments, if not work, please try to export these path by your self." +PLATFORM=`uname` +if [[ "$PLATFORM" = "Linux" ]];then + NEW_LIB_PATH=$(tr ":" "\n" <<< "${IMPORT_PATH}:$LD_LIBRARY_PATH" | sort | uniq | tr "\n" ":") + export LD_LIBRARY_PATH=$NEW_LIB_PATH +fi +if [[ "$PLATFORM" = "Darwin" ]];then + NEW_LIB_PATH=$(tr ":" "\n" <<< "${IMPORT_PATH}:$DYLD_LIBRARY_PATH" | sort | uniq | tr "\n" ":") + export DYLD_LIBRARY_PATH=$NEW_LIB_PATH +fi diff --git a/libs/ultrainfer/ultrainfer/CMakeLists.txt b/libs/ultrainfer/ultrainfer/CMakeLists.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/libs/ultrainfer/ultrainfer/benchmark/benchmark.h b/libs/ultrainfer/ultrainfer/benchmark/benchmark.h new file mode 100755 index 0000000000..e3c556525c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/benchmark.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/benchmark/option.h" +#include "ultrainfer/benchmark/results.h" +#include "ultrainfer/core/config.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/utils/utils.h" + +#ifdef ENABLE_BENCHMARK +#define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop) \ + int __p_loop = (base_loop); \ + const bool __p_enable_profile = option.enable_profile; \ + const bool __p_include_h2d_d2h = option.include_h2d_d2h; \ + const int __p_repeats = option.repeats; \ + const int __p_warmup = option.warmup; \ + if (__p_enable_profile && (!__p_include_h2d_d2h)) { \ + __p_loop = (__p_repeats) + (__p_warmup); \ + FDINFO << option << std::endl; \ + } \ + TimeCounter __p_tc; \ + bool __p_tc_start = false; \ + for (int __p_i = 0; __p_i < __p_loop; ++__p_i) { \ + if (__p_i >= (__p_warmup) && (!__p_tc_start)) { \ + __p_tc.Start(); \ + __p_tc_start = true; \ + } + +#define __RUNTIME_PROFILE_LOOP_END(result) \ + } \ + if ((__p_enable_profile && (!__p_include_h2d_d2h))) { \ + if (__p_tc_start) { \ + __p_tc.End(); \ + double __p_tc_duration = __p_tc.Duration(); \ + result.time_of_runtime = \ + __p_tc_duration / static_cast(__p_repeats); \ + } \ + } + +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(option, base_loop) \ + int __p_loop_h = (base_loop); \ + const bool __p_enable_profile_h = option.enable_profile; \ + const bool __p_include_h2d_d2h_h = option.include_h2d_d2h; \ + const int __p_repeats_h = option.repeats; \ + const int __p_warmup_h = option.warmup; \ + if (__p_enable_profile_h && __p_include_h2d_d2h_h) { \ + __p_loop_h = (__p_repeats_h) + (__p_warmup_h); \ + FDINFO << option << std::endl; \ + } \ + TimeCounter __p_tc_h; \ + bool __p_tc_start_h = false; \ + for (int __p_i_h = 0; __p_i_h < __p_loop_h; ++__p_i_h) { \ + if (__p_i_h >= (__p_warmup_h) && (!__p_tc_start_h)) { \ + __p_tc_h.Start(); \ + __p_tc_start_h = true; \ + } + +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_END(result) \ + } \ + if ((__p_enable_profile_h && __p_include_h2d_d2h_h)) { \ + if (__p_tc_start_h) { \ + __p_tc_h.End(); \ + double __p_tc_duration_h = __p_tc_h.Duration(); \ + result.time_of_runtime = \ + __p_tc_duration_h / static_cast(__p_repeats_h); \ + } \ + } +#else +#define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop) \ + for (int __p_i = 0; __p_i < (base_loop); ++__p_i) { +#define __RUNTIME_PROFILE_LOOP_END(result) } +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(option, base_loop) \ + for (int __p_i_h = 0; __p_i_h < (base_loop); ++__p_i_h) { +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_END(result) } +#endif diff --git a/libs/ultrainfer/ultrainfer/benchmark/option.h b/libs/ultrainfer/ultrainfer/benchmark/option.h new file mode 100755 index 0000000000..eb45a1fb48 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/option.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
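
When ENABLE_BENCHMARK is defined, the loop macros above replace a single inference call with warmup + repeats iterations, start the timer only after the warmup iterations have run, and write the averaged duration into result.time_of_runtime. The Python sketch below restates that control flow without the macro plumbing; run_once stands in for one backend inference:

# Python restatement of the __RUNTIME_PROFILE_LOOP_* timing logic (run_once is a stand-in).
import time

def profile_runtime(run_once, warmup=50, repeats=100):
    start = None
    for i in range(warmup + repeats):
        if i >= warmup and start is None:
            start = time.perf_counter()   # timer starts once warmup is done
        run_once()
    # Average over the timed repeats only, mirroring result.time_of_runtime.
    return (time.perf_counter() - start) / repeats
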
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { + +/** \brief All C++ UltraInfer benchmark profile APIs are defined inside this + * namespace + * + */ +namespace benchmark { + +// @brief Option object used to control the behavior of the benchmark profiling. +// +struct BenchmarkOption { + int warmup = 50; ///< Warmup for backend inference. + int repeats = 100; ///< Repeats for backend inference. + bool enable_profile = false; ///< Whether to use profile or not. + bool include_h2d_d2h = false; ///< Whether to include time of H2D_D2H for time + ///< of runtime. // NOLINT + + friend std::ostream &operator<<(std::ostream &output, + const BenchmarkOption &option) { + if (!option.include_h2d_d2h) { + output << "Running profiling for Runtime " + << "without H2D and D2H, "; + } else { + output << "Running profiling for Runtime " + << "with H2D and D2H, "; + } + output << "Repeats: " << option.repeats << ", " + << "Warmup: " << option.warmup; + return output; + } +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/benchmark/results.h b/libs/ultrainfer/ultrainfer/benchmark/results.h new file mode 100755 index 0000000000..527eb0a885 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/results.h @@ -0,0 +1,28 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { +namespace benchmark { + +/*! @brief Result object used to record the time of runtime after benchmark + * profiling is done. + */ +struct BenchmarkResult { + ///< Means pure_backend_time+time_of_h2d_d2h(if include_h2d_d2h=true). + double time_of_runtime = 0.0f; +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/benchmark/utils.cc b/libs/ultrainfer/ultrainfer/benchmark/utils.cc new file mode 100755 index 0000000000..d3b0896da7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/utils.cc @@ -0,0 +1,908 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#ifdef __linux__ +#include +#endif +#include + +#include "ultrainfer/benchmark/utils.h" +#include "ultrainfer/utils/path.h" +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) +#include "ultrainfer/vision/utils/utils.h" +#endif + +namespace ultrainfer { +namespace benchmark { + +#if defined(ENABLE_BENCHMARK) +std::string Strip(const std::string &str, char ch) { + int i = 0; + while (str[i] == ch) { + i++; + } + int j = str.size() - 1; + while (str[j] == ch) { + j--; + } + return str.substr(i, j + 1 - i); +} + +void Split(const std::string &s, std::vector &tokens, char delim) { + tokens.clear(); + size_t lastPos = s.find_first_not_of(delim, 0); + size_t pos = s.find(delim, lastPos); + while (lastPos != std::string::npos) { + tokens.emplace_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delim, pos); + pos = s.find(delim, lastPos); + } + return; +} + +ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id) + : is_supported_(false), sampling_interval_(sampling_interval_ms), + gpu_id_(gpu_id) { +#ifdef __linux__ + is_supported_ = true; +#else + is_supported_ = false; +#endif + if (!is_supported_) { + FDASSERT(false, "Currently ResourceUsageMonitor only supports Linux.") + return; + } +} + +void ResourceUsageMonitor::Start() { + if (!is_supported_) { + return; + } + if (check_memory_thd_ != nullptr) { + FDINFO << "Memory monitoring has already started!" << std::endl; + return; + } + FDINFO << "Start monitoring memory!" << std::endl; + stop_signal_ = false; + check_memory_thd_.reset(new std::thread(([this]() { + // Note we retrieve the memory usage at the very beginning of the thread. + while (true) { +#ifdef __linux__ + rusage res; + if (getrusage(RUSAGE_SELF, &res) == 0) { + max_cpu_mem_ = + std::max(max_cpu_mem_, static_cast(res.ru_maxrss / 1024.0)); + } +#endif +#if defined(WITH_GPU) + std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_); + // get max_gpu_mem and max_gpu_util + std::vector gpu_tokens; + Split(gpu_mem_info, gpu_tokens, ','); + max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6])); + max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7])); +#endif + if (stop_signal_) { + break; + } + std::this_thread::sleep_for( + std::chrono::milliseconds(sampling_interval_)); + } + }))); +} + +void ResourceUsageMonitor::Stop() { + if (!is_supported_) { + return; + } + if (check_memory_thd_ == nullptr) { + FDINFO << "Memory monitoring hasn't started yet or has stopped!" + << std::endl; + return; + } + FDINFO << "Stop monitoring memory!" << std::endl; + StopInternal(); +} + +void ResourceUsageMonitor::StopInternal() { + stop_signal_ = true; + if (check_memory_thd_ == nullptr) { + return; + } + if (check_memory_thd_ != nullptr) { + check_memory_thd_->join(); + } + check_memory_thd_.reset(nullptr); +} + +std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) { + std::string result = ""; +#if defined(__linux__) && defined(WITH_GPU) + std::string command = "nvidia-smi --id=" + std::to_string(device_id) + + " --query-gpu=index,uuid,name,timestamp,memory.total," + "memory.free,memory.used,utilization.gpu,utilization." 
+ "memory --format=csv,noheader,nounits"; + FILE *pp = popen(command.data(), "r"); + if (!pp) + return ""; + char tmp[1024]; + + while (fgets(tmp, sizeof(tmp), pp) != NULL) { + result += tmp; + } + pclose(pp); +#else + FDASSERT(false, + "Currently collect gpu memory info only supports Linux in GPU.") +#endif + return result; +} +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +#if defined(ENABLE_BENCHMARK) +static const char KEY_VALUE_SEP = '#'; +static const char VALUE_SEP = ','; + +std::vector ReadLines(const std::string &path) { + std::ifstream fin(path); + std::vector lines; + std::string line; + if (fin.is_open()) { + while (getline(fin, line)) { + lines.push_back(line); + } + } else { + FDERROR << "Failed to open file " << path << std::endl; + std::abort(); + } + fin.close(); + return lines; +} + +std::map> +SplitDataLine(const std::string &data_line) { + std::map> dict; + std::vector tokens, value_tokens; + Split(data_line, tokens, KEY_VALUE_SEP); + std::string key = tokens[0]; + std::string value = tokens[1]; + Split(value, value_tokens, VALUE_SEP); + dict[key] = value_tokens; + return dict; +} + +bool ResultManager::SaveFDTensor(const FDTensor &tensor, + const std::string &path) { + if (tensor.CpuData() == nullptr || tensor.Numel() <= 0) { + FDERROR << "Input tensor is empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + if (tensor.Dtype() != FDDataType::FP32 && + tensor.Dtype() != FDDataType::INT32 && + tensor.Dtype() != FDDataType::INT64) { + FDERROR << "Only support FP32/INT32/INT64 now, but got " + << Str(tensor.dtype) << std::endl; + return false; + } + // name + fs << "name" << KEY_VALUE_SEP << tensor.name << "\n"; + // shape + fs << "shape" << KEY_VALUE_SEP; + for (int i = 0; i < tensor.shape.size(); ++i) { + if (i < tensor.shape.size() - 1) { + fs << tensor.shape[i] << VALUE_SEP; + } else { + fs << tensor.shape[i]; + } + } + fs << "\n"; + // dtype + fs << "dtype" << KEY_VALUE_SEP << Str(tensor.dtype) << "\n"; + // data + fs << "data" << KEY_VALUE_SEP; + const void *data_ptr = tensor.CpuData(); + for (int i = 0; i < tensor.Numel(); ++i) { + if (tensor.Dtype() == FDDataType::INT64) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else if (tensor.Dtype() == FDDataType::INT32) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else { // FP32 + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::LoadFDTensor(FDTensor *tensor, const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // name + data = SplitDataLine(lines[0]); + tensor->name = data.begin()->first; + // shape + data = SplitDataLine(lines[1]); + tensor->shape.clear(); + for (const auto &s : data.begin()->second) { + tensor->shape.push_back(std::stol(s)); + } + // dtype + data = SplitDataLine(lines[2]); + if (data.begin()->second.at(0) == Str(FDDataType::INT64)) { + tensor->dtype = FDDataType::INT64; + } else if (data.begin()->second.at(0) == Str(FDDataType::INT32)) { + tensor->dtype = 
FDDataType::INT32; + } else if (data.begin()->second.at(0) == Str(FDDataType::FP32)) { + tensor->dtype = FDDataType::FP32; + } else { + FDERROR << "Only support FP32/INT64/INT32 now, but got " + << data.begin()->second.at(0) << std::endl; + return false; + } + // data + data = SplitDataLine(lines[3]); + tensor->Allocate(tensor->shape, tensor->dtype, tensor->name); + if (tensor->dtype == FDDataType::INT64) { + int64_t *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stol(data.begin()->second[i]); + } + } else if (tensor->dtype == FDDataType::INT32) { + int32_t *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stoi(data.begin()->second[i]); + } + } else { // FP32 + float *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +TensorDiff ResultManager::CalculateDiffStatis(const FDTensor &lhs, + const FDTensor &rhs) { + if (lhs.Numel() != rhs.Numel() || lhs.Dtype() != rhs.Dtype()) { + FDASSERT(false, + "The size and dtype of input FDTensor must be equal!" + " But got size %d, %d, dtype %s, %s", + lhs.Numel(), rhs.Numel(), Str(lhs.Dtype()).c_str(), + Str(rhs.Dtype()).c_str()) + } + FDDataType dtype = lhs.Dtype(); + int numel = lhs.Numel(); + if (dtype != FDDataType::FP32 && dtype != FDDataType::INT64 && + dtype != FDDataType::INT32) { + FDASSERT(false, "Only support FP32/INT64/INT32 now, but got %s", + Str(dtype).c_str()) + } + if (dtype == FDDataType::INT64) { + std::vector tensor_diff(numel); + const int64_t *lhs_data_ptr = static_cast(lhs.CpuData()); + const int64_t *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } else if (dtype == FDDataType::INT32) { + std::vector tensor_diff(numel); + const int32_t *lhs_data_ptr = static_cast(lhs.CpuData()); + const int32_t *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } else { // FP32 + std::vector tensor_diff(numel); + const float *lhs_data_ptr = static_cast(lhs.CpuData()); + const float *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } +} + +void ResultManager::SaveBenchmarkResult(const std::string &res, + const std::string &path) { + if (path.empty()) { + FDERROR << "Benchmark data path can not be empty!" 
<< std::endl; + return; + } + auto openmode = std::ios::app; + std::ofstream fs(path, openmode); + if (!fs.is_open()) { + FDERROR << "Fail to open result file: " << path << std::endl; + } + fs << res; + fs.close(); +} + +bool ResultManager::LoadBenchmarkConfig( + const std::string &path, + std::unordered_map *config_info) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + for (auto line : lines) { + std::vector tokens; + Split(line, tokens, ':'); + (*config_info)[tokens[0]] = Strip(tokens[1], ' '); + } + return true; +} + +std::vector> +ResultManager::GetInputShapes(const std::string &raw_shapes) { + std::vector> shapes; + std::vector shape_tokens; + Split(raw_shapes, shape_tokens, ':'); + for (auto str_shape : shape_tokens) { + std::vector shape; + std::string tmp_str = str_shape; + while (!tmp_str.empty()) { + int dim = atoi(tmp_str.data()); + shape.push_back(dim); + size_t next_offset = tmp_str.find(","); + if (next_offset == std::string::npos) { + break; + } else { + tmp_str = tmp_str.substr(next_offset + 1); + } + } + shapes.push_back(shape); + } + return shapes; +} + +std::vector +ResultManager::GetInputNames(const std::string &raw_names) { + std::vector names_tokens; + Split(raw_names, names_tokens, ':'); + return names_tokens; +} + +std::vector ResultManager::SplitStr(const std::string &raw_str, + char delim) { + std::vector str_tokens; + Split(raw_str, str_tokens, delim); + return str_tokens; +} + +std::vector +ResultManager::GetInputDtypes(const std::string &raw_dtypes) { + std::vector dtypes; + std::vector dtypes_tokens; + Split(raw_dtypes, dtypes_tokens, ':'); + for (auto dtype : dtypes_tokens) { + if (dtype == "FP32") { + dtypes.push_back(FDDataType::FP32); + } else if (dtype == "INT32") { + dtypes.push_back(FDDataType::INT32); + } else if (dtype == "INT64") { + dtypes.push_back(FDDataType::INT64); + } else if (dtype == "INT8") { + dtypes.push_back(FDDataType::INT8); + } else if (dtype == "UINT8") { + dtypes.push_back(FDDataType::UINT8); + } else if (dtype == "FP16") { + dtypes.push_back(FDDataType::FP16); + } else if (dtype == "FP64") { + dtypes.push_back(FDDataType::FP64); + } else { + dtypes.push_back(FDDataType::FP32); // default + } + } + return dtypes; +} + +#if defined(ENABLE_VISION) +bool ResultManager::SaveDetectionResult(const vision::DetectionResult &res, + const std::string &path) { + if (res.boxes.empty()) { + FDERROR << "DetectionResult can not be empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.boxes.size(); ++i) { + for (int j = 0; j < 4; ++j) { + if ((i == res.boxes.size() - 1) && (j == 3)) { + fs << res.boxes[i][j]; + } else { + fs << res.boxes[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + // scores + fs << "scores" << KEY_VALUE_SEP; + for (int i = 0; i < res.scores.size(); ++i) { + if (i < res.scores.size() - 1) { + fs << res.scores[i] << VALUE_SEP; + } else { + fs << res.scores[i]; + } + } + fs << "\n"; + // label_ids + fs << "label_ids" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_ids.size(); ++i) { + if (i < res.label_ids.size() - 1) { + fs << res.label_ids[i] << VALUE_SEP; + } else { + fs << res.label_ids[i]; + } + } + fs << "\n"; + // TODO(qiuyanjun): dump masks + fs.close(); + return true; +} + +bool ResultManager::SaveClassifyResult(const vision::ClassifyResult &res, + const std::string &path) { + if (res.label_ids.empty()) { + FDERROR << "ClassifyResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // label_ids + fs << "label_ids" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_ids.size(); ++i) { + if (i < res.label_ids.size() - 1) { + fs << res.label_ids[i] << VALUE_SEP; + } else { + fs << res.label_ids[i]; + } + } + fs << "\n"; + // scores + fs << "scores" << KEY_VALUE_SEP; + for (int i = 0; i < res.scores.size(); ++i) { + if (i < res.scores.size() - 1) { + fs << res.scores[i] << VALUE_SEP; + } else { + fs << res.scores[i]; + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::SaveSegmentationResult( + const vision::SegmentationResult &res, const std::string &path) { + if (res.label_map.empty()) { + FDERROR << "SegmentationResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // label_map + fs << "label_map" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_map.size(); ++i) { + if (i < res.label_map.size() - 1) { + fs << static_cast(res.label_map[i]) << VALUE_SEP; + } else { + fs << static_cast(res.label_map[i]); + } + } + fs << "\n"; + // score_map + if (res.contain_score_map) { + fs << "score_map" << KEY_VALUE_SEP; + for (int i = 0; i < res.score_map.size(); ++i) { + if (i < res.score_map.size() - 1) { + fs << res.score_map[i] << VALUE_SEP; + } else { + fs << res.score_map[i]; + } + } + fs << "\n"; + } + fs.close(); + return true; +} + +bool ResultManager::SaveOCRDetResult(const std::vector> &res, + const std::string &path) { + if (res.empty()) { + FDERROR << "OCRDetResult can not be empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.size(); ++i) { + for (int j = 0; j < 8; ++j) { + if ((i == res.size() - 1) && (j == 7)) { + fs << res[i][j]; + } else { + fs << res[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::SaveMattingResult(const vision::MattingResult &res, + const std::string &path) { + if (res.alpha.empty()) { + FDERROR << "MattingResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // alpha + fs << "alpha" << KEY_VALUE_SEP; + for (int i = 0; i < res.alpha.size(); ++i) { + if (i < res.alpha.size() - 1) { + fs << res.alpha[i] << VALUE_SEP; + } else { + fs << res.alpha[i]; + } + } + fs << "\n"; + // foreground + if (res.contain_foreground) { + fs << "foreground" << KEY_VALUE_SEP; + for (int i = 0; i < res.foreground.size(); ++i) { + if (i < res.foreground.size() - 1) { + fs << res.foreground[i] << VALUE_SEP; + } else { + fs << res.foreground[i]; + } + } + fs << "\n"; + } + fs.close(); + return true; +} + +bool ResultManager::LoadDetectionResult(vision::DetectionResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 4; + res->Resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + res->boxes[i][0] = std::stof(data.begin()->second[i * 4 + 0]); + res->boxes[i][1] = std::stof(data.begin()->second[i * 4 + 1]); + res->boxes[i][2] = std::stof(data.begin()->second[i * 4 + 2]); + res->boxes[i][3] = std::stof(data.begin()->second[i * 4 + 3]); + } + // scores + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->scores[i] = std::stof(data.begin()->second[i]); + } + // label_ids + data = SplitDataLine(lines[2]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->label_ids[i] = std::stoi(data.begin()->second[i]); + } + // TODO(qiuyanjun): load masks + return true; +} + +bool ResultManager::LoadClassifyResult(vision::ClassifyResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // label_ids + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->label_ids[i] = std::stoi(data.begin()->second[i]); + } + // scores + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->scores[i] = std::stof(data.begin()->second[i]); + } + return true; +} + +bool ResultManager::LoadSegmentationResult(vision::SegmentationResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + if (lines.size() > 1) { + res->contain_score_map = true; + } + std::map> data; + // label_map + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i 
< data.begin()->second.size(); ++i) { + res->label_map[i] = std::stoi(data.begin()->second[i]); + } + // score_map + if (lines.size() > 1) { + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->score_map[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +bool ResultManager::LoadOCRDetResult(std::vector> *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 8; + res->resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]); + } + } + return true; +} + +bool ResultManager::LoadMattingResult(vision::MattingResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + if (lines.size() > 1) { + res->contain_foreground = true; + } + std::map> data; + // alpha + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->alpha[i] = std::stof(data.begin()->second[i]); + } + // foreground + if (lines.size() > 1) { + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->foreground[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +DetectionDiff +ResultManager::CalculateDiffStatis(const vision::DetectionResult &lhs, + const vision::DetectionResult &rhs, + const float &score_threshold) { + vision::DetectionResult lhs_sort = lhs; + vision::DetectionResult rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortDetectionResultByXY(&lhs_sort); + vision::utils::LexSortDetectionResultByXY(&rhs_sort); + // get value diff & trunc it by score_threshold + const int boxes_num = std::min(lhs_sort.boxes.size(), rhs_sort.boxes.size()); + std::vector boxes_diff; + std::vector scores_diff; + std::vector labels_diff; + // TODO(qiuyanjun): process the diff of masks. 
+ for (int i = 0; i < boxes_num; ++i) { + if (lhs_sort.scores[i] > score_threshold && + rhs_sort.scores[i] > score_threshold) { + scores_diff.push_back(lhs_sort.scores[i] - rhs_sort.scores[i]); + labels_diff.push_back(lhs_sort.label_ids[i] - rhs_sort.label_ids[i]); + boxes_diff.push_back(lhs_sort.boxes[i][0] - rhs_sort.boxes[i][0]); + boxes_diff.push_back(lhs_sort.boxes[i][1] - rhs_sort.boxes[i][1]); + boxes_diff.push_back(lhs_sort.boxes[i][2] - rhs_sort.boxes[i][2]); + boxes_diff.push_back(lhs_sort.boxes[i][3] - rhs_sort.boxes[i][3]); + } + } + FDASSERT(boxes_diff.size() > 0, + "Can't get any valid boxes while score_threshold is %f, " + "The boxes.size of lhs is %d, the boxes.size of rhs is %d", + score_threshold, lhs_sort.boxes.size(), rhs_sort.boxes.size()) + + DetectionDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + return diff; +} + +ClassifyDiff +ResultManager::CalculateDiffStatis(const vision::ClassifyResult &lhs, + const vision::ClassifyResult &rhs) { + const int class_nums = std::min(lhs.label_ids.size(), rhs.label_ids.size()); + std::vector scores_diff; + std::vector labels_diff; + for (int i = 0; i < class_nums; ++i) { + scores_diff.push_back(lhs.scores[i] - rhs.scores[i]); + labels_diff.push_back(lhs.label_ids[i] - rhs.label_ids[i]); + } + + ClassifyDiff diff; + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + return diff; +} + +SegmentationDiff +ResultManager::CalculateDiffStatis(const vision::SegmentationResult &lhs, + const vision::SegmentationResult &rhs) { + const int pixel_nums = std::min(lhs.label_map.size(), rhs.label_map.size()); + std::vector labels_diff; + std::vector scores_diff; + for (int i = 0; i < pixel_nums; ++i) { + labels_diff.push_back(lhs.label_map[i] - rhs.label_map[i]); + if (lhs.contain_score_map && rhs.contain_score_map) { + scores_diff.push_back(lhs.score_map[i] - rhs.score_map[i]); + } + } + SegmentationDiff diff; + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + if (lhs.contain_score_map && rhs.contain_score_map) { + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + } + return diff; +} + +OCRDetDiff +ResultManager::CalculateDiffStatis(const std::vector> &lhs, + const std::vector> &rhs) { + const int boxes_nums = std::min(lhs.size(), rhs.size()); + std::vector> lhs_sort = lhs; + std::vector> rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortOCRDetResultByXY(&lhs_sort); + vision::utils::LexSortOCRDetResultByXY(&rhs_sort); + // get value diff + const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size()); + std::vector boxes_diff; + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]); + } + } + + OCRDetDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + return diff; +} + 
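The Save*/Load*/CalculateDiffStatis helpers above are aimed at precision regression checks: dump a result once, then compare later runs against it. A minimal sketch of that workflow for detection results, assuming a build with ENABLE_BENCHMARK and ENABLE_VISION; the function name, file name, and tolerances below are illustrative only:

    #include <cmath>
    #include "ultrainfer/benchmark/utils.h"

    bool DetectionMatchesBaseline(
        const ultrainfer::vision::DetectionResult &current) {
      namespace bm = ultrainfer::benchmark;
      ultrainfer::vision::DetectionResult baseline;
      if (!bm::ResultManager::LoadDetectionResult(&baseline, "baseline_det.txt")) {
        return false;  // no baseline recorded yet
      }
      // Boxes are lex-sorted internally, and only boxes whose scores exceed the
      // threshold on both sides are compared.
      bm::DetectionDiff diff =
          bm::ResultManager::CalculateDiffStatis(baseline, current, 0.3f);
      return std::fabs(diff.boxes.max) < 1.0 && std::fabs(diff.scores.max) < 1e-4;
    }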
+MattingDiff
+ResultManager::CalculateDiffStatis(const vision::MattingResult &lhs,
+                                   const vision::MattingResult &rhs) {
+  const int pixel_nums = std::min(lhs.alpha.size(), rhs.alpha.size());
+  std::vector<float> alpha_diff;
+  std::vector<float> foreground_diff;
+  for (int i = 0; i < pixel_nums; ++i) {
+    alpha_diff.push_back(lhs.alpha[i] - rhs.alpha[i]);
+    if (lhs.contain_foreground && rhs.contain_foreground) {
+      foreground_diff.push_back(lhs.foreground[i] - rhs.foreground[i]);
+    }
+  }
+  MattingDiff diff;
+  CalculateStatisInfo<float>(alpha_diff.data(), alpha_diff.size(),
+                             &(diff.alpha.mean), &(diff.alpha.max),
+                             &(diff.alpha.min));
+  if (lhs.contain_foreground && rhs.contain_foreground) {
+    CalculateStatisInfo<float>(foreground_diff.data(), foreground_diff.size(),
+                               &(diff.foreground.mean), &(diff.foreground.max),
+                               &(diff.foreground.min));
+  }
+  return diff;
+}
+
+#endif // ENABLE_VISION
+#endif // ENABLE_BENCHMARK
+
+} // namespace benchmark
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/benchmark/utils.h b/libs/ultrainfer/ultrainfer/benchmark/utils.h
new file mode 100755
index 0000000000..db8c0d1ec2
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/benchmark/utils.h
@@ -0,0 +1,204 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+#include "ultrainfer/utils/utils.h"
+#include <memory>
+#include <thread> // NOLINT
+#include <unordered_map>
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
+#include "ultrainfer/vision/common/result.h"
+#endif
+
+namespace ultrainfer {
+namespace benchmark {
+
+#if defined(ENABLE_BENCHMARK)
+/*! @brief ResourceUsageMonitor object used to collect memory usage info.
+ */
+class ULTRAINFER_DECL ResourceUsageMonitor {
+public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info, in ms.
+   * \param[in] gpu_id Device (GPU) id, default 0.
+ */ + explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0); + + ~ResourceUsageMonitor() { StopInternal(); } + + /// Start memory info collect + void Start(); + /// Stop memory info collect + void Stop(); + /// Get maximum cpu memory usage + float GetMaxCpuMem() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_cpu_mem_; + } + /// Get maximum gpu memory usage + float GetMaxGpuMem() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_gpu_mem_; + } + /// Get maximum gpu util + float GetMaxGpuUtil() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_gpu_util_; + } + + ResourceUsageMonitor(ResourceUsageMonitor &) = delete; + ResourceUsageMonitor &operator=(const ResourceUsageMonitor &) = delete; + ResourceUsageMonitor(ResourceUsageMonitor &&) = delete; + ResourceUsageMonitor &operator=(const ResourceUsageMonitor &&) = delete; + +private: + void StopInternal(); + // Get current gpu memory info + std::string GetCurrentGpuMemoryInfo(int device_id); + + bool is_supported_ = false; + bool stop_signal_ = false; + const int sampling_interval_; + float max_cpu_mem_ = 0.0f; // MB + float max_gpu_mem_ = 0.0f; // MB + float max_gpu_util_ = 0.0f; + const int gpu_id_ = 0; + std::unique_ptr check_memory_thd_ = nullptr; +}; + +// Remove the ch characters at both ends of str +ULTRAINFER_DECL std::string Strip(const std::string &str, char ch = ' '); + +// Split string +ULTRAINFER_DECL void Split(const std::string &s, + std::vector &tokens, char delim = ' '); + +/// Diff values for precision evaluation +struct ULTRAINFER_DECL BaseDiff {}; + +struct ULTRAINFER_DECL EvalStatis { + double mean = -1.0; + double min = -1.0; + double max = -1.0; +}; + +struct ULTRAINFER_DECL TensorDiff : public BaseDiff { + EvalStatis data; +}; + +#if defined(ENABLE_VISION) +struct ULTRAINFER_DECL DetectionDiff : public BaseDiff { + EvalStatis boxes; + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL ClassifyDiff : public BaseDiff { + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL SegmentationDiff : public BaseDiff { + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL OCRDetDiff : public BaseDiff { + EvalStatis boxes; +}; + +struct ULTRAINFER_DECL MattingDiff : public BaseDiff { + EvalStatis alpha; + EvalStatis foreground; +}; + +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +struct ULTRAINFER_DECL ResultManager { +#if defined(ENABLE_BENCHMARK) + /// Save & Load functions for FDTensor result. + static bool SaveFDTensor(const FDTensor &tensor, const std::string &path); + static bool LoadFDTensor(FDTensor *tensor, const std::string &path); + /// Calculate diff value between two FDTensor results. 
+ static TensorDiff CalculateDiffStatis(const FDTensor &lhs, + const FDTensor &rhs); + /// Save Benchmark data + static void SaveBenchmarkResult(const std::string &res, + const std::string &path); + /// Load Benchmark config + static bool LoadBenchmarkConfig( + const std::string &path, + std::unordered_map *config_info); + /// Get Input Shapes + static std::vector> + GetInputShapes(const std::string &raw_shapes); + /// Get Input Names + static std::vector GetInputNames(const std::string &raw_names); + /// Get Input Dtypes + static std::vector GetInputDtypes(const std::string &raw_dtypes); + /// Split string + static std::vector SplitStr(const std::string &raw_str, + char delim = ':'); +#if defined(ENABLE_VISION) + /// Save & Load functions for basic results. + static bool SaveDetectionResult(const vision::DetectionResult &res, + const std::string &path); + static bool LoadDetectionResult(vision::DetectionResult *res, + const std::string &path); + static bool SaveClassifyResult(const vision::ClassifyResult &res, + const std::string &path); + static bool LoadClassifyResult(vision::ClassifyResult *res, + const std::string &path); + static bool SaveSegmentationResult(const vision::SegmentationResult &res, + const std::string &path); + static bool LoadSegmentationResult(vision::SegmentationResult *res, + const std::string &path); + static bool SaveOCRDetResult(const std::vector> &res, + const std::string &path); + static bool LoadOCRDetResult(std::vector> *res, + const std::string &path); + static bool SaveMattingResult(const vision::MattingResult &res, + const std::string &path); + static bool LoadMattingResult(vision::MattingResult *res, + const std::string &path); + /// Calculate diff value between two basic results. + static DetectionDiff CalculateDiffStatis(const vision::DetectionResult &lhs, + const vision::DetectionResult &rhs, + const float &score_threshold = 0.3f); + static ClassifyDiff CalculateDiffStatis(const vision::ClassifyResult &lhs, + const vision::ClassifyResult &rhs); + static SegmentationDiff + CalculateDiffStatis(const vision::SegmentationResult &lhs, + const vision::SegmentationResult &rhs); + static OCRDetDiff + CalculateDiffStatis(const std::vector> &lhs, + const std::vector> &rhs); + static MattingDiff CalculateDiffStatis(const vision::MattingResult &lhs, + const vision::MattingResult &rhs); +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/allocate.cc b/libs/ultrainfer/ultrainfer/core/allocate.cc new file mode 100755 index 0000000000..8d1a9f680f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/allocate.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
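The string-parsing helpers declared in ResultManager above follow simple conventions that can be read off the implementation: one "key: value" entry per line for the benchmark config, inputs separated by colons, and shape dims separated by commas. A short sketch of how a benchmark driver might use them; the file name, input names, and shapes are made up, and the config map is assumed to be string-to-string as the parsing code suggests:

    #include <string>
    #include <unordered_map>
    #include "ultrainfer/benchmark/utils.h"

    void ParseBenchmarkInputs() {
      using ultrainfer::benchmark::ResultManager;

      std::unordered_map<std::string, std::string> config;
      ResultManager::LoadBenchmarkConfig("benchmark_config.txt", &config);

      // Two inputs: a 1x3x224x224 image and a 1x2 scale factor.
      auto shapes = ResultManager::GetInputShapes("1,3,224,224:1,2");
      auto names = ResultManager::GetInputNames("image:scale_factor");
      auto dtypes = ResultManager::GetInputDtypes("FP32:FP32");
    }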
+#ifdef WITH_GPU +#include +#endif + +#include "ultrainfer/core/allocate.h" + +namespace ultrainfer { + +bool FDHostAllocator::operator()(void **ptr, size_t size) const { + *ptr = malloc(size); + return *ptr != nullptr; +} + +void FDHostFree::operator()(void *ptr) const { free(ptr); } + +#ifdef WITH_GPU + +bool FDDeviceAllocator::operator()(void **ptr, size_t size) const { + return cudaMalloc(ptr, size) == cudaSuccess; +} + +void FDDeviceFree::operator()(void *ptr) const { cudaFree(ptr); } + +bool FDDeviceHostAllocator::operator()(void **ptr, size_t size) const { + return cudaMallocHost(ptr, size) == cudaSuccess; +} + +void FDDeviceHostFree::operator()(void *ptr) const { cudaFreeHost(ptr); } + +#endif + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/allocate.h b/libs/ultrainfer/ultrainfer/core/allocate.h new file mode 100755 index 0000000000..6c2650242e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/allocate.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +class ULTRAINFER_DECL FDHostAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDHostFree { +public: + void operator()(void *ptr) const; +}; + +#ifdef WITH_GPU + +class ULTRAINFER_DECL FDDeviceAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDDeviceFree { +public: + void operator()(void *ptr) const; +}; + +class ULTRAINFER_DECL FDDeviceHostAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDDeviceHostFree { +public: + void operator()(void *ptr) const; +}; + +#endif + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/config.h.in b/libs/ultrainfer/ultrainfer/core/config.h.in new file mode 100755 index 0000000000..f51e321cb1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/config.h.in @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
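The allocator and deleter functors above are small building blocks used by FDTensor rather than a general-purpose allocator API; the host and device variants share the same call signature, and the device variants only exist in a -DWITH_GPU build. A minimal host-side sketch (the function name is illustrative):

    #include "ultrainfer/core/allocate.h"

    void HostBufferDemo() {
      ultrainfer::FDHostAllocator allocate;
      ultrainfer::FDHostFree release;
      void *buffer = nullptr;
      if (allocate(&buffer, 1024)) {  // 1 KiB of host memory
        // ... fill or read the buffer here ...
        release(buffer);
      }
    }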
+#pragma once + +#ifndef ULTRAINFER_LIB +#cmakedefine ULTRAINFER_LIB +#endif + +#ifndef LIBRARY_NAME +#cmakedefine LIBRARY_NAME @LIBRARY_NAME@ +#endif + +#ifndef PY_LIBRARY_NAME +#cmakedefine PY_LIBRARY_NAME @PY_LIBRARY_NAME@ +#endif + +#ifndef ENABLE_PADDLE2ONNX +#cmakedefine ENABLE_PADDLE2ONNX +#endif + +#ifndef ENABLE_ORT_BACKEND +#cmakedefine ENABLE_ORT_BACKEND +#endif + +#ifndef ENABLE_PADDLE_BACKEND +#cmakedefine ENABLE_PADDLE_BACKEND +#endif + +#ifndef ENABLE_POROS_BACKEND +#cmakedefine ENABLE_POROS_BACKEND +#endif + +#ifndef ENABLE_OPENVINO_BACKEND +#cmakedefine ENABLE_OPENVINO_BACKEND +#endif + +#ifndef WITH_GPU +#cmakedefine WITH_GPU +#endif + +#ifndef WITH_KUNLUNXIN +#cmakedefine WITH_KUNLUNXIN +#endif + +#ifndef WITH_DIRECTML +#cmakedefine WITH_DIRECTML +#endif + +#ifndef ENABLE_TRT_BACKEND +#cmakedefine ENABLE_TRT_BACKEND +#endif + +#ifndef ENABLE_VISION +#cmakedefine ENABLE_VISION +#endif + +#ifndef ENABLE_FLYCV +#cmakedefine ENABLE_FLYCV +#endif + +#ifndef ENABLE_TEXT +#cmakedefine ENABLE_TEXT +#endif + +#ifndef ENABLE_BENCHMARK +#cmakedefine ENABLE_BENCHMARK +#endif + +#ifndef ENABLE_HORIZON_BACKEND +#cmakedefine ENABLE_HORIZON_BACKEND +#endif + +#ifndef ENABLE_TVM_BACKEND +#cmakedefine ENABLE_TVM_BACKEND +#endif diff --git a/libs/ultrainfer/ultrainfer/core/fd_scalar.h b/libs/ultrainfer/ultrainfer/core/fd_scalar.h new file mode 100755 index 0000000000..bd152e3907 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_scalar.h @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/core/float16.h" + +namespace ultrainfer { + +class Scalar { +public: + // Constructor support implicit + Scalar() : Scalar(0) {} + Scalar(double val) : dtype_(FDDataType::FP64) { // NOLINT + data_.f64 = val; + } + + Scalar(float val) : dtype_(FDDataType::FP32) { // NOLINT + data_.f32 = val; + } + + Scalar(float16 val) : dtype_(FDDataType::FP16) { // NOLINT + data_.f16 = val; + } + + Scalar(int64_t val) : dtype_(FDDataType::INT64) { // NOLINT + data_.i64 = val; + } + + Scalar(int32_t val) : dtype_(FDDataType::INT32) { // NOLINT + data_.i32 = val; + } + + Scalar(int16_t val) : dtype_(FDDataType::INT16) { // NOLINT + data_.i16 = val; + } + + Scalar(int8_t val) : dtype_(FDDataType::INT8) { // NOLINT + data_.i8 = val; + } + + Scalar(uint8_t val) : dtype_(FDDataType::UINT8) { // NOLINT + data_.ui8 = val; + } + + Scalar(bool val) : dtype_(FDDataType::BOOL) { // NOLINT + data_.b = val; + } + + // The compatible method for fliud operators, + // and it will be removed in the future. 
+ explicit Scalar(const std::string &str_value) : dtype_(FDDataType::FP64) { + if (str_value == "inf") { + data_.f64 = std::numeric_limits::infinity(); + } else if (str_value == "-inf") { + data_.f64 = -std::numeric_limits::infinity(); + } else if (str_value == "nan") { + data_.f64 = std::numeric_limits::quiet_NaN(); + } else { + data_.f64 = std::stod(str_value); + } + } + + template inline RT to() const { + switch (dtype_) { + case FDDataType::FP32: + return static_cast(data_.f32); + case FDDataType::FP64: + return static_cast(data_.f64); + case FDDataType::FP16: + return static_cast(data_.f16); + case FDDataType::INT32: + return static_cast(data_.i32); + case FDDataType::INT64: + return static_cast(data_.i64); + case FDDataType::INT16: + return static_cast(data_.i16); + case FDDataType::INT8: + return static_cast(data_.i8); + case FDDataType::UINT8: + return static_cast(data_.ui8); + case FDDataType::BOOL: + return static_cast(data_.b); + default: + FDASSERT(false, "Invalid enum scalar data type `%s`.", + Str(dtype_).c_str()); + } + } + + FDDataType dtype() const { return dtype_; } + +private: + FDDataType dtype_; + union data { + bool b; + int8_t i8; + int16_t i16; + int32_t i32; + int64_t i64; + uint8_t ui8; + float16 f16; + float f32; + double f64; + } data_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_tensor.cc b/libs/ultrainfer/ultrainfer/core/fd_tensor.cc new file mode 100755 index 0000000000..de4f368a4d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_tensor.cc @@ -0,0 +1,447 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
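Scalar above is a small tagged union: each constructor records the FDDataType alongside the value, and to<RT>() converts on read, so any narrowing happens at the point of use. A short sketch; the FDTensor(const Scalar &) constructor it feeds is defined further down in fd_tensor.cc:

    #include "ultrainfer/core/fd_scalar.h"
    #include "ultrainfer/core/fd_tensor.h"

    void ScalarDemo() {
      ultrainfer::Scalar half(0.5f);         // stored as FP32
      double as_double = half.to<double>();  // converted when read back

      ultrainfer::Scalar inf("inf");         // string form, stored as FP64
      ultrainfer::FDTensor t(inf);           // one-element FP64 tensor
      (void)as_double;
      (void)t;
    }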
+#include "ultrainfer/core/fd_tensor.h" + +#include +#include + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/utils/utils.h" +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { + +void *FDTensor::MutableData() { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +void *FDTensor::Data() { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +const void *FDTensor::Data() const { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +void FDTensor::StopSharing() { + if (IsShared()) { + ReallocFn(Nbytes()); + CopyBuffer(buffer_, external_data_ptr, Nbytes()); + external_data_ptr = nullptr; + } +} + +const void *FDTensor::CpuData() const { + if (device == Device::GPU) { +#ifdef WITH_GPU + auto *cpu_ptr = const_cast *>(&temporary_cpu_buffer); + cpu_ptr->resize(Nbytes()); + // need to copy cuda mem to cpu first + if (external_data_ptr != nullptr) { + FDASSERT(cudaMemcpy(cpu_ptr->data(), external_data_ptr, Nbytes(), + cudaMemcpyDeviceToHost) == 0, + "[ERROR] Error occurs while copy memory from GPU to CPU"); + + } else { + FDASSERT(cudaMemcpy(cpu_ptr->data(), buffer_, Nbytes(), + cudaMemcpyDeviceToHost) == 0, + "[ERROR] Error occurs while buffer copy memory from GPU to CPU"); + } + return cpu_ptr->data(); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so this is " + "an unexpected problem happend."); +#endif + } + return Data(); +} + +void FDTensor::SetExternalData(const std::vector &new_shape, + const FDDataType &data_type, void *data_buffer, + const Device &new_device, int new_device_id) { + dtype = data_type; + shape.assign(new_shape.begin(), new_shape.end()); + external_data_ptr = data_buffer; + device = new_device; + device_id = new_device_id; +} + +void FDTensor::ExpandDim(int64_t axis) { + size_t ndim = shape.size(); + FDASSERT(axis >= 0 && axis <= ndim, + "The allowed 'axis' must be in range of (0, %lu)!", ndim); + shape.insert(shape.begin() + axis, 1); +} + +void FDTensor::Squeeze(int64_t axis) { + size_t ndim = shape.size(); + FDASSERT(axis >= 0 && axis < ndim, + "The allowed 'axis' must be in range of (0, %lu)!", ndim); + FDASSERT(shape[axis] == 1, + "The No.%ld dimension of shape should be 1, but it is %ld!", + (long)axis, (long)shape[axis]); + shape.erase(shape.begin() + axis); +} + +void FDTensor::Allocate(const std::vector &new_shape, + const FDDataType &data_type, + const std::string &tensor_name, + const Device &new_device) { + dtype = data_type; + name = tensor_name; + shape.assign(new_shape.begin(), new_shape.end()); + device = new_device; + size_t nbytes = Nbytes(); + FDASSERT(ReallocFn(nbytes), + "The UltraInfer FDTensor allocate cpu memory error"); +} + +int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); } + +int FDTensor::Numel() const { + return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); +} + +void FDTensor::Resize(size_t new_nbytes) { ReallocFn(new_nbytes); } + +void FDTensor::Resize(const std::vector &new_shape) { + int numel = Numel(); + int new_numel = std::accumulate(new_shape.begin(), new_shape.end(), 1, + std::multiplies()); + if (new_numel > numel || external_data_ptr != nullptr) { + size_t nbytes = new_numel * FDDataTypeSize(dtype); + ReallocFn(nbytes); + } + shape.assign(new_shape.begin(), new_shape.end()); + external_data_ptr = nullptr; +} + +void FDTensor::Resize(const std::vector &new_shape, + const FDDataType &data_type, + const std::string 
&tensor_name, + const Device &new_device) { + if (device != new_device) { + FreeFn(); + } + external_data_ptr = nullptr; + name = tensor_name; + device = new_device; + dtype = data_type; + int new_nbytes = std::accumulate(new_shape.begin(), new_shape.end(), 1, + std::multiplies()) * + FDDataTypeSize(data_type); + ReallocFn(new_nbytes); + shape.assign(new_shape.begin(), new_shape.end()); +} + +bool FDTensor::Reshape(const std::vector &new_shape) { + int numel = Numel(); + const int64_t unk_dim_val = -1; + const int64_t copy_dim_val = 0; + + std::vector output_shape(new_shape.size(), 0); + int64_t capacity = 1; + int unk_dim_idx = -1; + for (size_t i = 0; i < new_shape.size(); ++i) { + if (new_shape[i] == unk_dim_val) { + FDASSERT(unk_dim_idx == -1, + "Only one dimension value of 'shape' in ReshapeOp can " + "be -1. But received shape = [%s], shape[%d] is also -1.", + Str(new_shape).c_str(), i); + unk_dim_idx = i; + } else if (new_shape[i] == copy_dim_val) { + FDASSERT(i < shape.size(), + "The index of 0 in `shape` must be less than " + "the input tensor X's dimensions. " + "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " + "X's dimensions = %d.", + Str(new_shape).c_str(), i, Str(shape).c_str(), shape.size()); + } else { + FDASSERT(new_shape[i] > 0, + "Each dimension value of 'shape' in ReshapeOp must not " + "be negative except one unknown dimension. " + "But received shape = [%s], shape[%d] = %d.", + Str(new_shape).c_str(), i, new_shape[i]); + } + capacity *= (new_shape[i] ? new_shape[i] : shape[i]); + output_shape[i] = (new_shape[i] ? new_shape[i] : shape[i]); + } + if (unk_dim_idx != -1) { + output_shape[unk_dim_idx] = -numel / capacity; + FDASSERT(output_shape[unk_dim_idx] * capacity == -numel, + "The 'shape' attribute in ReshapeOp is invalid. " + "The input tensor X'size must be divisible by known " + "capacity of 'shape'. " + "But received X's shape = [%s], X's size = %d, " + "'shape' is [%s], known capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(new_shape).c_str(), capacity); + } else { + FDASSERT(numel == capacity, + "The 'shape' in ReshapeOp is invalid. " + "The input tensor X'size must be equal to the capacity of " + "'shape'. 
" + "But received X's shape = [%s], X's size = %d, 'shape' is " + "[%s], the capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(shape).c_str(), capacity); + } + shape = output_shape; + return true; +} + +void FDTensor::PrintInfo(const std::string &prefix) const { + std::cout << prefix << ": name=" << name << ", shape="; + for (int i = 0; i < shape.size(); ++i) { + std::cout << shape[i] << " "; + } + std::cout << ", buffer_=" << buffer_ + << ", external_data_ptr=" << external_data_ptr; + double mean = 0; + double max = -99999999; + double min = 99999999; + if (dtype == FDDataType::FP32) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::FP64) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT8) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::UINT8) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT32) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT64) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::FP16) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else { + FDASSERT(false, + "PrintInfo function doesn't support current situation, maybe you " + "need enhance this function now."); + } + std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max + << ", min=" << min << std::endl; +} + +bool FDTensor::ReallocFn(size_t nbytes) { + if (device == Device::GPU) { +#ifdef WITH_GPU + size_t original_nbytes = nbytes_allocated; + if (nbytes > original_nbytes) { + if (buffer_ != nullptr) { + FDDeviceFree()(buffer_); + } + FDDeviceAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; + } + return buffer_ != nullptr; +#else + FDASSERT(false, "The UltraInfer FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + size_t original_nbytes = nbytes_allocated; + if (nbytes > original_nbytes) { + if (buffer_ != nullptr) { + FDDeviceHostFree()(buffer_); + } + FDDeviceHostAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; + } + return buffer_ != nullptr; +#else + FDASSERT(false, "The UltraInfer FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); +#endif + } + buffer_ = realloc(buffer_, nbytes); + nbytes_allocated = nbytes; + return buffer_ != nullptr; + } +} + +void FDTensor::FreeFn() { + if (external_data_ptr != nullptr) + external_data_ptr = nullptr; + if (buffer_ != nullptr) { + if (device == Device::GPU) { +#ifdef WITH_GPU + FDDeviceFree()(buffer_); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + FDDeviceHostFree()(buffer_); +#endif + } else { + FDHostFree()(buffer_); + } + } + buffer_ = nullptr; + nbytes_allocated = 0; + } +} + +// TODO(liqi): no src_device and dst_device +// should support copy from cpu or gpu to cpu or gpu +void FDTensor::CopyBuffer(void *dst, const void *src, size_t nbytes, + const Device &device, bool is_pinned_memory) { + if (device == Device::GPU) { +#ifdef WITH_GPU + FDASSERT(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice) == 0, + "[ERROR] Error occurs while copy memory from GPU to GPU"); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so copying " + "gpu buffer is " + "an unexpected problem 
happend."); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + FDASSERT(cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToHost) == 0, + "[ERROR] Error occurs while copy memory from host to host"); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so copying " + "gpu buffer is " + "an unexpected problem happend."); +#endif + } else { + std::memcpy(dst, src, nbytes); + } + } +} + +FDTensor::FDTensor(const std::string &tensor_name) { name = tensor_name; } +FDTensor::FDTensor(const char *tensor_name) { name = tensor_name; } + +FDTensor::FDTensor(const Scalar &scalar) { + Allocate({1}, scalar.dtype()); + switch (scalar.dtype()) { + case FDDataType::BOOL: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::UINT8: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT8: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT16: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT32: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT64: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP16: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP32: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP64: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + default: + break; + } +} + +FDTensor::FDTensor(const FDTensor &other) + : shape(other.shape), name(other.name), dtype(other.dtype), + device(other.device), device_id(other.device_id) { + // Copy buffer + if (other.buffer_ == nullptr) { + FreeFn(); + } else { + size_t nbytes = Nbytes(); + FDASSERT(ReallocFn(nbytes), + "The UltraInfer FDTensor allocate memory error"); + CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory); + } + external_data_ptr = other.external_data_ptr; +} + +FDTensor::FDTensor(FDTensor &&other) + : buffer_(other.buffer_), shape(std::move(other.shape)), + name(std::move(other.name)), dtype(other.dtype), + external_data_ptr(other.external_data_ptr), device(other.device), + device_id(other.device_id), nbytes_allocated(other.nbytes_allocated) { + other.name = ""; + // Note(zhoushunjie): Avoid double free. + other.buffer_ = nullptr; + other.external_data_ptr = nullptr; +} + +FDTensor &FDTensor::operator=(const FDTensor &other) { + if (&other != this) { + // Copy buffer + device_id = other.device_id; + if (other.buffer_ == nullptr) { + FreeFn(); + buffer_ = nullptr; + shape = other.shape; + name = other.name; + dtype = other.dtype; + device = other.device; + } else { + Resize(other.shape, other.dtype, other.name, other.device); + size_t nbytes = Nbytes(); + CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory); + } + external_data_ptr = other.external_data_ptr; + } + return *this; +} + +FDTensor &FDTensor::operator=(FDTensor &&other) { + if (&other != this) { + FreeFn(); + buffer_ = other.buffer_; + external_data_ptr = other.external_data_ptr; + + shape = std::move(other.shape); + name = std::move(other.name); + dtype = other.dtype; + device = other.device; + device_id = other.device_id; + nbytes_allocated = other.nbytes_allocated; + + other.name = ""; + // Note(zhoushunjie): Avoid double free. 
+ other.buffer_ = nullptr; + other.external_data_ptr = nullptr; + } + return *this; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_tensor.h b/libs/ultrainfer/ultrainfer/core/fd_tensor.h new file mode 100755 index 0000000000..f1f2460ef7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_tensor.h @@ -0,0 +1,216 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/core/allocate.h" +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/enum_variables.h" + +namespace ultrainfer { + +/*! @brief FDTensor object used to represend data matrix + * + */ +struct ULTRAINFER_DECL FDTensor { + /** \brief Set data buffer for a FDTensor, e.g + * ``` + * std::vector buffer(1 * 3 * 224 * 224, 0); + * FDTensor tensor; + * tensor.SetData({1, 3, 224, 224}, FDDataType::FLOAT, buffer.data()); + * ``` + * \param[in] tensor_shape The shape of tensor + * \param[in] data_type The data type of tensor + * \param[in] data_buffer The pointer of data buffer memory + * \param[in] copy Whether to copy memory from data_buffer to tensor, if + * false, this tensor will share memory with data_buffer, and the data is + * managed by userself \param[in] data_device The device of data_buffer, e.g + * if data_buffer is a pointer to GPU data, the device should be Device::GPU + * \param[in] data_device_id The device id of data_buffer + */ + void SetData(const std::vector &tensor_shape, + const FDDataType &data_type, void *data_buffer, + bool copy = false, const Device &data_device = Device::CPU, + int data_device_id = -1) { + SetExternalData(tensor_shape, data_type, data_buffer, data_device, + data_device_id); + if (copy) { + StopSharing(); + } + } + + /// Get data pointer of tensor + void *GetData() { return MutableData(); } + /// Get data pointer of tensor + const void *GetData() const { return Data(); } + + /// Expand the shape of tensor, it will not change the data memory, just + /// modify its attribute `shape` + void ExpandDim(int64_t axis = 0); + + /// Squeeze the shape of tensor, it will not change the data memory, just + /// modify its attribute `shape` + void Squeeze(int64_t axis = 0); + + /// Reshape the tensor, it will not change the data memory, just modify its + /// attribute `shape` + bool Reshape(const std::vector &new_shape); + + /// Total size of tensor memory buffer in bytes + int Nbytes() const; + + /// Total number of elements in tensor + int Numel() const; + + /// Get shape of tensor + std::vector Shape() const { return shape; } + + /// Get dtype of tensor + FDDataType Dtype() const { return dtype; } + + /** \brief Allocate cpu data buffer for a FDTensor, e.g + * ``` + * FDTensor tensor; + * tensor.Allocate(FDDataType::FLOAT, {1, 3, 224, 224}; + * ``` + * \param[in] data_type The data type of tensor + * \param[in] tensor_shape The shape of tensor + */ + void Allocate(const 
FDDataType &data_type,
+                const std::vector<int64_t> &data_shape) {
+    Allocate(data_shape, data_type, name);
+  }
+
+  /// Debug function, print shape, dtype, mean, max, min of tensor
+  void PrintInfo(const std::string &prefix = "Debug TensorInfo: ") const;
+
+  /// Name of the tensor; when the tensor is fed to a runtime, this needs to be defined
+  std::string name = "";
+
+  /// Whether the tensor owns its data buffer or shares the data buffer from
+  /// outside
+  bool IsShared() { return external_data_ptr != nullptr; }
+  /// If the tensor shares its data buffer from outside, `StopSharing` will
+  /// copy the data into its own buffer; otherwise, do nothing
+  void StopSharing();
+
+  // ******************************************************
+  // The following members and functions are only used inside UltraInfer and
+  // may be removed in the next version
+
+  void *buffer_ = nullptr;
+  std::vector<int64_t> shape = {0};
+  FDDataType dtype = FDDataType::INT8;
+
+  // This is used to skip the memory copy step;
+  // external_data_ptr will point to the user-allocated memory, and
+  // the user has to maintain that memory (allocate and release it)
+  void *external_data_ptr = nullptr;
+  // The internal data will be on CPU
+  // Sometimes the external data is on the GPU and we are going to use
+  // the GPU to run inference on the model,
+  // so we can skip the data transfer, which may improve the efficiency
+  Device device = Device::CPU;
+  // By default the device id of FDTensor is -1, which means this value is
+  // invalid, and FDTensor is using the same device id as Runtime.
+  int device_id = -1;
+
+  // Whether the data buffer is in pinned memory, which is allocated
+  // with cudaMallocHost()
+  bool is_pinned_memory = false;
+
+  // If the external data is not on the CPU, we use this temporary buffer
+  // to transfer the data to the CPU for the cases where we need to access
+  // the other devices' data
+  std::vector<int8_t> temporary_cpu_buffer;
+
+  // The number of bytes allocated so far.
+  // When resizing GPU memory, we will free and realloc the memory only if the
+  // required size is larger than this value.
+  size_t nbytes_allocated = 0;
+
+  // Get data buffer pointer
+  void *MutableData();
+
+  void *Data();
+
+  const void *Data() const;
+
+  // Use this to get the tensor data for processing.
+  // Since the most common scenario is processing data on the CPU,
+  // this function will return a pointer to a CPU memory
+  // buffer.
+ // If the original data is on other device, the data + // will copy to cpu store in `temporary_cpu_buffer` + const void *CpuData() const; + + // void SetDataBuffer(const std::vector& new_shape, const FDDataType& + // data_type, void* data_buffer, bool copy = false, const Device& new_device = + // Device::CPU, int new_device_id = -1); Set user memory buffer for Tensor, + // the memory is managed by the user it self, but the Tensor will share the + // memory with user So take care with the user buffer + void SetExternalData(const std::vector &new_shape, + const FDDataType &data_type, void *data_buffer, + const Device &new_device = Device::CPU, + int new_device_id = -1); + // Initialize Tensor + // Include setting attribute for tensor + // and allocate cpu memory buffer + void Allocate(const std::vector &new_shape, + const FDDataType &data_type, + const std::string &tensor_name = "", + const Device &new_device = Device::CPU); + + void Resize(size_t nbytes); + + void Resize(const std::vector &new_shape); + + void Resize(const std::vector &new_shape, + const FDDataType &data_type, const std::string &tensor_name = "", + const Device &new_device = Device::CPU); + + bool ReallocFn(size_t nbytes); + + void FreeFn(); + + FDTensor() {} + explicit FDTensor(const std::string &tensor_name); + explicit FDTensor(const char *tensor_name); + + // Deep copy + FDTensor(const FDTensor &other); + // Move constructor + FDTensor(FDTensor &&other); + + // Deep copy assignment + FDTensor &operator=(const FDTensor &other); + // Move assignment + FDTensor &operator=(FDTensor &&other); + + // Scalar to FDTensor + explicit FDTensor(const Scalar &scalar); + + ~FDTensor() { FreeFn(); } + + static void CopyBuffer(void *dst, const void *src, size_t nbytes, + const Device &device = Device::CPU, + bool is_pinned_memory = false); +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_type.cc b/libs/ultrainfer/ultrainfer/core/fd_type.cc new file mode 100755 index 0000000000..3168d46715 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_type.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
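+
+// Illustrative usage sketch tying the FDTensor API above to the FDDataType
+// helpers implemented in this file; the tensor name and shape below are
+// arbitrary placeholders rather than values required by the library:
+//
+//   ultrainfer::FDTensor t("input");
+//   t.Allocate(ultrainfer::FDDataType::FP32, {1, 3, 224, 224});
+//   int elem_size = ultrainfer::FDDataTypeSize(t.Dtype());  // 4 for FP32
+//   // t.Nbytes() == t.Numel() * elem_size, and Str(t.Dtype()) gives a
+//   // printable name such as "FDDataType::FP32".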
+ +#include "ultrainfer/core/fd_type.h" + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +int FDDataTypeSize(const FDDataType &data_type) { + if (data_type == FDDataType::BOOL) { + return sizeof(bool); + } else if (data_type == FDDataType::INT16) { + return sizeof(int16_t); + } else if (data_type == FDDataType::INT32) { + return sizeof(int32_t); + } else if (data_type == FDDataType::INT64) { + return sizeof(int64_t); + } else if (data_type == FDDataType::FP32) { + return sizeof(float); + } else if (data_type == FDDataType::FP64) { + return sizeof(double); + } else if (data_type == FDDataType::UINT8) { + return sizeof(uint8_t); + } else if (data_type == FDDataType::INT8) { + return sizeof(int8_t); + } else if (data_type == FDDataType::FP16) { + return sizeof(float16); + } else { + FDASSERT(false, "Unexpected data type: %s", Str(data_type).c_str()); + } + return -1; +} + +std::string Str(const FDDataType &fdt) { + std::string out; + switch (fdt) { + case FDDataType::BOOL: + out = "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out = "FDDataType::INT16"; + break; + case FDDataType::INT32: + out = "FDDataType::INT32"; + break; + case FDDataType::INT64: + out = "FDDataType::INT64"; + break; + case FDDataType::FP32: + out = "FDDataType::FP32"; + break; + case FDDataType::FP64: + out = "FDDataType::FP64"; + break; + case FDDataType::FP16: + out = "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out = "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out = "FDDataType::INT8"; + break; + default: + out = "FDDataType::UNKNOWN"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const FDDataType &fdt) { + switch (fdt) { + case FDDataType::BOOL: + out << "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out << "FDDataType::INT16"; + break; + case FDDataType::INT32: + out << "FDDataType::INT32"; + break; + case FDDataType::INT64: + out << "FDDataType::INT64"; + break; + case FDDataType::FP32: + out << "FDDataType::FP32"; + break; + case FDDataType::FP64: + out << "FDDataType::FP64"; + break; + case FDDataType::FP16: + out << "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out << "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out << "FDDataType::INT8"; + break; + default: + out << "FDDataType::UNKNOWN"; + } + return out; +} + +template +const FDDataType TypeToDataType::dtype = UNKNOWN1; + +template <> const FDDataType TypeToDataType::dtype = BOOL; + +template <> const FDDataType TypeToDataType::dtype = INT16; + +template <> const FDDataType TypeToDataType::dtype = INT32; + +template <> const FDDataType TypeToDataType::dtype = INT64; + +template <> const FDDataType TypeToDataType::dtype = FP32; + +template <> const FDDataType TypeToDataType::dtype = FP64; + +template <> const FDDataType TypeToDataType::dtype = UINT8; + +template <> const FDDataType TypeToDataType::dtype = INT8; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_type.h b/libs/ultrainfer/ultrainfer/core/fd_type.h new file mode 100755 index 0000000000..7bedd7e329 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_type.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include + +#include "ultrainfer/core/config.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +enum ULTRAINFER_DECL FDDataType { + BOOL, + INT16, + INT32, + INT64, + FP16, + FP32, + FP64, + UNKNOWN1, + UNKNOWN2, + UNKNOWN3, + UNKNOWN4, + UNKNOWN5, + UNKNOWN6, + UNKNOWN7, + UNKNOWN8, + UNKNOWN9, + UNKNOWN10, + UNKNOWN11, + UNKNOWN12, + UNKNOWN13, + UINT8, + INT8 +}; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &out, + const FDDataType &fdt); + +ULTRAINFER_DECL std::string Str(const FDDataType &fdt); + +ULTRAINFER_DECL int32_t FDDataTypeSize(const FDDataType &data_dtype); + +template struct ULTRAINFER_DECL TypeToDataType { + static const FDDataType dtype; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/float16.h b/libs/ultrainfer/ultrainfer/core/float16.h new file mode 100755 index 0000000000..bcacf21dba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/float16.h @@ -0,0 +1,651 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +#include +#include +#include + +#if !defined(_WIN32) +#define FD_ALIGN(x) __attribute__((aligned(x))) +#else +#define FD_ALIGN(x) __declspec(align(x)) +#endif + +namespace ultrainfer { + +struct FD_ALIGN(2) float16 { +public: + uint16_t x; + + // The following defaulted special class member functions + // are added to make float16 pass the std::is_trivial test + float16() = default; + float16(const float16 &o) = default; + float16 &operator=(const float16 &o) = default; + float16(float16 &&o) = default; + float16 &operator=(float16 &&o) = default; + ~float16() = default; + + // Constructors + +#ifdef FD_WITH_NATIVE_FP16 + // __fp16 is a native half precision data type for arm cpu, + // float16_t is an alias for __fp16 + inline explicit float16(const float16_t &h) { + x = *reinterpret_cast(&h); + } +#endif + + inline explicit float16(float val) { +#if defined(FD_WITH_NATIVE_FP16) + float32x4_t tmp = vld1q_dup_f32(&val); + float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0); + x = *reinterpret_cast(&res); + +#elif defined(__F16C__) + x = _cvtss_sh(val, 0); + +#else + // Conversion routine adapted from + // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion + Bits v, s; + v.f = val; + uint32_t sign = v.si & sigN; + v.si ^= sign; + sign >>= shiftSign; // logical shift + s.si = mulN; + s.si = s.f * v.f; // correct subnormals + v.si ^= (s.si ^ v.si) & -(minN > v.si); + v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN)); + v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN)); + v.ui >>= shift; // logical shift + v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC); + v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC); + x = v.ui | sign; + +#endif + } + + inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {} + + template + inline explicit float16(const T &val) + : x(float16(static_cast(val)).x) {} + + // Assignment operators + +#ifdef FD_WITH_NATIVE_FP16 + inline float16 &operator=(const float16_t &rhs) { + x = *reinterpret_cast(&rhs); + return *this; + } +#endif + + inline float16 &operator=(bool b) { + x = b ? 
0x3c00 : 0; + return *this; + } + + inline float16 &operator=(int8_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint8_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int16_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint16_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int32_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint32_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int64_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint64_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(float val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(double val) { + x = float16(val).x; + return *this; + } + +// Conversion opertors +#ifdef FD_WITH_NATIVE_FP16 + HOSTDEVICE inline explicit operator float16_t() const { + return *reinterpret_cast(this); + } +#endif + + inline operator float() const { +#if defined(FD_WITH_NATIVE_FP16) + float16x4_t res = vld1_dup_f16(reinterpret_cast(this)); + return vgetq_lane_f32(vcvt_f32_f16(res), 0); + +#elif defined(__F16C__) + return _cvtsh_ss(this->x); + +#else + // Conversion routine adapted from + // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion + Bits v; + v.ui = this->x; + int32_t sign = v.si & sigC; + v.si ^= sign; + sign <<= shiftSign; + v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC); + v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC); + Bits s; + s.si = mulC; + s.f *= v.si; + int32_t mask = -(norC > v.si); + v.si <<= shift; + v.si ^= (s.si ^ v.si) & mask; + v.si |= sign; + return v.f; + +#endif + } + + inline explicit operator bool() const { return (x & 0x7fff) != 0; } + + inline explicit operator int8_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint8_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int16_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint16_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int32_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint32_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int64_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint64_t() const { + return static_cast(static_cast(*this)); + } + + inline operator double() const { + return static_cast(static_cast(*this)); + } + + inline bool operator>(const float &other) const { + return this->operator float() > other; + } + + inline bool operator>(const double &other) const { + return this->operator double() > other; + } + + inline bool operator<(const float &other) const { + return this->operator float() > other; + } + + inline bool operator<(const double &other) const { + return this->operator double() > other; + } + + template ::value, + bool>::type = true> + inline float16 &operator+=(const T &other) { + *this = float16(static_cast(*this) + other); + return *this; + } + +private: + union Bits { + float f; + int32_t si; + uint32_t ui; + }; + + static const int shift = 13; + static const int shiftSign = 16; + + static const int32_t infN = 0x7F800000; + static const int32_t maxN = 0x477FE000; // max flt16 as flt32 + static const int32_t minN = 0x38800000; // min flt16 
normal as flt32 + static const int32_t sigN = 0x80000000; // sign bit + + static constexpr int32_t infC = infN >> shift; + static constexpr int32_t nanN = (infC + 1) + << shift; // minimum flt16 nan as float32 + static constexpr int32_t maxC = maxN >> shift; + static constexpr int32_t minC = minN >> shift; + static constexpr int32_t sigC = sigN >> shiftSign; + + static const int32_t mulN = 0x52000000; // (1 << 23) / minN + static const int32_t mulC = 0x33800000; // minN / (1 << (23 - shift)) + static const int32_t subC = 0x003FF; // max flt32 subnormal downshifted + static const int32_t norC = 0x00400; // min flt32 normal downshifted + + static constexpr int32_t maxD = infC - maxC - 1; + static constexpr int32_t minD = minC - subC - 1; +}; + +// Arithmetic operators for float16 on ARMv8.2-A CPU +#if defined(FD_WITH_NATIVE_FP16) +inline float16 operator+(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fadd h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator-(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fsub h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator*(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fmul h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator/(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fdiv h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator-(const float16 &a) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "fneg h0, h0\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0"); + return res; +} + +inline float16 &operator+=(float16 &a, const float16 &b) { // NOLINT + a = a + b; + return a; +} + +inline float16 &operator-=(float16 &a, const float16 &b) { // NOLINT + a = a - b; + return a; +} + +inline float16 &operator*=(float16 &a, const float16 &b) { // NOLINT + a = a * b; + return a; +} + +inline float16 &operator/=(float16 &a, const float16 &b) { // NOLINT + a = a / b; + return a; +} + +inline bool operator==(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmeq h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator!=(const float16 &a, const float16 &b) { return !(a == b); } + +inline bool operator<(const float16 &a, const float16 &b) { + uint16_t 
res; + asm volatile("ld1 {v1.h}[0], [%[a_ptr]]\n" + "ld1 {v0.h}[0], [%[b_ptr]]\n" + "fcmgt h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator<=(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v1.h}[0], [%[a_ptr]]\n" + "ld1 {v0.h}[0], [%[b_ptr]]\n" + "fcmge h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator>(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmgt h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator>=(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmge h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +#else +inline float16 operator+(const float16 &a, const float16 &b) { + return float16(static_cast(a) + static_cast(b)); +} + +inline float16 operator-(const float16 &a, const float16 &b) { + return float16(static_cast(a) - static_cast(b)); +} + +inline float16 operator*(const float16 &a, const float16 &b) { + return float16(static_cast(a) * static_cast(b)); +} + +inline float16 operator/(const float16 &a, const float16 &b) { + return float16(static_cast(a) / static_cast(b)); +} + +inline float16 operator-(const float16 &a) { + float16 res; + res.x = a.x ^ 0x8000; + return res; +} + +inline float16 &operator+=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) + static_cast(b)); + return a; +} + +inline float16 &operator-=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) - static_cast(b)); + return a; +} + +inline float16 &operator*=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) * static_cast(b)); + return a; +} + +inline float16 &operator/=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) / static_cast(b)); + return a; +} + +inline bool operator==(const float16 &a, const float16 &b) { + return static_cast(a) == static_cast(b); +} + +inline bool operator!=(const float16 &a, const float16 &b) { + return static_cast(a) != static_cast(b); +} + +inline bool operator<(const float16 &a, const float16 &b) { + return static_cast(a) < static_cast(b); +} + +inline bool operator<=(const float16 &a, const float16 &b) { + return static_cast(a) <= static_cast(b); +} + +inline bool operator>(const float16 &a, const float16 &b) { + return static_cast(a) > static_cast(b); +} + +inline bool operator>=(const float16 &a, const float16 &b) { + return static_cast(a) >= static_cast(b); +} +#endif + + template ::value || + std::is_same::value, + bool>::type = true> + inline T &operator+=(T &a, const float16 &b) { // NOLINT + auto c = static_cast(a) + static_cast(b); + a = static_cast(c); + return a; + } + + inline double &operator+=(double &a, const float16 &b) { // NOLINT + a = a + static_cast(b); + 
return a; + } + + inline float16 raw_uint16_to_float16(uint16_t a) { + float16 res; + res.x = a; + return res; + } + + inline bool(isnan)(const float16 &a) { return (a.x & 0x7fff) > 0x7c00; } + + inline bool(isinf)(const float16 &a) { return (a.x & 0x7fff) == 0x7c00; } + + inline bool(isfinite)(const float16 &a) { + return !((isnan)(a)) && !((isinf)(a)); + } + + inline float16(abs)(const float16 &a) { + return float16(std::abs(static_cast(a))); + } + + inline std::ostream &operator<<(std::ostream &os, const float16 &a) { + os << static_cast(a); + return os; + } +} // namespace ultrainfer + +namespace std { + +// Override the std::is_pod::value for float16 +// The reason is that different compilers implemented std::is_pod based on +// different C++ standards. float16 class is a plain old data in C++11 given +// that it is both trivial and standard_layout. +// However, std::is_pod in nvcc 8.0 host c++ compiler follows C++0x and is +// more restricted in that you cannot provide any customized +// constructor in float16. Hence, we override is_pod here following C++11 +// so that .cu files can be successfully compiled by nvcc. +template <> struct is_pod { + static const bool value = is_trivial::value && + is_standard_layout::value; +}; + +template <> +struct is_floating_point + : std::integral_constant< + bool, std::is_same::type>::value> {}; +template <> struct is_signed { + static const bool value = true; +}; + +template <> struct is_unsigned { + static const bool value = false; +}; + +inline bool isnan(const ultrainfer::float16 &a) { return ultrainfer::isnan(a); } + +inline bool isinf(const ultrainfer::float16 &a) { return ultrainfer::isinf(a); } + +template <> struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 3; + static const int max_digits10 = 5; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static ultrainfer::float16(min)() { + return ultrainfer::raw_uint16_to_float16(0x400); + } + static ultrainfer::float16 lowest() { + return ultrainfer::raw_uint16_to_float16(0xfbff); + } + static ultrainfer::float16(max)() { + return ultrainfer::raw_uint16_to_float16(0x7bff); + } + static ultrainfer::float16 epsilon() { + return ultrainfer::raw_uint16_to_float16(0x0800); + } + static ultrainfer::float16 round_error() { return ultrainfer::float16(0.5); } + static ultrainfer::float16 infinity() { + return ultrainfer::raw_uint16_to_float16(0x7c00); + } + static ultrainfer::float16 quiet_NaN() { + return ultrainfer::raw_uint16_to_float16(0x7e00); + } + static ultrainfer::float16 signaling_NaN() { + return ultrainfer::raw_uint16_to_float16(0x7e00); + } + static ultrainfer::float16 denorm_min() { + return ultrainfer::raw_uint16_to_float16(0x1); + } +}; + +inline 
ultrainfer::float16 abs(const ultrainfer::float16 &a) { + return ultrainfer::abs(a); +} + +} // namespace std diff --git a/libs/ultrainfer/ultrainfer/function/cast.cc b/libs/ultrainfer/ultrainfer/function/cast.cc new file mode 100755 index 0000000000..db424b5d81 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cast.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/cast.h" +#include + +namespace ultrainfer { +namespace function { + +template struct CastOpTransformFunctor { + OutT operator()(InT in) const { return static_cast(in); } +}; + +template +void CastKernel(const FDTensor &x, FDTensor *out, FDDataType output_dtype) { + + FD_VISIT_ALL_TYPES(output_dtype, "CastOpTransformFunctor", ([&] { + auto *in_begin = reinterpret_cast(x.Data()); + auto *in_end = in_begin + x.Numel(); + FDTensor out_tmp; + out_tmp.Allocate(x.Shape(), output_dtype); + auto *out_begin = + reinterpret_cast(out_tmp.Data()); + std::transform(in_begin, in_end, out_begin, + CastOpTransformFunctor()); + *out = std::move(out_tmp); + })); +} + +void Cast(const FDTensor &x, FDTensor *out, FDDataType output_dtype) { + FD_VISIT_ALL_TYPES(x.dtype, "CastKernel", + ([&] { CastKernel(x, out, output_dtype); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cast.h b/libs/ultrainfer/ultrainfer/function/cast.h new file mode 100755 index 0000000000..2a90c67088 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cast.h @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Cast x to output data type element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. + @param output_dtype The type of output tensor. +*/ +ULTRAINFER_DECL void Cast(const FDTensor &x, FDTensor *out, + FDDataType output_dtype); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/clip.cc b/libs/ultrainfer/ultrainfer/function/clip.cc new file mode 100755 index 0000000000..95308fb7d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/clip.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/clip.h" +#include + +namespace ultrainfer { +namespace function { + +template class ClipFunctor { +public: + explicit ClipFunctor(const T min, const T max) : min_(min), max_(max) {} + T operator()(const T x) const { + return x < min_ ? min_ : x > max_ ? max_ : x; + } + +private: + T min_; + T max_; +}; + +template +void ClipKernel(const FDTensor &x, double min, double max, FDTensor *out) { + T max_ = static_cast(max); + T min_ = static_cast(min); + + FDASSERT(min_ < max_, + "max should be greater than or equal to min. But received min = %f, " + "max = %f", + static_cast(min_), static_cast(max_)); + FDTensor tmp; + tmp.Allocate(x.Shape(), x.Dtype()); + const T *x_data = reinterpret_cast(x.Data()); + + int64_t numel = x.Numel(); + T *out_data = reinterpret_cast(tmp.Data()); + + std::transform(x_data, x_data + numel, out_data, ClipFunctor(min_, max_)); + *out = std::move(tmp); +} + +void Clip(const FDTensor &x, double min, double max, FDTensor *out) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ClipKernel", + ([&] { ClipKernel(x, min, max, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/clip.h b/libs/ultrainfer/ultrainfer/function/clip.h new file mode 100755 index 0000000000..34d6f873f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/clip.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** This operator clip all elements in input into the range [ min, max ]. + Support float32, float64, int32, int64 + @param x The input tensor. + @param min The lower bound + @param max The uppper bound + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Clip(const FDTensor &x, double min, double max, + FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/concat.cc b/libs/ultrainfer/ultrainfer/function/concat.cc new file mode 100755 index 0000000000..1d70ea2f42 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/concat.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/concat.h" + +#include "ultrainfer/utils/utils.h" +#include +#include +#include +#include + +namespace ultrainfer { +namespace function { + +std::vector +ComputeAndCheckConcatOutputShape(const std::vector &input, int axis) { + const size_t n = input.size(); + auto out_dims = input[0].shape; + size_t in_zero_dims_size = out_dims.size(); + for (size_t i = 1; i < n; ++i) { + FDASSERT(input[i].shape.size() == out_dims.size(), + "The shape of input[0] and input[%d] is expected to be equal. But " + "received input[0]'s shape = %s, input[%d]'s shape = %s.", + i, Str(out_dims).c_str(), i, Str(input[i].shape).c_str()); + for (size_t j = 0; j < in_zero_dims_size; j++) { + if (j == axis) { + out_dims[axis] += input[i].shape[axis]; + } else { + FDASSERT( + input[0].shape[j] == input[i].shape[j], + "The %d-th dimension of input[0] and input[%d] is expected to be " + "equal." + "But received input[0]'s shape = %s, input[%d]'s shape = %s.", + j, i, Str(input[0].shape).c_str(), i, Str(input[i].shape).c_str()); + } + } + } + return out_dims; +} + +template struct ConcatFunctor { + void operator()(const std::vector &input, int axis, + FDTensor *output) { + size_t num = input.size(); + + int64_t rows = 1; + auto dim_0 = input[0].shape; + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int64_t out_rows = rows, out_cols = 0; + + std::vector input_cols(num); + for (size_t i = 0; i < num; ++i) { + int64_t t_cols = input[i].Numel() / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + + // computation + T *output_data = reinterpret_cast(output->Data()); + int64_t col_idx = 0; + for (size_t j = 0; j < num; ++j) { + int64_t col_len = input_cols[j]; + const T *input_data = reinterpret_cast(input[j].Data()); + for (int64_t k = 0; k < out_rows; ++k) { + FDTensor::CopyBuffer(output_data + k * out_cols + col_idx, + input_data + k * col_len, sizeof(T) * col_len, + input[j].device, input[j].is_pinned_memory); + } + col_idx += col_len; + } + } +}; + +template +void ConcatKernel(const std::vector &input, FDTensor *output, + int axis) { + auto output_shape = ComputeAndCheckConcatOutputShape(input, axis); + FDTensor output_tmp; + output_tmp.Resize(output_shape, TypeToDataType::dtype, output->name, + input[0].device); + + ConcatFunctor functor; + functor(input, axis, &output_tmp); + *output = std::move(output_tmp); +} + +void Concat(const std::vector &x, FDTensor *out, int axis) { + FDASSERT(x.size() > 0, + "The number of FDTensor array should be larger than 0, but the size " + "of input is %d", + x.size()); + int64_t rank = x[0].shape.size(); + FDASSERT(axis >= -rank && axis < rank, + "The axis is expected to be in range of [%d, %d), but got %d", -rank, + rank, axis); + if (axis < 0) { + axis += rank; + } + + FD_VISIT_ALL_TYPES(x[0].dtype, "Concat", + ([&] { ConcatKernel(x, out, axis); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/concat.h b/libs/ultrainfer/ultrainfer/function/concat.h new file mode 100755 index 0000000000..862b9784e9 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/function/concat.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Excute the concatenate operation for input FDTensor along given axis. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis Axis which will be concatenated. +*/ + +ULTRAINFER_DECL void Concat(const std::vector &x, FDTensor *out, + int axis = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cuda_cast.cu b/libs/ultrainfer/ultrainfer/function/cuda_cast.cu new file mode 100755 index 0000000000..c97e7ea3a3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cuda_cast.cu @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/function/cuda_cast.h" +namespace ultrainfer { +namespace function { +template +__global__ void CudaCastKernel(const T_IN *in, T_OUT *out, int edge) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) + return; + out[position] = (T_OUT)in[position]; +} + +void CudaCast(const FDTensor &in, FDTensor *out, cudaStream_t stream) { + int jobs = in.Numel(); + int threads = 256; + int blocks = ceil(jobs / (float)threads); + if (in.dtype == FDDataType::INT64 && out->dtype == FDDataType::INT32) { + CudaCastKernel<<>>( + reinterpret_cast(const_cast(in.Data())), + reinterpret_cast(out->MutableData()), jobs); + } else if (in.dtype == FDDataType::INT32 && out->dtype == FDDataType::INT64) { + CudaCastKernel<<>>( + reinterpret_cast(const_cast(in.Data())), + reinterpret_cast(out->MutableData()), jobs); + } else { + FDASSERT(false, "CudaCast only support input INT64, output INT32."); + } +} + +} // namespace function +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/function/cuda_cast.h b/libs/ultrainfer/ultrainfer/function/cuda_cast.h new file mode 100755 index 0000000000..594be05a0f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cuda_cast.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Cast the type of the data in GPU buffer. + @param in The input tensor. + @param out The output tensor + @param stream CUDA stream +*/ +ULTRAINFER_DECL void CudaCast(const FDTensor &in, FDTensor *out, + cudaStream_t stream); +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cumprod.cc b/libs/ultrainfer/ultrainfer/function/cumprod.cc new file mode 100755 index 0000000000..47eb89cc9a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cumprod.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/cumprod.h" + +namespace ultrainfer { +namespace function { + +void GetCumprodDimInfo(const std::vector &dim, int cumprod_dim, + size_t *outer_dim, size_t *mid_dim, size_t *inner_dim) { + int dim_size = dim.size(); + FDASSERT(cumprod_dim >= -dim_size, + "The input dim of CumprodOp should be larger than the opposite " + "rank of input x which is %d. But received dim = %d", + -dim_size, cumprod_dim); + FDASSERT(cumprod_dim < dim_size, + "The input dim of CumprodOp should be smaller than the " + "rank of input x which is %d. 
But received dim = %d",
+           dim_size, cumprod_dim);
+  if (cumprod_dim < 0)
+    cumprod_dim += dim_size;
+
+  *outer_dim = 1;
+  for (int i = 0; i < cumprod_dim; ++i) {
+    *outer_dim *= dim[i];
+  }
+  *mid_dim = dim[cumprod_dim];
+  *inner_dim = 1;
+  for (int i = cumprod_dim + 1; i < dim_size; ++i) {
+    *inner_dim *= dim[i];
+  }
+}
+
+template <typename T>
+void CumprodKernel(const FDTensor &x, FDTensor *out, int axis) {
+  auto *x_data = reinterpret_cast<const T *>(x.Data());
+  auto shape = x.Shape();
+
+  size_t outer_dim = 1;
+  size_t mid_dim = 1;
+  size_t inner_dim = 1;
+  GetCumprodDimInfo(shape, axis, &outer_dim, &mid_dim, &inner_dim);
+
+  out->Allocate(x.Shape(), x.Dtype());
+  auto *out_data = reinterpret_cast<T *>(out->Data());
+
+  for (size_t i = 0; i < outer_dim; i++) {
+    for (size_t j = 0; j < mid_dim; j++) {
+      for (size_t k = 0; k < inner_dim; k++) {
+        size_t pos = i * mid_dim * inner_dim + j * inner_dim + k;
+        if (j == 0) {
+          out_data[pos] = x_data[pos];
+        } else {
+          out_data[pos] = out_data[pos - inner_dim] * x_data[pos];
+        }
+      }
+    }
+  }
+}
+
+void Cumprod(const FDTensor &x, FDTensor *out, int axis) {
+  FD_VISIT_INT_FLOAT_TYPES(x.dtype, "CumprodKernel",
+                           ([&] { CumprodKernel<data_t>(x, out, axis); }));
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/cumprod.h b/libs/ultrainfer/ultrainfer/function/cumprod.h
new file mode 100755
index 0000000000..ec5ec92845
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/cumprod.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+
+/** Execute the cumulative product operation on the input FDTensor along the given axis.
+  @param x The input tensor.
+  @param out The output tensor which stores the result.
+  @param axis The axis along which the cumulative product is computed.
+*/
+
+ULTRAINFER_DECL void Cumprod(const FDTensor &x, FDTensor *out, int axis = 0);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/eigen.cc b/libs/ultrainfer/ultrainfer/function/eigen.cc
new file mode 100755
index 0000000000..b60fb125eb
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/eigen.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
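+
+// A minimal sketch of how these Eigen wrappers are typically driven by the
+// function kernels; it assumes `x` and `out` are FP32 FDTensors that have
+// already been allocated with matching shapes:
+//
+//   auto x_vec = ultrainfer::function::EigenVector<float>::Flatten(x);
+//   auto out_vec = ultrainfer::function::EigenVector<float>::Flatten(out);
+//   const auto &dev =
+//       *ultrainfer::function::EigenDeviceWrapper::GetInstance()->GetDevice();
+//   out_vec.device(dev) = x_vec * x_vec;  // evaluate an Eigen expression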
+ +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { +std::shared_ptr EigenDeviceWrapper::instance_ = nullptr; + +std::shared_ptr EigenDeviceWrapper::GetInstance() { + if (instance_ == nullptr) { + instance_ = std::make_shared(); + } + return instance_; +} + +const Eigen::DefaultDevice *EigenDeviceWrapper::GetDevice() const { + return &device_; +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/eigen.h b/libs/ultrainfer/ultrainfer/function/eigen.h new file mode 100755 index 0000000000..0562a26d02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/eigen.h @@ -0,0 +1,139 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/axis_utils.h" +#include "unsupported/Eigen/CXX11/Tensor" +#include +#include +#include + +namespace ultrainfer { +namespace function { +// EigenDim converts shape into Eigen::DSizes. +template struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const std::vector &dims) { + Type ret; + for (int64_t d = 0; d < dims.size(); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret FDTensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(FDTensor &tensor, + const std::vector &dims) { // NOLINT + return Type(reinterpret_cast(tensor.Data()), EigenDim::From(dims)); + } + + static Type From(FDTensor &tensor) { // NOLINT + return From(tensor, tensor.shape); + } // NOLINT + + static ConstType From(const FDTensor &tensor, + const std::vector &dims) { + return ConstType(reinterpret_cast(tensor.Data()), + EigenDim::From(dims)); + } + + static ConstType From(const FDTensor &tensor) { + return From(tensor, tensor.shape); + } +}; + +template +struct EigenScalar { + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + using Type = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + using ConstType = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + + static Type From(FDTensor &tensor) { + return Type(reinterpret_cast(tensor.Data())); + } // NOLINT + + static ConstType From(const FDTensor &tensor) { + return ConstType(reinterpret_cast(tensor.Data())); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten reshapes a Tensor into an EigenVector. 
+ static typename EigenVector::Type Flatten(FDTensor &tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } + + static typename EigenVector::ConstType + Flatten(const FDTensor &tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } +}; + +template +struct EigenMatrix : public EigenTensor { + static typename EigenMatrix::Type Reshape(FDTensor &tensor, // NOLINT + int num_col_dims) { + int rank = tensor.shape.size(); + FDASSERT((num_col_dims > 0 && num_col_dims < rank), + "Input dimension number(num_col_dims) must be between 0 and %d, " + "but received number is %d.", + rank, num_col_dims); + const int n = SizeToAxis(num_col_dims, tensor.shape); + const int d = SizeFromAxis(num_col_dims, tensor.shape); + return EigenMatrix::From(tensor, {n, d}); + } + + static typename EigenMatrix::ConstType Reshape(const FDTensor &tensor, + int num_col_dims) { + int rank = tensor.shape.size(); + FDASSERT((num_col_dims > 0 && num_col_dims < rank), + "Input dimension number(num_col_dims) must be between 0 and %d, " + "but received number is %d.", + rank, num_col_dims); + const int n = SizeToAxis(num_col_dims, tensor.shape); + const int d = SizeFromAxis(num_col_dims, tensor.shape); + return EigenMatrix::From(tensor, {n, d}); + } +}; + +class EigenDeviceWrapper { +public: + static std::shared_ptr GetInstance(); + const Eigen::DefaultDevice *GetDevice() const; + +private: + Eigen::DefaultDevice device_; + static std::shared_ptr instance_; +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise.cc b/libs/ultrainfer/ultrainfer/function/elementwise.cc new file mode 100755 index 0000000000..7e8b4d65cc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/elementwise_base.h" +#include "ultrainfer/function/elementwise_functor.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace function { + +DEFINE_ELEMENTWISE_OP(Add); +DEFINE_ELEMENTWISE_OP(Multiply); +DEFINE_ELEMENTWISE_OP(Subtract); +DEFINE_ELEMENTWISE_OP(Divide); + +void Add(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "AddRawKernel", + ([&] { AddRawKernel()(x, y, -1, out); })); +} + +void Subtract(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "SubtractRawKernel", + ([&] { SubtractRawKernel()(x, y, -1, out); })); +} + +void Multiply(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "MultiplyRawKernel", + ([&] { MultiplyRawKernel()(x, y, -1, out); })); +} + +void Divide(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "DivideRawKernel", + ([&] { DivideRawKernel()(x, y, -1, out); })); +} + +template struct MaximumRawKernel { + void operator()(const FDTensor &x, const FDTensor &y, int axis, + FDTensor *out) { + ElementwiseCompute, T>(x, y, axis, MaximumFunctor(), + out); + } +}; + +void Maximum(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "MaximumRawKernel", + ([&] { MaximumRawKernel()(x, y, -1, out); })); +} + +} // namespace function + +FDTensor operator+(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Add(x, y, &out); + return out; +} + +FDTensor operator-(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Subtract(x, y, &out); + return out; +} + +FDTensor operator*(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Multiply(x, y, &out); + return out; +} + +FDTensor operator/(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Divide(x, y, &out); + return out; +} + +#define INSTANTIATE_OPERATOR(operation_type) \ + template FDTensor operator operation_type(const FDTensor &x, bool y); \ + template FDTensor operator operation_type(const FDTensor &x, uint8_t y); \ + template FDTensor operator operation_type(const FDTensor &x, int16_t y); \ + template FDTensor operator operation_type(const FDTensor &x, int y); \ + template FDTensor operator operation_type(const FDTensor &x, int64_t y); \ + template FDTensor operator operation_type(const FDTensor &x, float y); \ + template FDTensor operator operation_type(const FDTensor &x, double y); \ + template FDTensor operator operation_type(bool x, const FDTensor &y); \ + template FDTensor operator operation_type(uint8_t x, const FDTensor &y); \ + template FDTensor operator operation_type(int16_t x, const FDTensor &y); \ + template FDTensor operator operation_type(int x, const FDTensor &y); \ + template FDTensor operator operation_type(int64_t x, const FDTensor &y); \ + template FDTensor operator operation_type(float x, const FDTensor &y); \ + template FDTensor operator operation_type(double x, const FDTensor &y) + +INSTANTIATE_OPERATOR(+); +INSTANTIATE_OPERATOR(-); +INSTANTIATE_OPERATOR(*); +INSTANTIATE_OPERATOR(/); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise.h b/libs/ultrainfer/ultrainfer/function/elementwise.h new file mode 100755 index 0000000000..bc6aeb93a2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise.h @@ -0,0 +1,105 @@ +// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_scalar.h"
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+
+namespace function {
+
+/** Execute the add operation for input FDTensors. *out = x + y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Add(const FDTensor &x, const FDTensor &y, FDTensor *out);
+
+/** Execute the subtract operation for input FDTensors. *out = x - y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Subtract(const FDTensor &x, const FDTensor &y,
+                              FDTensor *out);
+
+/** Execute the multiply operation for input FDTensors. *out = x * y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Multiply(const FDTensor &x, const FDTensor &y,
+                              FDTensor *out);
+
+/** Execute the divide operation for input FDTensors. *out = x / y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Divide(const FDTensor &x, const FDTensor &y,
+                            FDTensor *out);
+
+/** Execute the maximum operation for input FDTensors. *out = max(x, y).
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Maximum(const FDTensor &x, const FDTensor &y,
+                             FDTensor *out);
+
+} // namespace function
+
+ULTRAINFER_DECL FDTensor operator+(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator+(const FDTensor &x, T y) {
+  return x + FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator+(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) + y;
+}
+
+ULTRAINFER_DECL FDTensor operator-(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator-(const FDTensor &x, T y) {
+  return x - FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator-(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) - y;
+}
+
+ULTRAINFER_DECL FDTensor operator*(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator*(const FDTensor &x, T y) {
+  return x * FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator*(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) * y;
+}
+
+ULTRAINFER_DECL FDTensor operator/(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator/(const FDTensor &x, T y) {
+  return x / FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator/(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) / y;
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/elementwise_base.h b/libs/ultrainfer/ultrainfer/function/elementwise_base.h
new file mode 100755
index 0000000000..b03172d3bf
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/elementwise_base.h
@@ -0,0 +1,265 @@
+// Copyright (c) 2022 PaddlePaddle Authors.
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +#define DEFINE_ELEMENTWISE_OP(name) \ + template struct name##RawKernel { \ + void operator()(const FDTensor &x, const FDTensor &y, int axis, \ + FDTensor *out) { \ + if (x.Shape() == y.Shape()) { \ + SameDimsElementwiseCompute>()(x, y, out); \ + } else { \ + auto x_dims = x.Shape(); \ + auto y_dims = y.Shape(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>(x, y, axis, \ + name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + x, y, axis, Inverse##name##Functor(), out); \ + } \ + } \ + } \ + } + +inline void GetMidDims(const std::vector &x_dims, + const std::vector &y_dims, const int axis, + int *pre, int *n, int *post, + int *is_run_common_broadcast) { + *pre = 1; + *n = 1; + *post = 1; + *is_run_common_broadcast = 0; + for (int i = 0; i < axis; ++i) { + (*pre) *= x_dims[i]; + } + for (int i = 0; i < y_dims.size(); ++i) { + if (x_dims[i + axis] != y_dims[i]) { + FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. 
Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + *is_run_common_broadcast = 1; + return; + } + (*n) *= y_dims[i]; + } + for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) { + (*post) *= x_dims[i]; + } +} + +inline std::vector +TrimTrailingSingularDims(const std::vector &dims) { + // Remove trailing dimensions of size 1 for y + auto actual_dims_size = dims.size(); + for (; actual_dims_size != 0; --actual_dims_size) { + if (dims[actual_dims_size - 1] != 1) + break; + } + if (actual_dims_size == dims.size()) + return dims; + std::vector trim_dims; + trim_dims.resize(actual_dims_size); + for (int i = 0; i < actual_dims_size; ++i) { + trim_dims[i] = dims[i]; + } + return trim_dims; +} + +inline int GetElementwiseIndex(const int64_t *x_dims_array, const int max_dim, + const int64_t *index_array) { + int index_ = 0; + for (int i = 0; i < max_dim; i++) { + if (x_dims_array[i] > 1) { + index_ = index_ * x_dims_array[i] + index_array[i]; + } + } + return index_; +} + +inline void UpdateElementwiseIndexArray(const int64_t *out_dims_array, + const int max_dim, + int64_t *index_array) { + for (int i = max_dim - 1; i >= 0; --i) { + ++index_array[i]; + if (index_array[i] >= out_dims_array[i]) { + index_array[i] -= out_dims_array[i]; + } else { + break; + } + } +} + +inline void GetBroadcastDimsArrays(const std::vector &x_dims, + const std::vector &y_dims, + int64_t *x_dims_array, int64_t *y_dims_array, + int64_t *out_dims_array, const int max_dim, + const int axis) { + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + if (x_dims.size() > y_dims.size()) { + std::fill(y_dims_array, y_dims_array + axis, 1); + if (axis + y_dims.size() < max_dim) { + std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), + y_dims_array + axis); + } else { + std::fill(x_dims_array, x_dims_array + axis, 1); + if (axis + x_dims.size() < max_dim) { + std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), + x_dims_array + axis); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array); + } + + for (int i = 0; i < max_dim; i++) { + FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || + y_dims_array[i] <= 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. 
Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || + (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { + out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]); + } else { + out_dims_array[i] = -1; + } + } +} + +template +void CommonForwardBroadcastCPU(const FDTensor &x, const FDTensor &y, + FDTensor *z, int64_t *x_dims_array, + int64_t *y_dims_array, int64_t *out_dims_array, + int max_dim, Functor func, + const bool is_xsize_larger = true) { + std::vector index_array(max_dim, 0); + const T *x_data = reinterpret_cast(x.Data()); + const T *y_data = reinterpret_cast(y.Data()); + FDASSERT(x_data != nullptr, "The input X should not be empty."); + FDASSERT(y_data != nullptr, "The input X should not be empty."); + OutType *out_data = reinterpret_cast(z->Data()); + + const int out_size = std::accumulate(out_dims_array, out_dims_array + max_dim, + 1, std::multiplies()); + int x_index, y_index; + for (int out_index = 0; out_index < out_size; ++out_index) { + x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data()); + y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data()); + if (is_xsize_larger) { + out_data[out_index] = func(x_data[x_index], y_data[y_index]); + } else { + out_data[out_index] = func(y_data[y_index], x_data[x_index]); + } + + UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data()); + } +} + +template +void CommonElementwiseBroadcastForward(const FDTensor &x, const FDTensor &y, + FDTensor *z, + const std::vector &x_dims, + const std::vector &y_dims, + Functor func, int axis, + const bool is_xsize_larger = true) { + int x_dims_size = x_dims.size(); + int y_dims_size = y_dims.size(); + int max_dim = (std::max)(x_dims_size, y_dims_size); + axis = (axis == -1 ? std::abs(x_dims_size - y_dims_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + FDTensor tmp; + tmp.Allocate(out_dims_array, TypeToDataType::dtype); + CommonForwardBroadcastCPU( + x, y, &tmp, x_dims_array.data(), y_dims_array.data(), + out_dims_array.data(), max_dim, func, is_xsize_larger); + *z = std::move(tmp); +} + +template +void ElementwiseCompute(const FDTensor &x, const FDTensor &y, int axis, + Functor func, FDTensor *z) { + auto x_dims = x.Shape(); + auto y_dims = y.Shape(); + bool is_xsize_larger = true; + int max_dim = x_dims.size(); + if (x_dims.size() < y_dims.size()) { + is_xsize_larger = false; + max_dim = y_dims.size(); + } + + int diff_size = x_dims.size() - y_dims.size(); + axis = (axis == -1 ? std::abs(diff_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + + int pre, n, post, is_run_common_broadcast, axis_trim = 0; + if (is_xsize_larger) { + auto y_dims_trimed = TrimTrailingSingularDims(y_dims); + axis_trim = (y_dims_trimed.size() == 0) ? 
x_dims.size() : axis; + GetMidDims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } else { + auto x_dims_trimed = TrimTrailingSingularDims(x_dims); + axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis; + GetMidDims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } + // special case for common implementation. + // case 1: x=[2,3,1,5], y=[2,1,4,1] + // case 2: x=[2,3,4], y=[1,1,4] + CommonElementwiseBroadcastForward( + x, y, z, x_dims, y_dims, func, axis, is_xsize_larger); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise_functor.h b/libs/ultrainfer/ultrainfer/function/elementwise_functor.h new file mode 100755 index 0000000000..9058b28d2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise_functor.h @@ -0,0 +1,131 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/elementwise_base.h" +#include + +namespace ultrainfer { +namespace function { + +template struct SameDimsElementwiseCompute { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + z->Allocate(x.Shape(), x.Dtype()); + Functor()(x, y, z); + } +}; + +template struct SameDimsAddFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x + eigen_y; + } +}; + +template struct SameDimsSubtractFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x - eigen_y; + } +}; + +template struct SameDimsMultiplyFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x * eigen_y; + } +}; + +template struct SameDimsDivideFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x / eigen_y; + } +}; + +// Add +template struct AddFunctor { + inline T operator()(const T a, const T b) const { return a + b; } +}; +template struct InverseAddFunctor { + inline T operator()(const T a, const 
T b) const { return b + a; } +}; + +// Subtract +template struct SubtractFunctor { + inline T operator()(const T a, const T b) const { return a - b; } +}; +template struct InverseSubtractFunctor { + inline T operator()(const T a, const T b) const { return b - a; } +}; + +// Multiply +template struct MultiplyFunctor { + inline T operator()(const T a, const T b) const { return a * b; } +}; +template <> struct MultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return a && b; } +}; +template struct InverseMultiplyFunctor { + inline T operator()(const T a, const T b) const { return b * a; } +}; +template <> struct InverseMultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return b && a; } +}; + +// Divide +#define DIV_ERROR_INFO \ + "InvalidArgumentError: Integer division by zero encountered in " \ + "(floor) divide. Please check the input value." + +template struct DivideFunctor { + inline T operator()(const T a, const T b) const { return a / b; } +}; + +template +struct DivideFunctor< + T, typename std::enable_if::value>::type> { + inline T operator()(const T a, const T b) const { + // For int32/int64, need to check whether the divison is zero. + FDASSERT(b != 0, DIV_ERROR_INFO); + return a / b; + } +}; + +template struct InverseDivideFunctor { + inline T operator()(const T a, const T b) const { return b / a; } +}; + +// Maximum +template struct MaximumFunctor { + inline T operator()(const T a, const T b) const { return a > b ? a : b; } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/full.cc b/libs/ultrainfer/ultrainfer/function/full.cc new file mode 100755 index 0000000000..65d0860612 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/full.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/full.h" +#include "ultrainfer/function/eigen.h" +#include + +namespace ultrainfer { +namespace function { + +template void FullValue(FDTensor *tensor, const Scalar &val) { + auto t = EigenVector::Flatten(*tensor); + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + t.device(place) = t.constant(val.to()); +} + +void Full(const Scalar &value, const std::vector &shape, FDTensor *out, + FDDataType dtype) { + FD_VISIT_ALL_TYPES(dtype, "Full", ([&] { + out->Allocate(shape, dtype); + FullValue(out, value); + })); +} + +void FullLike(const FDTensor &x, const Scalar &value, FDTensor *out, + FDDataType dtype) { + Full(value, x.Shape(), out, dtype); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/full.h b/libs/ultrainfer/ultrainfer/function/full.h new file mode 100755 index 0000000000..42deb1822f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/full.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Fill the value to tensor + @param value The value to be filled in tensor + @param shape The shape of output tensor. + @param out The output tensor which stores the result. + @param dtype The data type of output tensor. Default to float32 +*/ +ULTRAINFER_DECL void Full(const Scalar &value, + const std::vector &shape, FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +/** Fill the value to tensor + @param x The input tensor. + @param value The value to be filled in tensor + @param out The output tensor which stores the result. + @param dtype The data type of output tensor. Default to float32 +*/ +ULTRAINFER_DECL void FullLike(const FDTensor &x, const Scalar &value, + FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/functions.h b/libs/ultrainfer/ultrainfer/function/functions.h new file mode 100755 index 0000000000..3efa03e0c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/functions.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
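+// A minimal usage sketch for the Full/FullLike helpers declared in full.h
+// above, combined with the element-wise tensor operators from elementwise.h.
+// The shapes and values below are illustrative assumptions, not part of this
+// patch:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor ones, twos;
+//   function::Full(Scalar(1.0f), {2, 3}, &ones);    // 2x3 tensor filled with 1.0f
+//   function::FullLike(ones, Scalar(2.0f), &twos);  // same shape, filled with 2.0f
+//   FDTensor sum = ones + twos;                     // element-wise add -> all 3.0f
+//   FDTensor scaled = sum * 0.5f;                   // tensor-scalar overload -> all 1.5f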
+ +#pragma once + +#include "ultrainfer/function/cast.h" +#include "ultrainfer/function/clip.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/cumprod.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/full.h" +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/gaussian_random.h" +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/function/linspace.h" +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/pad.h" +#include "ultrainfer/function/quantile.h" +#include "ultrainfer/function/reduce.h" +#include "ultrainfer/function/slice.h" +#include "ultrainfer/function/softmax.h" +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/split.h" +#include "ultrainfer/function/tile.h" +#include "ultrainfer/function/transpose.h" diff --git a/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc new file mode 100755 index 0000000000..4cbb64662e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/tile.h" + +namespace ultrainfer { +namespace function { + +class TensorAssign { +public: + template + void operator()(tensor_t *self_data, tensor_t *src_data) const { + *self_data = *src_data; + } +}; +static TensorAssign tensor_assign; + +template +struct GatherScatterFunctor { + template + void operator()(const FDTensor &x, int axis, const FDTensor &index, + FDTensor *result, const func_t &reduce_op) { + if (index.Numel() == 0) { + return; + } + result->Allocate(index.Shape(), x.Dtype()); + const T *x_data = reinterpret_cast(x.Data()); + const index_t *index_data = reinterpret_cast(index.Data()); + T *result_data = reinterpret_cast(result->Data()); + + int64_t x_size = x.Numel(); + int64_t index_size = index.Numel(); + int64_t result_size = result->Numel(); + auto x_dims = x.Shape(); + auto index_dims = index.Shape(); + auto result_dims = result->Shape(); + if (x_size == 0 || result_size == 0 || index_size == 0) { + FDASSERT(false, "zero size input found, self_size, result_size, " + "index_size cannot be 0"); + return; + } + int select_dim_size = index_dims[axis]; + // index matrix has different shape with self matrix or src matrix. + int replaced_select_dim_size = + is_scatter_like ? 
result_dims[axis] : x_dims[axis]; + int64_t inner_dim_size = 1; + int64_t outer_dim_size = 1; + for (int64_t i = 0; i < axis; ++i) { + inner_dim_size *= index_dims[i]; + } + + for (int i = axis + 1; i < index_dims.size(); i++) { + outer_dim_size *= index_dims[i]; + } + int64_t index_idx = 0; + int64_t self_idx, src_idx; + // N layer loop squeezed into 3 layers loop + for (int64_t i = 0; i < inner_dim_size; i++) { + for (int64_t j = 0; j < select_dim_size; j++) { + for (int64_t k = 0; k < outer_dim_size; k++) { + int64_t index = index_data[index_idx]; + // This index might out of bound of index matrix's index, so here + // multiply the replaced_select_dim_size. + int64_t replace_index = k + index * outer_dim_size + + i * outer_dim_size * replaced_select_dim_size; + + self_idx = is_scatter_like ? replace_index : index_idx; + src_idx = is_scatter_like ? index_idx : replace_index; + + reduce_op((T *)(result_data + self_idx), // NOLINT + (T *)(x_data + src_idx)); // NOLINT + + index_idx++; + } + } + } + } +}; + +template struct GatherFunctor { + void operator()(const FDTensor &x, int axis, const FDTensor &index, + FDTensor *result) { + FD_VISIT_INT_TYPES(index.Dtype(), "GatherFunctor", [&]() { + auto x_shape = x.Shape(); + auto index_shape = index.Shape(); + std::vector repeat_times(x_shape.size(), 1); + for (int i = 0; i < x_shape.size(); ++i) { + repeat_times[i] = x_shape[i] / index_shape[i]; + } + repeat_times[axis] = 1; + FDTensor gs_index; + Tile(index, repeat_times, &gs_index); + GatherScatterFunctor()( + x, axis, gs_index, result, tensor_assign); + }); + } +}; + +void GatherAlongAxis(const FDTensor &x, const FDTensor &index, FDTensor *result, + int axis) { + int rank = x.Shape().size(); + FDASSERT(axis >= -rank && axis < rank, + "axis should be in range [-%d, %d - 1].", rank, rank - 1); + if (axis < 0) { + axis += rank; + } + FD_VISIT_ALL_TYPES(x.Dtype(), "GatherAlongAxis", [&]() { + GatherFunctor()(x, axis, index, result); + }); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h new file mode 100755 index 0000000000..4ff44cc501 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Output is obtained by gathering entries of axis of x indexed by index and + * concatenate them together. + @param x The input tensor. + @param index The index of a tensor to gather. + @param out The output tensor which stores the result. + @param axis Axis which will be gathered. 
+*/
+ULTRAINFER_DECL void GatherAlongAxis(const FDTensor &x, const FDTensor &index,
+                                     FDTensor *result, int axis);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/gaussian_random.cc b/libs/ultrainfer/ultrainfer/function/gaussian_random.cc
new file mode 100755
index 0000000000..c0a01de1d1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/gaussian_random.cc
@@ -0,0 +1,46 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/function/gaussian_random.h"
+#include <random>
+#include
+#include
+
+namespace ultrainfer {
+namespace function {
+
+template <typename T>
+void GaussianRandomKernel(const std::vector<int64_t> &shape, float mean,
+                          float std, int seed, FDTensor *out) {
+  std::normal_distribution<T> dist(mean, std);
+
+  out->Allocate(shape, TypeToDataType<T>::dtype);
+  int64_t size = out->Numel();
+  T *data = reinterpret_cast<T *>(out->Data());
+  std::mt19937_64 engine;
+  engine.seed(seed);
+  for (int64_t i = 0; i < size; ++i) {
+    data[i] = dist(engine);
+  }
+}
+
+void GaussianRandom(const std::vector<int64_t> &shape, FDTensor *out,
+                    FDDataType dtype, float mean, float std, int seed) {
+  FD_VISIT_FLOAT_TYPES(dtype, "GaussianRandomKernel", [&]() {
+    GaussianRandomKernel<data_t>(shape, mean, std, seed, out);
+  });
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/gaussian_random.h b/libs/ultrainfer/ultrainfer/function/gaussian_random.h
new file mode 100755
index 0000000000..53f2711a7a
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/gaussian_random.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+
+/** Fill the output tensor with random values drawn from a Gaussian (normal)
+ * distribution with the given mean and standard deviation.
+ @param shape The output tensor shape.
+ @param out The output tensor which stores the result.
+ @param mean The mean of the gaussian distribution.
+ @param std The standard deviation of the gaussian distribution.
+ @param seed The seed of the random generator.
+ @param dtype The data type of the output Tensor.
+*/ +void GaussianRandom(const std::vector &shape, FDTensor *out, + FDDataType dtype = FDDataType::FP32, float mean = 0.0f, + float std = 1.0f, int seed = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/isfinite.cc b/libs/ultrainfer/ultrainfer/function/isfinite.cc new file mode 100755 index 0000000000..b46b5b0a97 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/isfinite.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/core/float16.h" +#include +#include + +namespace ultrainfer { +namespace function { + +template struct IsNanFunctor { + OutT operator()(const T &a) const { return static_cast(std::isnan(a)); } +}; + +template +struct IsNanFunctor::value>::type> { + OutT operator()(const T &a) const { return static_cast(false); } +}; + +template struct IsNanFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isnan(a)); + } +}; + +template struct IsInfFunctor { + OutT operator()(const T &a) const { return static_cast(std::isinf(a)); } +}; + +template +struct IsInfFunctor::value>::type> { + OutT operator()(const T &a) const { return static_cast(false); } +}; + +template struct IsInfFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isinf(a)); + } +}; + +template +struct IsFiniteFunctor { + OutT operator()(const T &a) const { + return static_cast(std::isfinite(a)); + } +}; + +template +struct IsFiniteFunctor< + T, OutT, typename std::enable_if::value>::type> { + OutT operator()(const T &a) const { return static_cast(true); } +}; + +template +struct IsFiniteFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isfinite(a)); + } +}; + +#define DEFINE_ISFINITE_KERNEL(isfinite_kernel, functor) \ + template \ + void isfinite_kernel(const FDTensor &x, FDTensor *out, FDDataType dtype) { \ + FD_VISIT_ALL_TYPES(dtype, #isfinite_kernel, ([&] { \ + out->Allocate(x.Shape(), dtype); \ + functor unary_func; \ + data_t *out_ptr = \ + reinterpret_cast(out->Data()); \ + const T *input_ptr = \ + reinterpret_cast(x.Data()); \ + std::transform(input_ptr, input_ptr + x.Numel(), \ + out_ptr, unary_func); \ + })); \ + } + +DEFINE_ISFINITE_KERNEL(IsNanKernel, IsNanFunctor) +DEFINE_ISFINITE_KERNEL(IsInfKernel, IsInfFunctor) +DEFINE_ISFINITE_KERNEL(IsFiniteKernel, IsFiniteFunctor) +#undef DEFINE_ISFINITE_KERNEL + +void IsNan(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsNanKernel", + ([&] { IsNanKernel(x, out, dtype); })); +} + +void IsInf(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsInfKernel", + ([&] { IsInfKernel(x, out, dtype); })); +} + +void IsFinite(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsFiniteKernel", + ([&] { 
IsFiniteKernel(x, out, dtype); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/isfinite.h b/libs/ultrainfer/ultrainfer/function/isfinite.h new file mode 100755 index 0000000000..466cd60f9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/isfinite.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Return whether every element of input tensor is NaN or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsNan(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +/** Return whether every element of input tensor is Inf or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsInf(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +/** Return whether every element of input tensor is finite or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsFinite(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/linspace.cc b/libs/ultrainfer/ultrainfer/function/linspace.cc new file mode 100755 index 0000000000..030c525e41 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/linspace.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
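+// A small usage sketch for the IsNan/IsInf/IsFinite helpers declared in
+// isfinite.h above. It assumes FDTensor::SetExternalData wraps a host buffer
+// (as in the FDTensor API); the input values are illustrative only:
+//
+//   #include <cmath>
+//   #include "ultrainfer/function/isfinite.h"
+//   using namespace ultrainfer;
+//   std::vector<float> vals = {1.0f, NAN, INFINITY};
+//   FDTensor x, nan_mask, finite_mask;
+//   x.SetExternalData({3}, FDDataType::FP32, vals.data());
+//   function::IsNan(x, &nan_mask);       // BOOL tensor: {false, true, false}
+//   function::IsFinite(x, &finite_mask); // BOOL tensor: {true, false, false}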
+ +#include "ultrainfer/function/linspace.h" + +namespace ultrainfer { +namespace function { + +template +void LinspaceKernel(double start, double end, int num, FDTensor *out) { + FDASSERT( + num > 0, + "The num of linspace op should be larger than 0, but received num is %d", + num); + out->Allocate({num}, TypeToDataType::dtype); + T *out_data = reinterpret_cast(out->Data()); + if (num > 1) { + // step should be of double type for all types + double step = (static_cast(end - start)) / (num - 1); + int half_num = num / 2; + for (int i = 0; i < num; ++i) { + if (i < half_num) { + out_data[i] = static_cast(start + step * i); + } else { + out_data[i] = static_cast(end - step * (num - i - 1)); + } + } + } else { + out_data[0] = static_cast(start); + } +} + +void Linspace(double start, double end, int num, FDTensor *out, + FDDataType dtype) { + FD_VISIT_INT_FLOAT_TYPES(dtype, "LinspaceKernel", ([&] { + LinspaceKernel(start, end, num, out); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/linspace.h b/libs/ultrainfer/ultrainfer/function/linspace.h new file mode 100755 index 0000000000..94e7d330d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/linspace.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Return fixed number of evenly spaced values within a given interval. + @param start The input start is start variable of range. + @param end The input stop is start variable of range. + @param num The input num is given num of the sequence. + @param out The output tensor which stores the result. + @param dtype The data type of output tensor, default to float32. +*/ +ULTRAINFER_DECL void Linspace(double start, double end, int num, FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math.cc b/libs/ultrainfer/ultrainfer/function/math.cc new file mode 100755 index 0000000000..c9290ba5ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/math_functor.h" + +namespace ultrainfer { +namespace function { + +#define DEFINE_ACTIVATION_KERNEL(name, functor_class) \ + template void name##Kernel(const FDTensor &x, FDTensor *out) { \ + functor_class functor; \ + ActivationImpl>(x, out, functor); \ + } + +template +void ActivationImpl(const FDTensor &X, FDTensor *Out, const Functor &functor) { + FDASSERT(Out != nullptr, "Output Out should not be nullptr"); + FDTensor out_tmp; + auto x = EigenVector::Flatten(X); + out_tmp.Allocate(X.Shape(), X.Dtype()); + auto out = EigenVector::Flatten(out_tmp); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + functor(dev, x, out); + *Out = std::move(out_tmp); +} + +DEFINE_ACTIVATION_KERNEL(Sqrt, SqrtFunctor) +DEFINE_ACTIVATION_KERNEL(Log, LogFunctor) +DEFINE_ACTIVATION_KERNEL(Round, RoundFunctor) +DEFINE_ACTIVATION_KERNEL(Exp, ExpFunctor) +DEFINE_ACTIVATION_KERNEL(Abs, AbsFunctor) +DEFINE_ACTIVATION_KERNEL(Ceil, CeilFunctor) +DEFINE_ACTIVATION_KERNEL(Floor, FloorFunctor) + +void Sqrt(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "SqrtKernel", + ([&] { SqrtKernel(x, out); })); +} + +void Log(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "LogKernel", + ([&] { LogKernel(x, out); })); +} + +void Round(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "RoundKernel", + ([&] { RoundKernel(x, out); })); +} + +void Exp(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "ExpKernel", + ([&] { ExpKernel(x, out); })); +} + +void Abs(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "AbsKernel", + ([&] { AbsKernel(x, out); })); +} + +void Ceil(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "CeilKernel", + ([&] { CeilKernel(x, out); })); +} + +void Floor(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "FloorKernel", + ([&] { FloorKernel(x, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math.h b/libs/ultrainfer/ultrainfer/function/math.h new file mode 100755 index 0000000000..fe53d3487b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Calculates the sqrt of the given input Tensor, element-wise. Only for float + type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Sqrt(const FDTensor &x, FDTensor *out); + +/** Calculates the natural log of the given input Tensor, element-wise. Only for + float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. 
+*/ +ULTRAINFER_DECL void Log(const FDTensor &x, FDTensor *out); + +/** Rounds the values in the input to the nearest integer value, element-wise. + Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Round(const FDTensor &x, FDTensor *out); + +/** Computes exp of x element-wise with a natural number e as the base, + element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Exp(const FDTensor &x, FDTensor *out); + +/** This operator is used to perform elementwise abs for input X. Only for float + type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Abs(const FDTensor &x, FDTensor *out); + +/** Computes ceil of x element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Ceil(const FDTensor &x, FDTensor *out); + +/** Computes floor of x element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Floor(const FDTensor &x, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math_functor.h b/libs/ultrainfer/ultrainfer/function/math_functor.h new file mode 100755 index 0000000000..083007d012 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math_functor.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +// log(x) = natural logarithm of x +template struct LogFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log(); + } +}; + +// exp functor +// exp(x) = e^x +template struct ExpFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.exp(); + } +}; + +// round(x) = [x] +template struct RoundFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.round(); + } +}; + +// sqrt(x) = x^(1/2) +template struct SqrtFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.sqrt(); + } +}; + +// abs(x) = x if x > 0 else -x +template struct AbsFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = + x.unaryExpr([](T v) { return v > static_cast(0) ? 
v : -v; }); + } +}; + +// ceil(x) = ceiling(x) +template struct CeilFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); + } +}; + +// floor(x) = flooring(x) +template struct FloorFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.floor(); + } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/pad.cc b/libs/ultrainfer/ultrainfer/function/pad.cc new file mode 100755 index 0000000000..86bf452c28 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/pad.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/pad.h" + +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct PadEigen { + using Array = std::array, Rank>; + using Array32Bit = std::array, Rank>; + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using InType32BitIndex = + Eigen::TensorMap, + Eigen::Aligned>; + using OutType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType32BitIndex = + Eigen::TensorMap, + Eigen::Aligned>; + + static void Eval(const Eigen::DefaultDevice &dev, OutType out, + const InType &in, const Array &padding, const T value) { + out.device(dev) = in.pad(padding, value); + } + + static void Eval32(const Eigen::DefaultDevice &dev, OutType32BitIndex out, + const InType32BitIndex &in, const Array32Bit &padding, + const T value) { + out.device(dev) = in.pad(padding, value); + } +}; + +template +void PadFunction(const std::vector &pads, const FDTensor &src, T pad_value, + FDTensor *out) { + std::array, D> paddings; + + for (size_t i = 0; i < paddings.size(); ++i) { + paddings[i].first = pads[i * 2]; + paddings[i].second = pads[i * 2 + 1]; + } + + auto src_tensor = EigenTensor::From(src); + auto out_tensor = EigenTensor::From(*out); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + PadEigen::Eval(dev, out_tensor, src_tensor, paddings, pad_value); +} + +template +void PaddingFunctor(int rank, const std::vector &pads, T pad_value, + const FDTensor &src, FDTensor *out) { + switch (rank) { + case 1: + PadFunction(pads, src, pad_value, out); + break; + case 2: + PadFunction(pads, src, pad_value, out); + break; + case 3: + PadFunction(pads, src, pad_value, out); + break; + case 4: + PadFunction(pads, src, pad_value, out); + break; + case 5: + PadFunction(pads, src, pad_value, out); + break; + case 6: + PadFunction(pads, src, pad_value, out); + break; + default: + FDASSERT( + false, + "Pad only support tensors with no more than 6 dimensions currently."); + } +} + +template +void PadKernel(const FDTensor &x, const std::vector &paddings, + const T &pad_value, FDTensor *out) { + std::vector new_shape(x.shape.size()); + for (size_t i = 0; i < x.shape.size(); ++i) { + new_shape[i] = x.shape[i] + paddings[2 * i] + paddings[2 * i + 1]; + } + 
out->Allocate(new_shape, x.dtype);
+  PaddingFunctor<T>(x.shape.size(), paddings, pad_value, x, out);
+}
+
+void Pad(const FDTensor &x, FDTensor *out, const std::vector<int> &pads,
+         float value) {
+  FDASSERT(pads.size() == x.shape.size() * 2,
+           "Size of pads:%zu must be 2 times of rank:%zu.", pads.size(),
+           x.shape.size());
+  FDTensor out_tmp;
+  FD_VISIT_ALL_TYPES(x.dtype, "PadKernel",
+                     ([&] { PadKernel<data_t>(x, pads, value, &out_tmp); }));
+  *out = std::move(out_tmp);
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/pad.h b/libs/ultrainfer/ultrainfer/function/pad.h
new file mode 100755
index 0000000000..2b94c5587a
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/pad.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+/** Execute the pad operation on the input FDTensor along the given dims.
+ @param x The input tensor.
+ @param out The output tensor which stores the result.
+ @param pads The size of padding for each dimension; for a 3-D tensor the pads
+ should be [1d-left, 1d-right, 2d-left, 2d-right, 3d-left, 3d-right]
+ @param pad_value The value used to fill the padded elements of the output
+ tensor.
+*/
+ULTRAINFER_DECL void Pad(const FDTensor &x, FDTensor *out,
+                         const std::vector<int> &pads, float pad_value = 0);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/quantile.cc b/libs/ultrainfer/ultrainfer/function/quantile.cc
new file mode 100755
index 0000000000..54dfc15544
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/quantile.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
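+// A short usage sketch for Pad declared in pad.h above; the input shape and
+// padding sizes are illustrative assumptions only:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor x, padded;
+//   function::Full(Scalar(1.0f), {2, 2}, &x);
+//   // pads = {dim0-before, dim0-after, dim1-before, dim1-after} for a 2-D tensor
+//   function::Pad(x, &padded, {1, 1, 2, 2}, 0.0f);
+//   // padded has shape {4, 6}: the 2x2 block of ones surrounded by zeros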
+ +#include "ultrainfer/function/quantile.h" +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/function/cast.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/reduce.h" +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/transpose.h" +#include +#include +#include + +namespace ultrainfer { +namespace function { + +template +void QuantileKernel(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out) { + FDASSERT(q.size() > 0, "q should not be empty."); + FDASSERT(axis.size() > 0, "axis should not be empty."); + std::vector axis_src; + std::vector out_shape = x.Shape(); + int64_t rank = x.Shape().size(); + for (auto axis_single : axis) { + FDASSERT(axis_single >= -rank && axis_single < rank, + "The axis is expected to be in range of [%d, %d), but got %d", + -rank, rank, axis_single); + if (axis_single < 0) { + axis_single += rank; + } + axis_src.push_back(axis_single); + out_shape[axis_single] = 1; + } + std::vector axis_dst; + for (int64_t i = 0; i < rank; ++i) { + if (std::find(axis_src.begin(), axis_src.end(), i) == axis_src.end()) { + axis_dst.push_back(i); + } + } + axis_dst.insert(axis_dst.end(), axis_src.begin(), axis_src.end()); + FDTensor y; + Transpose(x, &y, axis_dst); + std::vector y_shape(rank - axis_src.size(), 0); + y_shape.push_back(-1); + y.Reshape({y_shape}); + + int64_t target_axis = rank - 1; + FDTensor mask, valid_counts, mask_any; + IsNan(y, &mask); + Any(mask, &mask_any, {target_axis}, true); + bool *mask_data = reinterpret_cast(mask.Data()); + std::transform(mask_data, mask_data + mask.Numel(), mask_data, + [](const bool &val) { return !val; }); + Cast(mask_any, &mask_any, FDDataType::FP64); + Cast(mask, &mask, FDDataType::FP64); + Sum(mask, &valid_counts, {target_axis}, true); + + FDTensor one_tensor(Scalar(static_cast(1.0))); + + std::vector indices; + FDTensor last_index(Scalar(static_cast(x.Shape()[target_axis]))); + for (auto q_num : q) { + FDASSERT(q_num >= 0 && q_num <= 1, "q should be in range [0, 1]"); + FDTensor q_tensor(static_cast(q_num)); + FDTensor index = q_tensor * (valid_counts - one_tensor); + index = mask_any * last_index + (one_tensor - mask_any) * index; + indices.push_back(index); + } + + std::vector outputs; + FDTensor sorted_tensor, sorted_indices_tensor; + Sort(y, &sorted_tensor, &sorted_indices_tensor, target_axis); + Cast(sorted_tensor, &sorted_tensor, FDDataType::FP64); + + FDTensor indices_below, indices_upper; + for (auto &&index : indices) { + Floor(index, &indices_below); + Ceil(index, &indices_upper); + Cast(indices_below, &indices_below, FDDataType::INT32); + Cast(indices_upper, &indices_upper, FDDataType::INT32); + FDTensor tensor_below, tensor_upper; + GatherAlongAxis(sorted_tensor, indices_below, &tensor_below, target_axis); + GatherAlongAxis(sorted_tensor, indices_upper, &tensor_upper, target_axis); + // Need to cast to FP64 to compute with index and tensor_upper + Cast(indices_below, &indices_below, FDDataType::FP64); + + FDTensor weight = index - indices_below; + FDTensor out = tensor_below + weight * (tensor_upper - tensor_below); + out.Squeeze(target_axis); + if (out.Dtype() != x.Dtype()) { + Cast(out, &out, x.Dtype()); + } + outputs.push_back(std::move(out)); + } + if (outputs.size() > 1) { + // Execute stack operation + for (auto &output : outputs) { + 
output.ExpandDim(0); + } + Concat(outputs, out, 0); + } else { + *out = std::move(outputs[0]); + } +} + +void Quantile(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "QuantileKernel", + ([&] { QuantileKernel(x, q, axis, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/quantile.h b/libs/ultrainfer/ultrainfer/function/quantile.h new file mode 100755 index 0000000000..a678032555 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/quantile.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Compute the quantile of the input along the specified axis. If any values + ** in a reduced row are NaN, then the quantiles for that reduction will be NaN. + @param x The input tensor. + @param q The q for calculate quantile, which should be in range [0, 1]. + @param axis The axis along which to calculate quantile. axis should be int + or list of int. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Quantile(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce.cc b/libs/ultrainfer/ultrainfer/function/reduce.cc new file mode 100755 index 0000000000..25247f86dd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce.cc @@ -0,0 +1,414 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
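+// A minimal usage sketch for Quantile declared in quantile.h above, using
+// Linspace to build the input; the values and q are illustrative assumptions:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor x, median;
+//   function::Linspace(1.0, 4.0, 4, &x);        // {1, 2, 3, 4}
+//   function::Quantile(x, {0.5}, {0}, &median); // q = 0.5 along axis 0 -> 2.5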
+ +#include "ultrainfer/function/reduce.h" + +#include +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/reduce_functor.h" +#include "ultrainfer/function/transpose.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template +void ReduceFunctor(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim) { + auto x = EigenTensor::From(input); + auto x_rank = static_cast(x.dimensions().size()); + auto reduce_dim = Eigen::array(); + std::vector dims_ref = dims; + + auto out_dims = input.shape; + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) + dims_ref[i] = x_rank + dims_ref[i]; + reduce_dim[i] = dims_ref[i]; + out_dims[dims_ref[i]] = 1; + } + auto origin_output_dims = out_dims; + output->Allocate(origin_output_dims, TypeToDataType::dtype); + // construct the squeezed output tensor + if (x_rank > 1) { + const int kDelFlag = -2; + for (size_t i = 0; i < dims_ref.size(); ++i) { + out_dims[dims_ref[i]] = kDelFlag; + } + out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag), + out_dims.end()); + } + + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + Functor functor; + if (D == 1) { + auto out = EigenScalar::From(*output); + functor(place, &x, &out, reduce_dim); + } else { + auto out = EigenTensor::From(*output, out_dims); + functor(place, &x, &out, reduce_dim); + if (!keep_dim) { + output->shape = std::move(out_dims); + } + } +} + +#define HANDLE_REDUCE_DIM(NDIM, RDIM) \ + if (ndim == NDIM && rdim == RDIM) { \ + ReduceFunctor(input, output, dims, keep_dim); \ + } + +inline void GetShuffledDim(const std::vector &src_dims, + std::vector *dst_dims, + const std::vector &reduced_dims, + std::vector *perm_axis) { + // check if it's a reduced dim + std::vector src_dims_check(src_dims.size(), false); + size_t src_size = src_dims.size(); + size_t reduce_size = reduced_dims.size(); + std::vector regular_reduced_dims = reduced_dims; + for (size_t i = 0; i < regular_reduced_dims.size(); i++) { + if (regular_reduced_dims[i] < 0) { + regular_reduced_dims[i] = src_size + regular_reduced_dims[i]; + } + } + + for (size_t i = 0; i < reduce_size; ++i) { + dst_dims->at(src_size - reduce_size + i) = + src_dims[regular_reduced_dims[i]]; + (*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i]; + src_dims_check[regular_reduced_dims[i]] = true; + } + + size_t offset = 0; + for (size_t i = 0; i < src_dims_check.size(); ++i) { + bool is_reduced = src_dims_check[i]; + if (!is_reduced) { + (*perm_axis)[offset] = i; + dst_dims->at(offset++) = src_dims[i]; + } + } +} + +template +void GetShuffledInput(const FDTensor &input, FDTensor *shuffled_input, + const std::vector &dims) { + auto shuffled_dims = input.shape; + std::vector perm_axis(input.shape.size()); + GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis); + + shuffled_input->Allocate(shuffled_dims, input.dtype); + Transpose(input, shuffled_input, perm_axis); +} + +//////////////// HandleLargeDim +template +void HandleLargeDim(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim) { + auto out_dims = input.shape; + std::vector dims_ref = dims; + auto x_rank = input.shape.size(); + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) + dims_ref[i] = x_rank + dims_ref[i]; + out_dims[dims_ref[i]] = 1; + } + if (!keep_dim) { + const int kDelFlag = -2; + for (size_t i = 0; i < dims_ref.size(); ++i) { + out_dims[dims_ref[i]] = kDelFlag; + } + 
out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag), + out_dims.end()); + } + output->Allocate(out_dims, TypeToDataType::dtype); + // shuffle the reduced dim to the end + FDTensor shuffled_input; + GetShuffledInput(input, &shuffled_input, dims); + + // transpose to 2D tensor whose shape is {unreduced, reduced}. + const int64_t unreduced = output->Numel(); + const int64_t reduced = shuffled_input.Numel() / unreduced; + shuffled_input.Allocate({unreduced, reduced}, TypeToDataType::dtype); + + output->shape = {unreduced}; + ReduceFunctor(shuffled_input, output, {1}, keep_dim); + output->shape = out_dims; +} + +////////////// ReduceKernel + +template +void ReduceKernelImpl(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim, + bool reduce_all) { + output->Allocate({1}, TypeToDataType::dtype); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + if (reduce_all) { + // Flatten and reduce 1-D tensor + auto x = EigenVector::Flatten(input); + auto out = EigenScalar::From(*output); + auto reduce_dim = Eigen::array({{0}}); + + Functor functor; + functor(dev, &x, &out, reduce_dim); + } else { + int ndim = input.shape.size(); + int rdim = dims.size(); + if (ndim > 4) { + HandleLargeDim(input, output, dims, keep_dim); + } else { + HANDLE_REDUCE_DIM(4, 3); + HANDLE_REDUCE_DIM(4, 2); + HANDLE_REDUCE_DIM(4, 1); + HANDLE_REDUCE_DIM(3, 2); + HANDLE_REDUCE_DIM(3, 1); + HANDLE_REDUCE_DIM(2, 1); + HANDLE_REDUCE_DIM(1, 1); + } + } +} + +template +void BoolReduceKernel(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim, + bool reduce_all) { + // The dims has full dim, set the reduce_all is True + const auto &input_dim_size = input.shape.size(); + std::set dims_set(dims.begin(), dims.end()); + bool full_dim = true; + for (auto i = 0; i < input_dim_size; i++) { + if (dims_set.find(i) == dims_set.end()) { + full_dim = false; + break; + } + } + reduce_all = (reduce_all || full_dim); + + ReduceKernelImpl(input, output, dims, keep_dim, reduce_all); +} + +template +void Reduce(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + // If the dims has full dim, set the reduce_all is True + const int &input_dim_size = x.shape.size(); + std::set dims_set(dims.begin(), dims.end()); + bool full_dim = true; + for (int i = 0; i < input_dim_size; ++i) { + if (dims_set.find(i) == dims_set.end() && + dims_set.find(i - input_dim_size) == dims_set.end()) { + full_dim = false; + break; + } + } + reduce_all = (reduce_all || full_dim); + + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ReduceKernelImpl", ([&] { + ReduceKernelImpl( + x, out, dims, keep_dim, reduce_all); + })); +} + +enum ArgMinMaxType { kArgMin, kArgMax }; + +template +struct ArgMinMaxFunctor {}; + +#define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \ + template \ + struct ArgMinMaxFunctor { \ + void operator()(const FDTensor &in, FDTensor *out, \ + const std::vector &x_dims, int64_t axis, \ + bool keepdims, bool flatten) { \ + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); \ + auto in_eigen = EigenTensor::From(in, x_dims); \ + if (keepdims) { \ + if (!flatten) { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } else { \ + auto out_eigen = EigenScalar::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ + } else { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) 
= \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ + } \ + } + +DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin); +DECLARE_ARG_MIN_MAX_FUNCTOR(argmax, ArgMinMaxType::kArgMax); + +template +void ArgMinMaxKernel(const FDTensor &x, FDTensor *out, int64_t axis, + bool keepdims, bool flatten) { + bool new_keepdims = keepdims | flatten; + // if flatten, will construct the new dims for the cacluate + std::vector x_dims; + int new_axis = axis; + if (flatten) { + x_dims = {x.Numel()}; + // if flatten, the axis just as 0 + new_axis = 0; + } else { + x_dims = x.shape; + if (axis < 0) + new_axis = axis + x_dims.size(); + } +#define CALL_ARG_MINMAX_FUNCTOR(rank) \ + ArgMinMaxFunctor functor##rank; \ + functor##rank(x, out, x_dims, new_axis, new_keepdims, flatten) + + switch (x_dims.size()) { + case 1: + CALL_ARG_MINMAX_FUNCTOR(1); + break; + case 2: + CALL_ARG_MINMAX_FUNCTOR(2); + break; + case 3: + CALL_ARG_MINMAX_FUNCTOR(3); + break; + case 4: + CALL_ARG_MINMAX_FUNCTOR(4); + break; + case 5: + CALL_ARG_MINMAX_FUNCTOR(5); + break; + case 6: + CALL_ARG_MINMAX_FUNCTOR(6); + break; + default: + FDASSERT(x_dims.size() <= 6, + "%s operator doesn't supports tensors whose ranks are greater " + "than 6.", + (EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")); + break; +#undef CALL_ARG_MINMAX_FUNCTOR + } +} + +template +void ArgMinMax(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keepdims, bool flatten) { + const auto &x_dims = x.shape; + int64_t x_rank = x_dims.size(); + FDASSERT(axis >= -x_rank, + "'axis'(%lld) must be greater than or equal to -Rank(X)(%lld).", + axis, -x_rank); + FDASSERT(axis < x_rank, + "'axis'(%lld) must be less than or equal to Rank(X)(%lld).", axis, + x_rank); + FDASSERT( + output_dtype == FDDataType::INT32 || FDDataType::INT64 || + FDDataType::UINT8, + "The attribute of dtype in argmin/argmax must be [%s], [%s] or [%s], but " + "received [%s].", + Str(FDDataType::INT32).c_str(), Str(FDDataType::INT64).c_str(), + Str(FDDataType::UINT8).c_str(), Str(output_dtype).c_str()); + if (axis < 0) + axis += x_rank; + if (output_dtype == FDDataType::INT32) { + int64_t all_element_num = 0; + if (flatten) { + all_element_num = x.Numel(); + + } else { + all_element_num = x_dims[axis]; + } + FDASSERT(all_element_num <= (std::numeric_limits::max)(), + "The element num of the argmin/argmax input at axis is " + "%lld, is larger than int32 maximum value:%d, you must " + "set the dtype of argmin/argmax to 'int64'.", + all_element_num, (std::numeric_limits::max)()); + } + std::vector vec; + if (flatten) { + vec.emplace_back(static_cast(1)); + } else { + for (int64_t i = 0; i < axis; i++) + vec.emplace_back(x_dims[i]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[i]); + } + out->Allocate(vec, output_dtype); + + FD_VISIT_INT_TYPES(output_dtype, "ArgMinMaxKernel", ([&] { + ArgMinMaxKernel( + x, out, axis, keepdims, flatten); + })); +} + +void Max(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Min(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Sum(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void All(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool 
keep_dim, bool reduce_all) { + BoolReduceKernel(x, out, dims, keep_dim, reduce_all); +} + +void Any(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + BoolReduceKernel(x, out, dims, keep_dim, reduce_all); +} + +void Mean(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Prod(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void ArgMax(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + +void ArgMin(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce.h b/libs/ultrainfer/ultrainfer/function/reduce.h new file mode 100755 index 0000000000..7b0d2cc30a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce.h @@ -0,0 +1,127 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Excute the maximum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. +*/ +ULTRAINFER_DECL void Max(const FDTensor &x, FDTensor *out, + const std::vector &dims, + bool keep_dim = false, bool reduce_all = false); + +/** Excute the minimum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. +*/ +ULTRAINFER_DECL void Min(const FDTensor &x, FDTensor *out, + const std::vector &dims, + bool keep_dim = false, bool reduce_all = false); + +/** Excute the sum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. 
+*/
+ULTRAINFER_DECL void Sum(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the all operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void All(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the any operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Any(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the mean operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Mean(const FDTensor &x, FDTensor *out,
+                          const std::vector &dims,
+                          bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the product operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Prod(const FDTensor &x, FDTensor *out,
+                          const std::vector &dims,
+                          bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the argmax operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param axis The axis which will be reduced.
+    @param output_dtype The data type of output FDTensor, INT64 or INT32,
+    default to INT64.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param flatten Whether to flatten FDTensor to get the argmax index, default
+    false.
+*/
+ULTRAINFER_DECL void ArgMax(const FDTensor &x, FDTensor *out, int64_t axis,
+                            FDDataType output_dtype = FDDataType::INT64,
+                            bool keep_dim = false, bool flatten = false);
+
+/** Execute the argmin operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param axis The axis which will be reduced.
+    @param output_dtype The data type of output FDTensor, INT64 or INT32,
+    default to INT64.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param flatten Whether to flatten FDTensor to get the argmin index, default
+    false.
+*/ +ULTRAINFER_DECL void ArgMin(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype = FDDataType::INT64, + bool keep_dim = false, bool flatten = false); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce_functor.h b/libs/ultrainfer/ultrainfer/function/reduce_functor.h new file mode 100755 index 0000000000..f31210095b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce_functor.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" +namespace ultrainfer { +namespace function { +//////// Max Functor /////// +struct MaxFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->maximum(dim); + } +}; + +//////// Min Functor /////// +struct MinFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->minimum(dim); + } +}; + +//////// Sum Functor /////// +struct SumFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->sum(dim); + } +}; + +//////// All Functor /////// +struct AllFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->all(dim); + } +}; + +//////// Any Functor /////// +struct AnyFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->any(dim); + } +}; + +//////// Mean Functor /////// +struct MeanFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->mean(dim); + } +}; + +//////// Prod Functor /////// +struct ProdFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->prod(dim); + } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/slice.cc b/libs/ultrainfer/ultrainfer/function/slice.cc new file mode 100755 index 0000000000..3d44fab6b7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/slice.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
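As a quick illustration of the reduction helpers declared in reduce.h above, here is a hedged usage sketch (not part of the patch); it assumes the FP32 data type enum and the SetExternalData wrapper used elsewhere in this patch:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/reduce.h"

void ReduceSketch() {
  std::vector<float> data = {1, 5, 3, 2, 4, 6};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, data.data());

  ultrainfer::FDTensor row_max, total, row_argmax;
  // Per-row maximum along axis 1, keeping the reduced axis: shape {2, 1}.
  ultrainfer::function::Max(x, &row_max, {1}, /*keep_dim=*/true);
  // Sum over every element: reduce_all collapses the result to a single value.
  ultrainfer::function::Sum(x, &total, {0, 1}, /*keep_dim=*/false,
                            /*reduce_all=*/true);
  // Index of the per-row maximum along axis 1 (INT64 output by default).
  ultrainfer::function::ArgMax(x, &row_argmax, 1);
}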
+ +#include "ultrainfer/function/slice.h" +#include "ultrainfer/function/eigen.h" + +#include + +namespace ultrainfer { +namespace function { + +std::vector GetSliceDims(const std::vector &in_dims, + const std::vector &axes, + const std::vector &starts, + const std::vector &ends, + std::vector *steps = nullptr) { + std::vector slice_dims(in_dims); + + for (size_t i = 0; i < axes.size(); ++i) { + int64_t axis = axes[i]; + if (in_dims[axis] == -1) { + continue; + } + + int64_t start = starts[i]; + int64_t end = ends[i]; + int64_t step = steps == nullptr ? 1 : (*steps)[i]; + + if (step > 0) { + slice_dims[axis] = (end - start + step - 1) / step; + } else { + slice_dims[axis] = (end - start + step + 1) / step; + } + } + return slice_dims; +} + +void CheckAndUpdateSliceAttrs(const std::vector &in_dims, + const std::vector &axes, + std::vector *starts, + std::vector *ends, + std::vector *steps = nullptr) { + for (size_t i = 0; i < axes.size(); ++i) { + int64_t axis = axes[i]; + FDASSERT(axis < in_dims.size(), + "The axis value should be less than the rank of input, " + "but received axes[%d] = %d, rank of input is %d.", + i, axis, in_dims.size()); + int64_t dim_value = in_dims[axis]; + + if (dim_value > 0) { + int64_t step = steps == nullptr ? 1 : (*steps)[i]; + FDASSERT(step != 0, "Step should not be 0, but received step = %d.", + step); + int64_t start = + (*starts)[i] < 0 ? ((*starts)[i] + dim_value) : (*starts)[i]; + start = (std::max)(start, static_cast(0)); + + int64_t end = + 0 < step && (*ends)[i] < 0 ? ((*ends)[i] + dim_value) : (*ends)[i]; + end = (std::min)(end, dim_value); + + if (step > 0) { + start = (std::min)(start, dim_value); + end = (std::max)(end, static_cast(0)); + FDASSERT(end > start, + "When step > 0, end should be greater than start, but " + "received end = %d, start = %d.", + end, start) + } else { + start = (std::min)(start, dim_value - 1); + if (end < -1) { + end += dim_value; + } + end = (std::max)(end, static_cast(-1)); + FDASSERT(start >= end, + "When step < 0, start should be greater than end, but " + "received start = %d, end = %d.", + start, end); + } + + (*starts)[i] = start; + (*ends)[i] = end; + } else if (dim_value == 0) { + (*starts)[i] = 0; + (*ends)[i] = 0; + } + } +} + +template +void SliceKernel(const FDTensor &x, const std::vector &axes, + const std::vector &starts, + const std::vector &ends, FDTensor *out) { + FDASSERT(starts.size() == axes.size(), + "The size of starts must be equal to the size of axes."); + FDASSERT(ends.size() == axes.size(), + "The size of ends must be equal to the size of axes."); + auto starts_idx = starts; + auto end_idx = ends; + auto in_dims = x.Shape(); + CheckAndUpdateSliceAttrs(in_dims, axes, &starts_idx, &end_idx); + auto slice_dims = GetSliceDims(in_dims, axes, starts, ends); + + auto offsets = Eigen::DSizes(); + auto extents = Eigen::DSizes(); + for (size_t i = 0; i < D; ++i) { + offsets[i] = 0; + extents[i] = slice_dims[i]; + } + for (size_t i = 0; i < axes.size(); ++i) { + offsets[axes[i]] = starts[i]; + } + + out->Allocate(slice_dims, x.Dtype()); + auto in_t = EigenTensor::From(x, in_dims); + auto out_t = EigenTensor::From(*out, slice_dims); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + out_t.device(dev) = in_t.slice(offsets, extents); +} + +void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &starts, const std::vector &ends, + FDTensor *out) { + FD_VISIT_ALL_TYPES( + x.dtype, "SliceKernel", ([&] { + int rank = x.Shape().size(); + switch (rank) { + case 1: + 
SliceKernel(x, axes, starts, ends, out); + break; + case 2: + SliceKernel(x, axes, starts, ends, out); + break; + case 3: + SliceKernel(x, axes, starts, ends, out); + break; + case 4: + SliceKernel(x, axes, starts, ends, out); + break; + case 5: + SliceKernel(x, axes, starts, ends, out); + break; + case 6: + SliceKernel(x, axes, starts, ends, out); + break; + default: + FDASSERT(false, + "The rank of input should be less than 7, but received %d.", + rank); + } + })); +} + +void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &index, FDTensor *out) { + std::vector ends = index; + for (int i = 0; i < ends.size(); ++i) { + ends[i] += 1; + } + Slice(x, axes, index, ends, out); + for (int i = 0; i < axes.size(); ++i) { + if (out->Shape().size() <= 1) { + break; + } + out->Squeeze(axes[i]); + } +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/slice.h b/libs/ultrainfer/ultrainfer/function/slice.h new file mode 100755 index 0000000000..240b0455fb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/slice.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** This operator produces a slice of input along multiple axes. + @param x The input tensor. + @param axes Axes that starts and ends apply to. + @param starts If starts is a list or tuple, the elements of it should be + integers or Tensors with shape [1]. If starts is an Tensor, it should + be an 1-D Tensor. It represents starting indices of corresponding axis + in axes + @param ends If ends is a list or tuple, the elements of it should be + integers or Tensors with shape [1]. If ends is an Tensor, it should + be an 1-D Tensor . It represents ending indices of corresponding axis + in axes. + @param out The output tensor which stores the result. +*/ + +ULTRAINFER_DECL void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &starts, + const std::vector &ends, FDTensor *out); + +ULTRAINFER_DECL void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &index, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/softmax.cc b/libs/ultrainfer/ultrainfer/function/softmax.cc new file mode 100755 index 0000000000..7cf9fdf640 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/softmax.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/softmax.h" + +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/axis_utils.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct ValueClip { + T operator()(const T &x) const { + const T kThreshold = static_cast(-64.); + return x < kThreshold ? kThreshold : x; + } +}; + +template struct SoftmaxEigen { + void operator()(const FDTensor &x, FDTensor *out, int axis_dim) { + constexpr int kBatchDim = 0; + constexpr int kClassDim = 1; + constexpr int kAxisDim = 1; + + auto logits = EigenMatrix::From(x); + auto softmax = EigenMatrix::From(*out); + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; + Eigen::DSizes along_axis(kAxisDim); + Eigen::DSizes batch_classes(batch_size, num_classes); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + Eigen::DSizes batch_one_remain(batch_size, 1, num_remain); + Eigen::DSizes one_axis_one(1, axis_dim, 1); + Eigen::DSizes one_axis(1, axis_dim); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + // For numerical stability, logits should be shifted by maximum number along + // axis, calculate shifted_logits into softmax tensor for memory reuse. 
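+    // That is: softmax(x)_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x)).
+    // Subtracting the per-axis maximum keeps exp() from overflowing and does
+    // not change the result, since the common factor cancels in the
+    // normalization below.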
+ if (num_remain == 1) { + // axis == -1, axis and class in same dimension, calculate along + // class dimension directly for higher performance + softmax.device(dev) = (logits - logits.maximum(along_axis) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) + .unaryExpr(ValueClip()); + } else { + // axis != -1, class dimension split into (axis, remain), max and sum + // should be calculated along axis dimension + softmax.device(dev) = + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .eval() + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_axis_remain)) + .reshape(batch_classes) + .unaryExpr(ValueClip()); + } + softmax.device(dev) = softmax.exp(); + softmax.device(dev) = (softmax * softmax.reshape(batch_axis_remain) + .sum(along_axis) + .inverse() + .eval() + .broadcast(one_axis)); + } +}; + +template +void SoftmaxFunctor(const FDTensor &x, FDTensor *out, int axis) { + SoftmaxEigen()(x, out, axis); +} + +template +void SoftmaxKernel(const FDTensor &x, FDTensor *out, int axis) { + const int rank = x.shape.size(); + const int calc_axis = CanonicalAxis(axis, rank); + int axis_dim = x.shape[calc_axis]; + out->Allocate(x.shape, x.dtype); + if (out->Numel() == 0) { + return; + } + const int n = SizeToAxis(calc_axis, x.shape); + const int d = SizeFromAxis(calc_axis, x.shape); + // Reshape to 2d tensor + + FDTensor x_2d, out_2d; + x_2d.SetExternalData({n, d}, x.dtype, const_cast(x.Data())); + out_2d.SetExternalData({n, d}, out->dtype, out->Data()); + + SoftmaxFunctor(x_2d, &out_2d, axis_dim); +} + +void Softmax(const FDTensor &x, FDTensor *out, int axis) { + FDASSERT( + std::abs(axis) < x.shape.size(), + "The absolute given axis should be smaller than the input's " + "dimension. Expected absolute axis is smaller than %lu, but receive %d.", + x.shape.size(), std::abs(axis)); + // Note(zhoushunjie): The FDTensor out may equal to FDTensor x, so firstly we + // use out_temp to get the softmax result, then we move the out_temp to out. + FDTensor out_tmp; + FD_VISIT_FLOAT_TYPES(x.dtype, "SoftmaxKernel", + ([&] { SoftmaxKernel(x, &out_tmp, axis); })); + *out = std::move(out_tmp); +} +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/softmax.h b/libs/ultrainfer/ultrainfer/function/softmax.h new file mode 100755 index 0000000000..29a1258e98 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/softmax.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Excute the softmax operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis The axis to be computed softmax value. 
+*/ +ULTRAINFER_DECL void Softmax(const FDTensor &x, FDTensor *out, int axis = -1); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/sort.cc b/libs/ultrainfer/ultrainfer/function/sort.cc new file mode 100755 index 0000000000..8f062883dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/sort.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/transpose.h" +#include +#include +#include + +namespace ultrainfer { +namespace function { + +template +static void FullSort(Type input_height, Type input_width, int input_dim, + const FDTensor *input, FDTensor *out, FDTensor *indices, + bool descending) { + out->Allocate(input->Shape(), input->Dtype()); + indices->Allocate(input->Shape(), TypeToDataType::dtype); + + T *t_out = reinterpret_cast(out->Data()); + Type *t_indices = reinterpret_cast(indices->Data()); + + for (Type i = 0; i < input_height; ++i) { + std::vector> col_vec; + col_vec.reserve(input_width); + if (input_dim == 1) { + auto e_input = EigenVector::Flatten(*input); + for (Type j = 0; j < input_width; ++j) { + col_vec.push_back(std::pair(e_input(j), j)); + } + } else { + auto e_input = EigenMatrix::Reshape(*input, input_dim - 1); + for (Type j = 0; j < input_width; ++j) { + col_vec.push_back(std::pair(e_input(i, j), j)); + } + } + std::sort(col_vec.begin(), col_vec.end(), + [&](const std::pair &l, const std::pair &r) { + if (descending) + return (std::isnan(static_cast(l.first)) && + !std::isnan(static_cast(r.first))) || + (l.first > r.first); + else + return (!std::isnan(static_cast(l.first)) && + std::isnan(static_cast(r.first))) || + (l.first < r.first); + }); + + for (Type j = 0; j < input_width; ++j) { + t_out[i * input_width + j] = col_vec[j].first; + t_indices[i * input_width + j] = col_vec[j].second; + } + } +} + +template +void SortKernel(const FDTensor &x, FDTensor *out, FDTensor *indices, + FDDataType indices_type, bool descending, int axis) { + auto input_shape = x.Shape(); + int rank = input_shape.size(); + axis = (axis < 0) ? 
(rank + axis) : axis; + // Do full sort + if (axis == -1 || axis + 1 == rank) { + int64_t numel = x.Numel(); + int64_t input_width = input_shape[axis]; + int64_t input_height = numel / input_width; + FD_VISIT_INT_TYPES(indices_type, "FullSort", ([&] { + FullSort(input_height, input_width, rank, + &x, out, indices, descending); + })); + } else { + // If not full sort do transpose + std::vector trans; + for (int i = 0; i < axis; i++) { + trans.push_back(i); + } + trans.push_back(rank - 1); + for (int i = axis + 1; i < rank - 1; i++) { + trans.push_back(i); + } + trans.push_back(axis); + + FDTensor trans_inp; + Transpose(x, &trans_inp, trans); + int64_t numel = x.Numel(); + int64_t input_width = input_shape[axis]; + int64_t input_height = numel / input_width; + FD_VISIT_INT_TYPES(indices_type, "FullSort", ([&] { + FullSort(input_height, input_width, rank, + &trans_inp, out, indices, + descending); + })); + // transpose back + Transpose(*out, out, trans); + Transpose(*indices, indices, trans); + } +} + +void Sort(const FDTensor &x, FDTensor *out, FDTensor *indices, int axis, + bool descending, FDDataType indices_type) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "SortKernel", ([&] { + SortKernel(x, out, indices, indices_type, + descending, axis); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/sort.h b/libs/ultrainfer/ultrainfer/function/sort.h new file mode 100755 index 0000000000..fea3b8ce82 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/sort.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** + * @brief Performs sorting on the input tensor along the given axis and outputs + * two tensors, Output(Out) and Output(Indices). They reserve the same + * shape with Input(X), and Output(Out) represents the sorted tensor + * while Output(Indices) gives the sorted order along the given axis + * Attr(axis). + * @param x The input of sort + * @param out The sorted tensor of sort op, with the same shape as + * x + * @param indices The indices of a tensor giving the sorted order, with + * the same shape as x + * @param axis The axis along which to sort the tensor. + * When axis < 0, the actual axis will be the |axis|'th + * counting backwards + * @param descending The descending attribute is a flag to tell + * algorithm how to sort the input data. 
+ * If descending is true, will sort by descending order, + * else if false, sort by ascending order + * @param indices_type The data type of indices, default to int64 + */ +ULTRAINFER_DECL void Sort(const FDTensor &x, FDTensor *out, FDTensor *indices, + int axis = 0, bool descending = false, + FDDataType indices_type = FDDataType::INT64); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/split.cc b/libs/ultrainfer/ultrainfer/function/split.cc new file mode 100755 index 0000000000..be70ff115c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/split.cc @@ -0,0 +1,160 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/split.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace function { + +/* + * All tensors' dimension should be the same and the values of + * each dimension must be the same, except the axis dimension. + */ +template struct SplitFunctor { +public: + void operator()(const FDTensor &input, + const std::vector &ref_inputs, int axis, + std::vector *outputs) { + if (input.Numel() == 0) { + return; + } + + size_t num = outputs->size(); + + int input_rows = 1; + auto dim_0 = ref_inputs[0]->Shape(); + for (int i = 0; i < axis; ++i) { + input_rows *= dim_0[i]; + } + + int input_cols = 0; + + std::vector output_cols(outputs->size()); + for (size_t i = 0; i < num; ++i) { + int t_cols = ref_inputs[i]->Numel() / input_rows; + input_cols += t_cols; + output_cols[i] = t_cols; + } + + // computation + for (int k = 0; k < input_rows; ++k) { + const T *src_ptr = + reinterpret_cast(input.Data()) + k * input_cols; + int col_idx = 0; + for (size_t j = 0; j < num; ++j) { + int col_len = output_cols[j]; + auto *out_tensor = &(outputs->at(j)); + if (out_tensor != nullptr) { + T *dst_ptr = reinterpret_cast(out_tensor->Data()) + k * col_len; + std::memcpy(dst_ptr, src_ptr + col_idx, sizeof(T) * col_len); + } + col_idx += col_len; + } + } + } +}; + +inline int GetSplitAxisValue(const FDTensor &x, int axis) { + int rank = x.Shape().size(); + FDASSERT(axis >= -rank && axis < rank, + "The axis is expected to be in range of [%d, %d), but got %d", -rank, + rank, axis); + if (axis < 0) { + axis = axis + rank; + } + return axis; +} + +void CreateSplitOutputs(const FDTensor &x, + const std::vector §ions_data, + std::vector *outs, int axis) { + axis = GetSplitAxisValue(x, axis); + auto input_axis_dim = x.Shape().at(axis); + std::vector sections_vec; + const int unknow_dim_val = -1; + int unknow_dim_idx = -1; + int num_of_unknow = 0; + int sum_of_section = 0; + + for (size_t i = 0; i < sections_data.size(); ++i) { + sections_vec.push_back(sections_data[i]); + if (sections_data[i] == unknow_dim_val) { + num_of_unknow++; + unknow_dim_idx = i; + } else { + sum_of_section += sections_data[i]; + } + } + + FDASSERT(num_of_unknow <= 1, + "Only one dimension value of Attr(num_or_sections) " + "in SplitOp can be -1. 
" + "But received Attr(num_or_sections) = [%s].", + Str(sections_data).c_str()); + if (unknow_dim_idx != -1) { + // for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1]. + // input_axis_dim = 5, sum_of_sections = 5. + // the following check will fail. + FDASSERT(sum_of_section < input_axis_dim, + "Sum of Attr(num_or_sections) other than unknown section " + "must be less than the input's " + "size " + "along the split dimension. But received Attr(num_or_sections) " + "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.", + Str(sections_data).c_str(), Str(x.Shape()).c_str(), axis); + sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section; + } else { + FDASSERT(sum_of_section == input_axis_dim, + "Sum of Attr(num_or_sections) must be equal to the input's " + "size " + "along the split dimension. But received Attr(num_or_sections)" + " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.", + Str(sections_data).c_str(), Str(x.Shape()).c_str(), axis); + } + // fill out dims + std::vector> out_dims(sections_vec.size(), x.Shape()); + for (size_t i = 0; i < sections_vec.size(); ++i) { + out_dims[i][axis] = sections_vec[i]; + } + for (size_t i = 0; i < sections_vec.size(); ++i) { + (*outs)[i].Allocate(out_dims[i], x.Dtype()); + } +} + +template +void SplitKernel(const FDTensor &x, const std::vector §ion, + std::vector *outs, int axis) { + size_t out_number = section.size(); + outs->resize(out_number); + CreateSplitOutputs(x, section, outs, axis); + + std::vector shape_refer; + for (size_t j = 0; j < outs->size(); ++j) { + shape_refer.emplace_back(&((*outs)[j])); + } + SplitFunctor functor; + functor(x, shape_refer, axis, outs); +} + +void Split(const FDTensor &x, const std::vector &num_or_sections, + std::vector *out, int axis) { + FD_VISIT_ALL_TYPES(x.Dtype(), "Split", ([&] { + SplitKernel(x, num_or_sections, out, axis); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/split.h b/libs/ultrainfer/ultrainfer/function/split.h new file mode 100755 index 0000000000..1b3a2063ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/split.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Split the input tensor into multiple sub-Tensors. + @param x The input tensor. + @param num_or_sections f num_or_sections is an int, then num_or_sections + indicates the number of equal sized sub-Tensors that the x will + be divided into. + @param out The output vector tensor which stores the result. + @param axis Axis which will be splitted. 
+*/ + +ULTRAINFER_DECL void Split(const FDTensor &x, + const std::vector &num_or_sections, + std::vector *out, int axis = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/tile.cc b/libs/ultrainfer/ultrainfer/function/tile.cc new file mode 100755 index 0000000000..3406d690f1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/tile.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/tile.h" +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +template +void TileFunctor(const FDTensor &x, + const std::vector &origin_repeat_times, + FDTensor *out) { + auto x_shape = x.Shape(); + auto repeat_times = origin_repeat_times; + for (size_t i = 0; i < repeat_times.size(); ++i) { + FDASSERT(repeat_times[i] > 0, + "All elements of the input 'repeat_times' " + "for tile op must be positive integers, but " + "the value received is %d.", + repeat_times[i]); + } + if (repeat_times.size() < x_shape.size()) { + int diff = x_shape.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, 1); + } else { + int diff = repeat_times.size() - x_shape.size(); + x_shape.insert(x_shape.begin(), diff, 1); + } + FDASSERT(repeat_times.size() == x_shape.size(), + "The rank (%d) of the input 'x' and the rank (%d) of the input " + "'repeat_times' for tile op must match after promotion.", + x_shape.size(), repeat_times.size()); + + if (Rank == 0) { + // Deep copy + *out = x; + return; + } + + FDTensor out_tmp; + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + std::vector out_shape(x_shape); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_shape[i] *= repeat_times[i]; + } + + out_tmp.Allocate(out_shape, x.Dtype()); + auto eigen_x = EigenTensor::From(x, x_shape); + auto eigen_out = EigenTensor::From(out_tmp, out_shape); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + eigen_out.device(dev) = eigen_x.broadcast(bcast_dims); + + *out = std::move(out_tmp); +} + +template +void TileKernel(const FDTensor &x, const std::vector &repeat_times, + FDTensor *out) { + auto rank = x.Shape().size(); + auto repeat_times_size = repeat_times.size(); + rank = (std::max)(rank, repeat_times_size); + switch (rank) { + case 0: + *out = x; + break; + case 1: + TileFunctor(x, repeat_times, out); + break; + case 2: + TileFunctor(x, repeat_times, out); + break; + case 3: + TileFunctor(x, repeat_times, out); + break; + case 4: + TileFunctor(x, repeat_times, out); + break; + case 5: + TileFunctor(x, repeat_times, out); + break; + case 6: + TileFunctor(x, repeat_times, out); + break; + } +} + +void Tile(const FDTensor &x, const std::vector &repeat_times, + FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "TileKernel", + ([&] { TileKernel(x, repeat_times, out); })); +} + +} // namespace function +} // namespace 
ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/tile.h b/libs/ultrainfer/ultrainfer/function/tile.h new file mode 100755 index 0000000000..9ba545d894 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/tile.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Construct a new Tensor by repeating x the number of times given by + ** repeat_times. After tiling, the value of the i’th dimension of the + ** output is equal to x.shape[i]*repeat_times[i]. Both the number of + ** dimensions of x and the number of elements in repeat_times should + ** be less than or equal to 6.Support all data types. + @param x The input tensor. + @param repeat_times The lower bound + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Tile(const FDTensor &x, + const std::vector &repeat_times, + FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/transpose.cc b/libs/ultrainfer/ultrainfer/function/transpose.cc new file mode 100755 index 0000000000..e33c986140 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/transpose.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
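A hedged usage sketch (not part of the patch) for the Softmax and Tile helpers declared in softmax.h and tile.h above, under the same FP32/SetExternalData assumptions as the earlier sketches:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/softmax.h"
#include "ultrainfer/function/tile.h"

void SoftmaxTileSketch() {
  std::vector<float> logits = {1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, logits.data());

  // Row-wise probabilities: softmax along the last axis (the default).
  ultrainfer::FDTensor probs;
  ultrainfer::function::Softmax(x, &probs);

  // Repeat the 2 x 3 tensor twice along each dimension, giving shape {4, 6}.
  ultrainfer::FDTensor tiled;
  ultrainfer::function::Tile(x, {2, 2}, &tiled);
}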
+ +#include "ultrainfer/function/transpose.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct TransposeNormalKernel { + void operator()(const FDTensor &in, FDTensor *out, + const std::vector &axis) { + const int rank = axis.size(); + auto in_stride = GetStride(in.shape); + auto out_stride = GetStride(out->shape); + const T *in_ptr = reinterpret_cast(in.Data()); + T *out_ptr = reinterpret_cast(out->Data()); + + auto transpose_helper = [&](int64_t beg, int64_t end) { + for (int64_t out_idx = beg; out_idx < end; ++out_idx) { + int64_t in_idx = 0; + int64_t tmp_idx = out_idx; + // calculate the input index + for (int i = 0; i < rank; ++i) { + const int64_t coordinate = tmp_idx / out_stride[i]; + tmp_idx -= coordinate * out_stride[i]; + in_idx += coordinate * in_stride[axis[i]]; + } + out_ptr[out_idx] = in_ptr[in_idx]; + } + }; + transpose_helper(0, out->Numel()); + } +}; + +template struct TransposeKernelImpl { + void operator()(const FDTensor &in, FDTensor *out, + const std::vector &axis) { + Eigen::array permute; + for (int i = 0; i < Rank; i++) { + permute[i] = axis[i]; + } + + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_in = EigenTensor::From(in); + auto eigen_out = EigenTensor::From(*out); + eigen_out.device(place) = eigen_in.shuffle(permute); + } +}; + +template +void TransposeKernel(const FDTensor &x, FDTensor *out, + const std::vector &axis) { + int rank = axis.size(); + switch (rank) { + case 1: + TransposeKernelImpl trans1; + trans1(x, out, axis); + break; + case 2: + TransposeKernelImpl trans2; + trans2(x, out, axis); + break; + case 3: + TransposeKernelImpl trans3; + trans3(x, out, axis); + break; + case 4: + TransposeKernelImpl trans4; + trans4(x, out, axis); + break; + default: + // for rank >= 4 situation + TransposeNormalKernel trans_normal; + trans_normal(x, out, axis); + } +} + +void Transpose(const FDTensor &x, FDTensor *out, + const std::vector &dims) { + size_t dims_size = dims.size(); + FDASSERT(dims_size == x.shape.size(), + "The input tensor's dimension should be equal to the dims's size. " + "Expect dims size is %lu, but receive %lu.", + x.shape.size(), dims_size); + std::vector count(dims_size, 0); + for (size_t i = 0; i < dims_size; i++) { + FDASSERT(dims[i] >= 0, + "The dims should be greater than or equal to 0, but receive %lld.", + dims[i]); + FDASSERT(dims[i] < static_cast(dims_size) && ++count[dims[i]] == 1, + "Each element of Attribute axis should be a unique value range " + "from 0 to (dims - 1), where the dims is the axis's size, unique " + "value means this axis value can appear only once. "); + } + std::vector out_dims(dims_size); + for (size_t i = 0; i < dims_size; i++) { + out_dims[i] = x.shape[dims[i]]; + } + + // Note(zhoushunjie): The FDTensor out may equal to FDTensor x, so firstly we + // use out_temp to get the transposed result, then we move the out_temp to + // out. + FDTensor out_temp; + out_temp.Allocate(out_dims, x.dtype); + FD_VISIT_ALL_TYPES(x.dtype, "TransposeKernel", + ([&] { TransposeKernel(x, &out_temp, dims); })); + *out = std::move(out_temp); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/transpose.h b/libs/ultrainfer/ultrainfer/function/transpose.h new file mode 100755 index 0000000000..687f7603de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/transpose.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { + +/** \brief All C++ FDTensor Operation APIs are defined inside this namespace + * + */ +namespace function { +/** Excute the transpose operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which the input tensor will transpose. +*/ +ULTRAINFER_DECL void Transpose(const FDTensor &x, FDTensor *out, + const std::vector &dims); +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline.h b/libs/ultrainfer/ultrainfer/pipeline.h new file mode 100755 index 0000000000..6568e1a106 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_VISION +#include "ultrainfer/pipeline/pptinypose/pipeline.h" +#endif + +#include "ultrainfer/vision/visualize/visualize.h" diff --git a/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc b/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc new file mode 100755 index 0000000000..b42fb7e2c5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
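A hedged usage sketch (not part of the patch) for the Transpose helper declared in transpose.h above, with the same FP32/SetExternalData assumptions as the earlier sketches:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/transpose.h"

void TransposeSketch() {
  std::vector<float> data = {1, 2, 3, 4, 5, 6};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, data.data());

  // dims must be a permutation of the input axes; {1, 0} swaps rows and
  // columns, producing a {3, 2} tensor.
  ultrainfer::FDTensor out;
  ultrainfer::function::Transpose(x, &out, {1, 0});
}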
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTinyPosePipeline(pybind11::module &m); + +void BindPipeline(pybind11::module &m) { BindPPTinyPosePipeline(m); } +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc new file mode 100755 index 0000000000..7b806fe947 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pipeline/pptinypose/pipeline.h" + +namespace ultrainfer { +namespace pipeline { +PPTinyPose::PPTinyPose( + ultrainfer::vision::detection::PicoDet *det_model, + ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model) + : detector_(det_model), pptinypose_model_(pptinypose_model) {} + +bool PPTinyPose::Detect(cv::Mat *img, + ultrainfer::vision::DetectionResult *detection_res) { + if (!detector_->Predict(img, detection_res)) { + FDERROR << "There's a error while detectiong human box in image." + << std::endl; + return false; + } + return true; +} + +bool PPTinyPose::KeypointDetect( + cv::Mat *img, ultrainfer::vision::KeyPointDetectionResult *result, + ultrainfer::vision::DetectionResult &detection_result) { + if (!pptinypose_model_->Predict(img, result, detection_result)) { + FDERROR << "There's a error while detecting keypoint in image " + << std::endl; + return false; + } + return true; +} + +bool PPTinyPose::Predict(cv::Mat *img, + ultrainfer::vision::KeyPointDetectionResult *result) { + result->Clear(); + ultrainfer::vision::DetectionResult detection_res; + if (nullptr != detector_ && !Detect(img, &detection_res)) { + FDERROR << "Failed to detect image." << std::endl; + return false; + } + ultrainfer::vision::DetectionResult filter_detection_res; + for (size_t i = 0; i < detection_res.boxes.size(); ++i) { + if (detection_res.scores[i] > detection_model_score_threshold) { + filter_detection_res.boxes.push_back(detection_res.boxes[i]); + filter_detection_res.scores.push_back(detection_res.scores[i]); + filter_detection_res.label_ids.push_back(detection_res.label_ids[i]); + } + } + if (nullptr != pptinypose_model_ && + !KeypointDetect(img, result, filter_detection_res)) { + FDERROR << "Failed to detect keypoint in image " << std::endl; + return false; + } + return true; +}; + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h new file mode 100755 index 0000000000..c3a8dcc7b1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/detection/ppdet/model.h"
+#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h"
+
+namespace ultrainfer {
+/** \brief All pipeline model APIs are defined inside this namespace
+ *
+ */
+namespace pipeline {
+
+/*! @brief PPTinyPose pipeline object, used to load a detection model together
+ * with a PP-TinyPose keypoint model
+ */
+class ULTRAINFER_DECL PPTinyPose {
+public:
+  /** \brief Set initialized detection model object and pptinypose model object
+   *
+   * \param[in] det_model Initialized detection model object
+   * \param[in] pptinypose_model Initialized pptinypose model object
+   */
+  PPTinyPose(
+      ultrainfer::vision::detection::PicoDet *det_model,
+      ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model);
+
+  /** \brief Predict the keypoint detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat *img,
+                       ultrainfer::vision::KeyPointDetectionResult *result);
+
+  /* \brief The score threshold for the detection model to filter bboxes
+   * before they are fed to the pptinypose model
+   */
+  float detection_model_score_threshold = 0;
+
+protected:
+  ultrainfer::vision::detection::PicoDet *detector_ = nullptr;
+  ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model_ =
+      nullptr;
+
+  virtual bool Detect(cv::Mat *img,
+                      ultrainfer::vision::DetectionResult *result);
+  virtual bool
+  KeypointDetect(cv::Mat *img,
+                 ultrainfer::vision::KeyPointDetectionResult *result,
+                 ultrainfer::vision::DetectionResult &detection_result);
+};
+
+} // namespace pipeline
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc
new file mode 100755
index 0000000000..7c6f3f9610
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
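+
+// pybind11 bindings for pipeline::PPTinyPose: constructor, predict(), and the
+// detection_model_score_threshold attribute.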
+#include "ultrainfer/pybind/main.h" +#include + +namespace ultrainfer { +void BindPPTinyPosePipeline(pybind11::module &m) { + pybind11::class_(m, "PPTinyPose") + + .def( + pybind11::init()) + .def("predict", + [](pipeline::PPTinyPose &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res); + return res; + }) + + .def_readwrite("detection_model_score_threshold", + &pipeline::PPTinyPose::detection_model_score_threshold); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc b/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc new file mode 100755 index 0000000000..82a526e9ed --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/ultrainfer_model.h" + +namespace ultrainfer { + +void BindFDModel(pybind11::module &m) { + pybind11::class_(m, "UltraInferModel") + .def(pybind11::init<>(), "Default Constructor") + .def("model_name", &UltraInferModel::ModelName) + .def("num_inputs_of_runtime", &UltraInferModel::NumInputsOfRuntime) + .def("num_outputs_of_runtime", &UltraInferModel::NumOutputsOfRuntime) + .def("input_info_of_runtime", &UltraInferModel::InputInfoOfRuntime) + .def("output_info_of_runtime", &UltraInferModel::OutputInfoOfRuntime) + .def("enable_record_time_of_runtime", + &UltraInferModel::EnableRecordTimeOfRuntime) + .def("disable_record_time_of_runtime", + &UltraInferModel::DisableRecordTimeOfRuntime) + .def("print_statis_info_of_runtime", + &UltraInferModel::PrintStatisInfoOfRuntime) + .def("get_profile_time", &UltraInferModel::GetProfileTime) + .def("initialized", &UltraInferModel::Initialized) + .def_readwrite("runtime_option", &UltraInferModel::runtime_option) + .def_readwrite("valid_cpu_backends", &UltraInferModel::valid_cpu_backends) + .def_readwrite("valid_gpu_backends", + &UltraInferModel::valid_gpu_backends); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc b/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc new file mode 100755 index 0000000000..912748acbf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc @@ -0,0 +1,293 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
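+
+// pybind11 bindings for FDTensor, including numpy and DLPack interoperability
+// helpers.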
+ +#include + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +DLDataType FDToDlpackType(FDDataType fd_dtype) { + DLDataType dl_dtype; + DLDataTypeCode dl_code; + + // Number of bits required for the data type. + size_t dt_size = 0; + + dl_dtype.lanes = 1; + switch (fd_dtype) { + case FDDataType::BOOL: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 1; + break; + case FDDataType::UINT8: + dl_code = DLDataTypeCode::kDLUInt; + dt_size = 8; + break; + case FDDataType::INT8: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 8; + break; + case FDDataType::INT16: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 16; + break; + case FDDataType::INT32: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 32; + break; + case FDDataType::INT64: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 64; + break; + case FDDataType::FP16: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 16; + break; + case FDDataType::FP32: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 32; + break; + case FDDataType::FP64: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 64; + break; + + default: + FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.", + Str(fd_dtype).c_str()); + } + + dl_dtype.code = dl_code; + dl_dtype.bits = dt_size; + return dl_dtype; +} + +FDDataType DlpackToFDType(const DLDataType &data_type) { + FDASSERT(data_type.lanes == 1, "FDTensor does not support dlpack lanes != 1") + + if (data_type.code == DLDataTypeCode::kDLFloat) { + if (data_type.bits == 16) { + return FDDataType::FP16; + } else if (data_type.bits == 32) { + return FDDataType::FP32; + } else if (data_type.bits == 64) { + return FDDataType::FP64; + } + } + + if (data_type.code == DLDataTypeCode::kDLInt) { + if (data_type.bits == 8) { + return FDDataType::INT8; + } else if (data_type.bits == 16) { + return FDDataType::INT16; + } else if (data_type.bits == 32) { + return FDDataType::INT32; + } else if (data_type.bits == 64) { + return FDDataType::INT64; + } else if (data_type.bits == 1) { + return FDDataType::BOOL; + } + } + + if (data_type.code == DLDataTypeCode::kDLUInt) { + if (data_type.bits == 8) { + return FDDataType::UINT8; + } + } + + return FDDataType::UNKNOWN1; +} + +void DeleteUnusedDltensor(PyObject *dlp) { + if (PyCapsule_IsValid(dlp, "dltensor")) { + DLManagedTensor *dl_managed_tensor = + static_cast(PyCapsule_GetPointer(dlp, "dltensor")); + dl_managed_tensor->deleter(dl_managed_tensor); + } +} + +pybind11::capsule FDTensorToDLPack(FDTensor &fd_tensor) { + DLManagedTensor *dlpack_tensor = new DLManagedTensor; + dlpack_tensor->dl_tensor.ndim = fd_tensor.shape.size(); + dlpack_tensor->dl_tensor.byte_offset = 0; + dlpack_tensor->dl_tensor.data = fd_tensor.MutableData(); + dlpack_tensor->dl_tensor.shape = &(fd_tensor.shape[0]); + dlpack_tensor->dl_tensor.strides = nullptr; + dlpack_tensor->manager_ctx = &fd_tensor; + dlpack_tensor->deleter = [](DLManagedTensor *m) { + if (m->manager_ctx == nullptr) { + return; + } + + FDTensor *tensor_ptr = reinterpret_cast(m->manager_ctx); + pybind11::handle tensor_handle = pybind11::cast(tensor_ptr); + tensor_handle.dec_ref(); + free(m); + }; + + pybind11::handle tensor_handle = pybind11::cast(&fd_tensor); + + // Increase the reference count by one to make sure that the DLPack + // represenation doesn't become invalid when the tensor object goes out of + // scope. 
+ tensor_handle.inc_ref(); + + dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype); + + dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id; + if (fd_tensor.device == Device::GPU) { + if (fd_tensor.is_pinned_memory) { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost; + } else { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDA; + } + } else { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; + } + + return pybind11::capsule(static_cast(dlpack_tensor), "dltensor", + &DeleteUnusedDltensor); +} + +FDTensor FDTensorFromDLPack(const std::string &name, + const pybind11::capsule &dlpack_tensor) { + DLManagedTensor *dl_managed_tensor = + static_cast(dlpack_tensor.get_pointer()); + + void *memory_ptr = dl_managed_tensor->dl_tensor.data; + memory_ptr = reinterpret_cast(memory_ptr) + + dl_managed_tensor->dl_tensor.byte_offset; + + int64_t *strides = dl_managed_tensor->dl_tensor.strides; + + int ndim = dl_managed_tensor->dl_tensor.ndim; + std::vector dims(dl_managed_tensor->dl_tensor.shape, + dl_managed_tensor->dl_tensor.shape + ndim); + + // Check if the input is contiguous and in C order + if (strides != nullptr) { + int64_t calculated_stride{1}; + bool is_contiguous_c_order = true; + for (size_t i = 1; i < dims.size(); i++) { + if (strides[ndim - i] != calculated_stride) { + is_contiguous_c_order = false; + break; + } + + calculated_stride *= dims[ndim - i]; + } + + FDASSERT(is_contiguous_c_order, + "DLPack tensor is not contiguous. Only contiguous DLPack " + "tensors that are stored in C-Order are supported."); + } + + Device device; + int32_t device_id = -1; + bool is_pinned_memory = false; + switch (dl_managed_tensor->dl_tensor.device.device_type) { + case DLDeviceType::kDLCUDA: + device = Device::GPU; + device_id = dl_managed_tensor->dl_tensor.device.device_id; + break; + case DLDeviceType::kDLCPU: + device = Device::CPU; + break; + case DLDeviceType::kDLCUDAHost: + device = Device::CPU; + is_pinned_memory = true; + break; + default: + FDASSERT(false, + ("DLDevice type " + + std::to_string(dl_managed_tensor->dl_tensor.device.device_type) + + " is not support by Python backend.") + .c_str()); + break; + } + + FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype); + + PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack"); + FDTensor fd_tensor(name); + fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id); + fd_tensor.is_pinned_memory = is_pinned_memory; + return fd_tensor; +} + +void BindFDTensor(pybind11::module &m) { + pybind11::class_(m, "FDTensor") + .def(pybind11::init<>(), "Default Constructor") + .def_readwrite("name", &FDTensor::name) + .def_readonly("shape", &FDTensor::shape) + .def_readonly("dtype", &FDTensor::dtype) + .def_readonly("device", &FDTensor::device) + .def("numpy", [](FDTensor &self) { return TensorToPyArray(self); }) + .def("data", &FDTensor::MutableData) + .def("from_numpy", + [](FDTensor &self, pybind11::array &pyarray, + bool share_buffer = false) { + PyArrayToTensor(pyarray, &self, share_buffer); + }) + .def("from_external_data", + [](const std::string &name, size_t data_addr, + const std::vector &shape, const std::string &data_type, + const std::string &data_place, int device_id) { + auto fd_data_type = FDDataType::UNKNOWN1; + if (data_type == "FP32") { + fd_data_type = FDDataType::FP32; + } else if (data_type == "FP16") { + fd_data_type = FDDataType::FP16; + } else if (data_type == "INT32") { + fd_data_type = FDDataType::INT32; + } else if 
(data_type == "INT64") {
+               fd_data_type = FDDataType::INT64;
+             } else {
+               FDASSERT(false,
+                        "FDTensor.from_external_data, datatype \"%s\" is not "
+                        "supported.",
+                        data_type.c_str());
+             }
+
+             Device fd_data_place;
+             bool copy = false;
+             if (data_place.find("gpu") != data_place.npos) {
+               fd_data_place = Device::GPU;
+             } else if (data_place.find("cpu") != data_place.npos) {
+               copy = true;
+               fd_data_place = Device::CPU;
+             } else {
+               FDASSERT(false,
+                        ("Device type " + data_place +
+                         " is not supported by FDTensor.from_external_data.")
+                            .c_str());
+             }
+             void *data_ptr = nullptr;
+             data_ptr = reinterpret_cast(data_addr);
+             FDTensor fd_tensor(name);
+             fd_tensor.SetData(shape, fd_data_type,
+                               static_cast(data_ptr), copy,
+                               fd_data_place, device_id);
+             return fd_tensor;
+           })
+      .def("to_dlpack", &FDTensorToDLPack)
+      .def("from_dlpack", &FDTensorFromDLPack)
+      .def("print_info", &FDTensor::PrintInfo);
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pybind/main.cc.in b/libs/ultrainfer/ultrainfer/pybind/main.cc.in
new file mode 100755
index 0000000000..83b28f4f67
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pybind/main.cc.in
@@ -0,0 +1,181 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
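+
+// Template for the pybind11 module entry point; @PY_LIBRARY_NAME@ is
+// substituted when this .in template is configured at build time.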
+ +#include "ultrainfer/pybind/main.h" + + + +namespace ultrainfer { + +void BindFDTensor(pybind11::module&); +void BindRuntime(pybind11::module&); +void BindFDModel(pybind11::module&); +void BindVision(pybind11::module&); +void BindText(pybind11::module&); +void BindPipeline(pybind11::module&); + +pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) { + pybind11::dtype dt; + if (fd_dtype == FDDataType::INT32) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::INT64) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP32) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP64) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::UINT8) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::INT8) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP16) { + dt = pybind11::dtype::of(); + } else { + FDASSERT(false, "The function doesn't support data type of %s.", + Str(fd_dtype).c_str()); + } + return dt; +} + +FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) { + if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT32; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT64; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP32; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP64; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::UINT8; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT8; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP16; + } + FDASSERT(false, + "NumpyDataTypeToFDDataType() only support " + "int8/int32/int64/float32/float64/float16 now."); + return FDDataType::FP32; +} + +void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor, + bool share_buffer) { + auto dtype = NumpyDataTypeToFDDataType(pyarray.dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), pyarray.shape(), + pyarray.shape() + pyarray.ndim()); + if (share_buffer) { + tensor->SetExternalData(data_shape, dtype, + pyarray.mutable_data()); + } else { + tensor->Resize(data_shape, dtype); + memcpy(tensor->MutableData(), pyarray.mutable_data(), pyarray.nbytes()); + } +} + +void PyArrayToTensorList(std::vector& pyarrays, std::vector* tensors, + bool share_buffer) { + tensors->resize(pyarrays.size()); + for(auto i = 0; i < pyarrays.size(); ++i) { + PyArrayToTensor(pyarrays[i], &(*tensors)[i], share_buffer); + } +} + +pybind11::array TensorToPyArray(const FDTensor& tensor) { + auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype); + auto out = pybind11::array(numpy_dtype, tensor.shape); + memcpy(out.mutable_data(), tensor.CpuData(), tensor.Nbytes()); + return out; +} + +#ifdef ENABLE_VISION +int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) { + if (np_dtype.is(pybind11::dtype::of())) { + return CV_32S; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_8S; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_8U; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_32F; + } else { + FDASSERT( + false, + "NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 " + "now."); + } + return CV_8U; +} + +int NumpyDataTypeToOpenCvTypeV2(pybind11::array& pyarray) { + if (pybind11::isinstance>(pyarray)) { + return CV_32S; + } else if (pybind11::isinstance>(pyarray)) { + return CV_8S; + } else if (pybind11::isinstance>(pyarray)) { + return CV_8U; + } else 
if (pybind11::isinstance>(pyarray)) {
+    return CV_32F;
+  } else {
+    FDASSERT(
+        false,
+        "NumpyDataTypeToOpenCvTypeV2() only supports int32/int8/uint8/float32 "
+        "now.");
+  }
+  return CV_8U;
+}
+
+cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
+  // auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
+  auto cv_type = NumpyDataTypeToOpenCvTypeV2(pyarray);
+  FDASSERT(
+      pyarray.ndim() == 3,
+      "Require rank of array to be 3 with HWC format while converting it to "
+      "cv::Mat.");
+  int channel = *(pyarray.shape() + 2);
+  int height = *(pyarray.shape());
+  int width = *(pyarray.shape() + 1);
+  return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
+                 pyarray.mutable_data());
+}
+#endif
+
+PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
+  m.doc() =
+      "Make it easier for programmers to deploy deep learning models, and "
+      "save time to save the world!";
+
+  m.def("set_logger", &SetLogger);
+
+  BindFDTensor(m);
+  BindRuntime(m);
+  BindFDModel(m);
+#ifdef ENABLE_VISION
+  auto vision_module =
+      m.def_submodule("vision", "Vision module of UltraInfer.");
+  BindVision(vision_module);
+  auto pipeline_module =
+      m.def_submodule("pipeline", "Pipeline module of UltraInfer.");
+  BindPipeline(pipeline_module);
+#endif
+
+#ifdef ENABLE_TEXT
+  auto text_module =
+      m.def_submodule("text", "Text module of UltraInfer.");
+  BindText(text_module);
+#endif
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pybind/main.h b/libs/ultrainfer/ultrainfer/pybind/main.h
new file mode 100755
index 0000000000..ed99bf02a7
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pybind/main.h
@@ -0,0 +1,135 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
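+
+// Shared declarations for the Python bindings: dtype conversion between numpy
+// and FDTensor, cv::Mat conversion, and float16 registration with pybind11.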
+ +#pragma once + +#include +#include +#include +#include + +#include + +#include "ultrainfer/runtime/runtime.h" + +#ifdef ENABLE_VISION +#include "ultrainfer/pipeline.h" +#include "ultrainfer/vision.h" +#endif + +#ifdef ENABLE_TEXT +#include "ultrainfer/text.h" +#endif + +#include "ultrainfer/core/float16.h" + +namespace ultrainfer { + +void BindBackend(pybind11::module &); +void BindVision(pybind11::module &); +void BindText(pybind11::module &m); +void BindPipeline(pybind11::module &m); +void BindRKNPU2Config(pybind11::module &); + +pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType &fd_dtype); + +FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype &np_dtype); + +void PyArrayToTensor(pybind11::array &pyarray, FDTensor *tensor, + bool share_buffer = false); +void PyArrayToTensorList(std::vector &pyarray, + std::vector *tensor, + bool share_buffer = false); +pybind11::array TensorToPyArray(const FDTensor &tensor); + +#ifdef ENABLE_VISION +cv::Mat PyArrayToCvMat(pybind11::array &pyarray); +#endif + +template FDDataType CTypeToFDDataType() { + if (std::is_same::value) { + return FDDataType::INT32; + } else if (std::is_same::value) { + return FDDataType::INT64; + } else if (std::is_same::value) { + return FDDataType::FP32; + } else if (std::is_same::value) { + return FDDataType::FP64; + } else if (std::is_same::value) { + return FDDataType::INT8; + } + FDASSERT(false, "CTypeToFDDataType only support " + "int8/int32/int64/float32/float64 now."); + return FDDataType::FP32; +} + +template +std::vector +PyBackendInfer(T &self, const std::vector &names, + std::vector &data) { + std::vector inputs(data.size()); + for (size_t i = 0; i < data.size(); ++i) { + // TODO(jiangjiajun) here is considered to use user memory directly + auto dtype = NumpyDataTypeToFDDataType(data[i].dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), data[i].shape(), + data[i].shape() + data[i].ndim()); + inputs[i].Resize(data_shape, dtype); + memcpy(inputs[i].MutableData(), data[i].mutable_data(), data[i].nbytes()); + inputs[i].name = names[i]; + } + + std::vector outputs(self.NumOutputs()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; +} + +} // namespace ultrainfer + +namespace pybind11 { +namespace detail { + +// Note: use same enum number of float16 in numpy. +// import numpy as np +// print np.dtype(np.float16).num # 23 +constexpr int NPY_FLOAT16_ = 23; + +// Note: Since float16 is not a builtin type in C++, we register +// ultrainfer::float16 as numpy.float16. +// Ref: https://github.com/pybind/pybind11/issues/1776 +template <> struct npy_format_descriptor { + static pybind11::dtype dtype() { + handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_); + return reinterpret_borrow(ptr); + } + static std::string format() { + // Note: "e" represents float16. + // Details at: + // https://docs.python.org/3/library/struct.html#format-characters. 
+ return "e"; + } + static constexpr auto name = _("float16"); +}; + +} // namespace detail +} // namespace pybind11 diff --git a/libs/ultrainfer/ultrainfer/pybind/runtime.cc b/libs/ultrainfer/ultrainfer/pybind/runtime.cc new file mode 100755 index 0000000000..43797f58da --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/runtime.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindOption(pybind11::module &m); + +void BindRuntime(pybind11::module &m) { + BindOption(m); + + pybind11::class_(m, "TensorInfo") + .def_readwrite("name", &TensorInfo::name) + .def_readwrite("shape", &TensorInfo::shape) + .def_readwrite("dtype", &TensorInfo::dtype); + + pybind11::class_(m, "Runtime") + .def(pybind11::init()) + .def("init", &Runtime::Init) + .def("compile", + [](Runtime &self, + std::vector> &warm_datas, + const RuntimeOption &_option) { + size_t rows = warm_datas.size(); + size_t columns = warm_datas[0].size(); + std::vector> warm_tensors( + rows, std::vector(columns)); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < columns; ++j) { + auto dtype = + NumpyDataTypeToFDDataType(warm_datas[i][j].dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), warm_datas[i][j].shape(), + warm_datas[i][j].shape() + + warm_datas[i][j].ndim()); + warm_tensors[i][j].Resize(data_shape, dtype); + memcpy(warm_tensors[i][j].MutableData(), + warm_datas[i][j].mutable_data(), + warm_datas[i][j].nbytes()); + } + } + return self.Compile(warm_tensors); + }) + .def("infer", + [](Runtime &self, std::map &data) { + std::vector inputs(data.size()); + int index = 0; + for (auto iter = data.begin(); iter != data.end(); ++iter) { + std::vector data_shape; + data_shape.insert(data_shape.begin(), iter->second.shape(), + iter->second.shape() + iter->second.ndim()); + auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype()); + // TODO(jiangjiajun) Maybe skip memory copy is a better choice + // use SetExternalData + inputs[index].Resize(data_shape, dtype); + memcpy(inputs[index].MutableData(), iter->second.mutable_data(), + iter->second.nbytes()); + inputs[index].name = iter->first; + index += 1; + } + + std::vector outputs(self.NumOutputs()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back( + pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; + }) + .def("infer", + [](Runtime &self, std::map &data) { + std::vector inputs; + inputs.reserve(data.size()); + for (auto iter = data.begin(); iter != data.end(); ++iter) { + FDTensor tensor; + tensor.SetExternalData(iter->second.Shape(), + iter->second.Dtype(), iter->second.Data(), + 
iter->second.device); + tensor.name = iter->first; + inputs.push_back(tensor); + } + std::vector outputs; + if (!self.Infer(inputs, &outputs)) { + throw std::runtime_error("Failed to inference with Runtime."); + } + return outputs; + }) + .def("infer", + [](Runtime &self, std::vector &inputs) { + std::vector outputs; + self.Infer(inputs, &outputs); + return outputs; + }) + .def("bind_input_tensor", &Runtime::BindInputTensor) + .def("bind_output_tensor", &Runtime::BindOutputTensor) + .def("infer", [](Runtime &self) { self.Infer(); }) + .def("get_output_tensor", + [](Runtime &self, const std::string &name) { + FDTensor *output = self.GetOutputTensor(name); + if (output == nullptr) { + return pybind11::cast(nullptr); + } + return pybind11::cast(*output); + }) + .def("num_inputs", &Runtime::NumInputs) + .def("num_outputs", &Runtime::NumOutputs) + .def("get_input_info", &Runtime::GetInputInfo) + .def("get_output_info", &Runtime::GetOutputInfo) + .def("get_profile_time", &Runtime::GetProfileTime) + .def_readonly("option", &Runtime::option); + + pybind11::enum_(m, "Backend", pybind11::arithmetic(), + "Backend for inference.") + .value("UNKOWN", Backend::UNKNOWN) + .value("ORT", Backend::ORT) + .value("TRT", Backend::TRT) + .value("POROS", Backend::POROS) + .value("PDINFER", Backend::PDINFER) + .value("RKNPU2", Backend::RKNPU2) + .value("SOPHGOTPU", Backend::SOPHGOTPU) + .value("TVM", Backend::TVM) + .value("LITE", Backend::LITE); + pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(), + "ModelFormat for inference.") + .value("PADDLE", ModelFormat::PADDLE) + .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) + .value("RKNN", ModelFormat::RKNN) + .value("SOPHGO", ModelFormat::SOPHGO) + .value("ONNX", ModelFormat::ONNX) + .value("TVMFormat", ModelFormat::TVMFormat); + pybind11::enum_(m, "Device", pybind11::arithmetic(), + "Device for inference.") + .value("CPU", Device::CPU) + .value("GPU", Device::GPU) + .value("IPU", Device::IPU) + .value("RKNPU", Device::RKNPU) + .value("SOPHGOTPU", Device::SOPHGOTPUD); + + pybind11::enum_(m, "FDDataType", pybind11::arithmetic(), + "Data type of UltraInfer.") + .value("BOOL", FDDataType::BOOL) + .value("INT8", FDDataType::INT8) + .value("INT16", FDDataType::INT16) + .value("INT32", FDDataType::INT32) + .value("INT64", FDDataType::INT64) + .value("FP16", FDDataType::FP16) + .value("FP32", FDDataType::FP32) + .value("FP64", FDDataType::FP64) + .value("UINT8", FDDataType::UINT8); + + m.def("get_available_backends", []() { return GetAvailableBackends(); }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime.h b/libs/ultrainfer/ultrainfer/runtime.h new file mode 100755 index 0000000000..ccd529dd34 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. 
+ + More details + */ + +#pragma once +#include "ultrainfer/core/config.h" +#include "ultrainfer/runtime/runtime.h" diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/backend.h new file mode 100755 index 0000000000..ec9b720381 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/backend.h @@ -0,0 +1,158 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/benchmark/benchmark.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/runtime_option.h" + +namespace ultrainfer { + +/*! @brief Information of Tensor + */ +struct TensorInfo { + std::string name; ///< Name of tensor + std::vector shape; ///< Shape of tensor + FDDataType dtype; ///< Data type of tensor + + friend std::ostream &operator<<(std::ostream &output, + const TensorInfo &info) { + output << "TensorInfo(name: " << info.name << ", shape: ["; + for (size_t i = 0; i < info.shape.size(); ++i) { + if (i == info.shape.size() - 1) { + output << info.shape[i]; + } else { + output << info.shape[i] << ", "; + } + } + output << "], dtype: " << Str(info.dtype) << ")"; + return output; + } +}; + +class BaseBackend { +public: + bool initialized_ = false; + + BaseBackend() {} + virtual ~BaseBackend() = default; + + virtual bool Initialized() const { return initialized_; } + + virtual bool Init(const RuntimeOption &option) { + FDERROR << "Not Implement for " << option.backend << " in " << option.device + << "." << std::endl; + return false; + } + + // Get number of inputs of the model + virtual int NumInputs() const = 0; + // Get number of outputs of the model + virtual int NumOutputs() const = 0; + // Get information of input tensor + virtual TensorInfo GetInputInfo(int index) = 0; + // Get information of output tensor + virtual TensorInfo GetOutputInfo(int index) = 0; + // Get information of all the input tensors + virtual std::vector GetInputInfos() = 0; + // Get information of all the output tensors + virtual std::vector GetOutputInfos() = 0; + + // if copy_to_fd is true, copy memory data to FDTensor + // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it) + virtual bool Infer(std::vector &inputs, + std::vector *outputs, + bool copy_to_fd = true) = 0; + // Optional: For those backends which can share memory + // while creating multiple inference engines with same model file + virtual std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) { + FDERROR << "Clone no support " << runtime_option.backend << " " << stream + << " " << device_id << std::endl; + return nullptr; + } + + benchmark::BenchmarkOption benchmark_option_; + benchmark::BenchmarkResult benchmark_result_; +}; + +/** \brief Macros for Runtime benchmark profiling. 
+ * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' + * indicates that the least number of times the loop + * will repeat when profiling mode is not enabled. + * In most cases, the value should be 1, i.e., results are + * obtained by running the inference process once, when + * the profile mode is turned off, such as ONNX Runtime, + * OpenVINO, TensorRT, Paddle Inference, Paddle Lite, + * RKNPU2, SOPHGO etc. + * + * example code @code + * // OpenVINOBackend::Infer + * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + * // do something .... + * RUNTIME_PROFILE_LOOP_BEGIN(1) + * // The codes which wrapped by 'BEGIN(1) ~ END' scope + * // will only run once when profiling mode is not enabled. + * request_.infer(); + * RUNTIME_PROFILE_LOOP_END + * // do something .... + * RUNTIME_PROFILE_LOOP_H2D_D2H_END + * + * @endcode In this case, No global variables inside a function + * are wrapped by BEGIN and END, which may be required for + * subsequent tasks. But, some times we need to set 'base_loop' + * as 0, such as POROS. + * + * * example code @code + * // PorosBackend::Infer + * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + * // do something .... + * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0 + * // The codes which wrapped by 'BEGIN(0) ~ END' scope + * // will not run when profiling mode is not enabled. + * auto poros_outputs = _poros_module->forward(poros_inputs); + * RUNTIME_PROFILE_LOOP_END + * // Run another inference beyond the scope of 'BEGIN ~ END' + * // to get valid outputs for subsequent tasks. + * auto poros_outputs = _poros_module->forward(poros_inputs); + * // do something .... will use 'poros_outputs' ... + * if (poros_outputs.isTensor()) { + * // ... + * } + * RUNTIME_PROFILE_LOOP_H2D_D2H_END + * + * @endcode In this case, 'poros_outputs' inside a function + * are wrapped by BEGIN and END, which may be required for + * subsequent tasks. So, we set 'base_loop' as 0 and lanuch + * another infer to get the valid outputs beyond the scope + * of 'BEGIN ~ END' for subsequent tasks. + */ + +#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \ + __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop)) +#define RUNTIME_PROFILE_LOOP_END __RUNTIME_PROFILE_LOOP_END(benchmark_result_) +#define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \ + __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1) +#define RUNTIME_PROFILE_LOOP_H2D_D2H_END \ + __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_) + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu new file mode 100755 index 0000000000..c60d274fb0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
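+
+// CUDA kernel implementing adaptive 2D average/max pooling on NCHW tensors,
+// shared by GPU backends.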
+ +#ifdef WITH_GPU + +#include "adaptive_pool2d_kernel.h" + +namespace ultrainfer { +template +__global__ void CudaCastKernel(const T1 *in, T2 *out, int edge, + int out_bc_offset, int in_bc_offset, int ih, + int iw, int oh, int ow, bool is_avg) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) { + return; + } + int offset = floorf(float(position) / out_bc_offset); + int h = floorf(float(position % out_bc_offset) / ow); + int w = (position % out_bc_offset) % ow; + int hstart = floorf(static_cast(h * ih) / oh); + int hend = ceilf(static_cast((h + 1) * ih) / oh); + int wstart = floorf(static_cast(w * iw) / ow); + int wend = ceilf(static_cast((w + 1) * iw) / ow); + float ele_val = 0.0; + if (is_avg) { + ele_val = 0.0; + } else { + ele_val = + static_cast(in[offset * in_bc_offset + hstart * iw + wstart]); + } + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_idx = h * iw + w; + if (is_avg) { + ele_val = + ele_val + static_cast(in[offset * in_bc_offset + input_idx]); + } else { + ele_val = + (ele_val > + static_cast(in[offset * in_bc_offset + input_idx])) + ? ele_val + : static_cast(in[offset * in_bc_offset + input_idx]); + } + } + } + out[position] = static_cast( + ele_val / static_cast(((hend - hstart) * (wend - wstart)))); +} + +void CudaAdaptivePool(const std::vector &input_dims, + const std::vector &output_dims, void *output, + const void *input, void *compute_stream, + const std::string &pooling_type, const std::string &dtype, + const std::string &out_dtype) { + auto casted_compute_stream = reinterpret_cast(compute_stream); + int out_bc_offset = output_dims[2] * output_dims[3]; + int in_bc_offset = input_dims[2] * input_dims[3]; + int jobs = 1; + for (int i : output_dims) { + jobs *= i; + } + bool is_avg = pooling_type == "avg"; + int threads = 256; + int blocks = ceil(jobs / static_cast(threads)); + if (dtype == "float") { + CudaCastKernel<<>>( + static_cast(input), static_cast(output), jobs, + out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), + int(output_dims[2]), int(output_dims[3]), is_avg); + } else if (dtype == "half") { + if (out_dtype == "half") { + CudaCastKernel<<>>( + static_cast(input), static_cast(output), jobs, + out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), + int(output_dims[2]), int(output_dims[3]), is_avg); + } + if (out_dtype == "float") { + CudaCastKernel + <<>>( + static_cast(input), static_cast(output), + jobs, out_bc_offset, in_bc_offset, int(input_dims[2]), + int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), + is_avg); + } + } +} +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h new file mode 100755 index 0000000000..10899e96b3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h @@ -0,0 +1,35 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace ultrainfer { + +void CudaAdaptivePool(const std::vector &input_dims, + const std::vector &output_dims, void *output, + const void *input, void *compute_stream, + const std::string &pooling_type, + const std::string &dtype = "float", + const std::string &out_dtype = "float"); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc new file mode 100755 index 0000000000..902014b165 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc @@ -0,0 +1,399 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/horizon/horizon_backend.h" +namespace ultrainfer { + +HorizonBackend::~HorizonBackend() { + int ret = -1; + // Release memory uniformly here + if (input_properties_ != nullptr) { + free(input_properties_); + } + if (output_properties_ != nullptr) { + free(output_properties_); + } + if (input_mems_ == nullptr) { + return; + } + for (int i = 0; i < NumInputs(); i++) { + + ret = hbSysFreeMem(&(input_mems_[i].sysMem[0])); + + if (ret != 0) { + FDERROR << "release input mem fail! ret=" << ret << std::endl; + } + if (input_mems_ != nullptr) { + free(input_mems_); + } + } + + for (int i = 0; i < NumOutputs(); i++) { + ret = hbSysFreeMem(&(output_mems_[i].sysMem[0])); + + if (ret != 0) { + FDERROR << "release output mem fail! ret=" << ret << std::endl; + } + if (output_mems_ != nullptr) { + free(output_mems_); + } + } + ret = hbDNNRelease(packed_dnn_handle_); + if (ret != 0) { + FDERROR << "hbDNNRelease fail! ret=" << ret << std::endl; + } +} + +bool HorizonBackend::GetModelInputOutputInfos() { + const char **model_name_list; + int model_count = 0; + int ret; + // get model name + ret = + hbDNNGetModelNameList(&model_name_list, &model_count, packed_dnn_handle_); + if (ret != 0) { + FDERROR << "get model name fail! ret=" << ret << std::endl; + return false; + } + // get dnn handle + ret = + hbDNNGetModelHandle(&dnn_handle_, packed_dnn_handle_, model_name_list[0]); + if (ret != 0) { + FDERROR << "get dnn handle fail! ret=" << ret << std::endl; + return false; + } + // get input infos + // Get detailed input parameters + int input_count = 0; + ret = hbDNNGetInputCount(&input_count, dnn_handle_); + if (ret != 0) { + FDERROR << "get input count fail! 
ret=" << ret << std::endl; + return false; + } + input_properties_ = (hbDNNTensorProperties *)malloc( + sizeof(hbDNNTensorProperties) * input_count); + memset(input_properties_, 0, input_count * sizeof(hbDNNTensorProperties)); + + inputs_desc_.resize(input_count); + + // get input info and copy to input tensor info + for (uint32_t i = 0; i < input_count; i++) { + ret = hbDNNGetInputTensorProperties(&input_properties_[i], dnn_handle_, i); + + if (ret != 0) { + FDERROR << "get input tensor properties fail! ret=" << ret << std::endl; + return false; + } + + if ((input_properties_[i].tensorLayout != HB_DNN_LAYOUT_NHWC)) { + FDERROR << "horizon_backend only support input layout is NHWC" + << std::endl; + } + if (input_properties_[i].tensorType != HB_DNN_IMG_TYPE_RGB) { + FDERROR << "horizon_backend only support input format is RGB" + << std::endl; + } + + const char *name; + + ret = hbDNNGetInputName(&name, dnn_handle_, i); + if (ret != 0) { + FDERROR << "get input tensor name fail! ret=" << ret << std::endl; + return false; + } + // copy input proper to input tensor info + std::string temp_name = name; + std::vector temp_shape{}; + int n_dims = input_properties_[i].validShape.numDimensions; + + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)input_properties_[i].validShape.dimensionSize[j]; + } + + // Only support RGB format, so input type is UINT8 + FDDataType temp_dtype = FDDataType::UINT8; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[i] = temp_input_info; + } + + // get output infos + // Get detailed output parameters + int output_count = 0; + ret = hbDNNGetOutputCount(&output_count, dnn_handle_); + if (ret != 0) { + FDERROR << "get output count fail! ret=" << ret << std::endl; + return false; + } + output_properties_ = (hbDNNTensorProperties *)malloc( + sizeof(hbDNNTensorProperties) * output_count); + memset(output_properties_, 0, output_count * sizeof(hbDNNTensorProperties)); + + outputs_desc_.resize(output_count); + + for (uint32_t i = 0; i < output_count; i++) { + // get model output size + ret = + hbDNNGetOutputTensorProperties(&output_properties_[i], dnn_handle_, i); + + const char *name; + ret = hbDNNGetOutputName(&name, dnn_handle_, i); + if (ret != 0) { + FDERROR << "get output tensor name fail! 
ret=" << ret << std::endl; + return false; + } + + // copy output proper to output tensor info + std::string temp_name = name; + std::vector temp_shape{}; + int n_dims = output_properties_[i].validShape.numDimensions; + + if ((n_dims == 4) && + (output_properties_[i].validShape.dimensionSize[3] == 1)) { + n_dims--; + } + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)output_properties_[i].validShape.dimensionSize[j]; + } + + FDDataType temp_dtype = + HorizonTensorTypeToFDDataType(output_properties_[i].tensorType); + + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_[i] = temp_input_info; + } + + return true; +} + +TensorInfo HorizonBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector HorizonBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo HorizonBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()); + + return outputs_desc_[index]; +} + +std::vector HorizonBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool HorizonBackend::LoadModel(const char *model) { + int ret = -1; + ret = hbDNNInitializeFromFiles(&packed_dnn_handle_, &model, 1); + if (ret != 0) { + FDERROR << "horizon_init fail! ret=" << ret << std::endl; + return false; + } + return true; +} +bool HorizonBackend::Init(const RuntimeOption &runtime_option) { + // Init model from file + if (!LoadModel((char *)runtime_option.model_file.data())) { + FDERROR << "load model failed" << std::endl; + return false; + } + + // GetModelInputOutputInfos + if (!GetModelInputOutputInfos()) { + FDERROR << "get model input output infos failed" << std::endl; + return false; + } + + return true; +} + +bool HorizonBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + + // Judge whether the input and output size are the same + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[HorizonBackend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + int ret = -1; + if (!infer_init_) { + // Create input tensor memory + int input_count = NumInputs(); + int output_count = NumOutputs(); + + input_mems_ = (hbDNNTensor *)malloc(sizeof(hbDNNTensor) * input_count); + output_mems_ = (hbDNNTensor *)malloc(sizeof(hbDNNTensor) * output_count); + + for (uint32_t i = 0; i < input_count; i++) { + input_mems_[i].properties = input_properties_[i]; + + input_mems_[i].properties.alignedShape = + input_mems_[i].properties.validShape; + + auto current_shape = GetInputInfo(i).shape; + auto &mem = input_mems_[i].sysMem[0]; + int intput_memSize = input_properties_[i].alignedByteSize; + + ret = hbSysAllocCachedMem(&mem, intput_memSize); + + if (ret != 0) { + FDERROR << "hbSysAllocCachedMem fails." << std::endl; + return false; + } + } + + for (uint32_t i = 0; i < output_count; i++) { + + output_mems_[i].properties = output_properties_[i]; + + auto current_shape = GetOutputInfo(i).shape; + auto &mem = output_mems_[i].sysMem[0]; + int output_memSize = output_properties_[i].alignedByteSize; + + ret = hbSysAllocCachedMem(&mem, output_memSize); + if (ret != 0) { + FDERROR << "hbSysAllocCachedMem fails." 
<< std::endl; + return false; + } + } + infer_init_ = true; + } + // Copy input data to input tensor memory + for (uint32_t i = 0; i < NumInputs(); i++) { + if (inputs[i].Data() == nullptr) { + FDERROR << "inputs[i].Data is NULL." << std::endl; + return false; + } + auto &mem = input_mems_[i].sysMem[0]; + + memcpy(mem.virAddr, inputs[i].Data(), inputs[i].Nbytes()); + ret = hbSysFlushMem(&mem, HB_SYS_MEM_CACHE_CLEAN); + if (ret != 0) { + FDERROR << "hbSysFlushMem fails." << std::endl; + return false; + } + } + + hbDNNTaskHandle_t task_handle = nullptr; + hbDNNInferCtrlParam infer_ctrl_param; + HB_DNN_INITIALIZE_INFER_CTRL_PARAM(&infer_ctrl_param); + + RUNTIME_PROFILE_LOOP_BEGIN(1) + ret = hbDNNInfer(&task_handle, &output_mems_, input_mems_, dnn_handle_, + &infer_ctrl_param); + RUNTIME_PROFILE_LOOP_END + if (ret != 0) { + FDERROR << "hbDNNInference fails." << std::endl; + return false; + } + ret = hbDNNWaitTaskDone(task_handle, 0); + if (ret != 0) { + FDERROR << "hbDNNWaitTaskDone fails." << std::endl; + return false; + } + ret = hbDNNReleaseTask(task_handle); + if (ret != 0) { + FDERROR << "hbDNNReleaseTask fails." << std::endl; + return false; + } + // get result + outputs->resize(outputs_desc_.size()); + std::vector temp_shape(4); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + + hbSysFlushMem(&(output_mems_[i].sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE); + auto data = (float *)(output_mems_[i].sysMem[0].virAddr); + + auto shift = output_mems_[i].properties.shift.shiftData; + auto scale = output_mems_[i].properties.scale.scaleData; + + for (int j = 0; j < (*outputs)[i].Nbytes(); j++) { + if (output_mems_[i].properties.quantiType == SHIFT) { + data[j] = data[j] / (1 << shift[j]); + } else if (output_mems_[i].properties.quantiType == SCALE) { + data[j] = data[j] * scale[j]; + } + } + + memcpy((*outputs)[i].MutableData(), + (float *)output_mems_[i].sysMem[0].virAddr, (*outputs)[i].Nbytes()); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +FDDataType HorizonBackend::HorizonTensorTypeToFDDataType(int32_t type) { + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F16) { + return FDDataType::FP16; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F32) { + return FDDataType::FP32; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S8) { + return FDDataType::INT8; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S16) { + return FDDataType::INT16; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S32) { + return FDDataType::INT32; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_U8) { + return FDDataType::UINT8; + } + + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +hbDNNDataType HorizonBackend::FDDataTypeToHorizonTensorType(FDDataType type) { + if (type == FDDataType::FP16) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_F16; + } + if (type == FDDataType::FP32) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_F32; + } + if (type == FDDataType::INT8) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S8; + } + if (type == FDDataType::INT16) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S16; + } + if (type == FDDataType::INT32) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S32; + } + if (type == FDDataType::UINT8) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_U8; + } + FDERROR << 
"horizon_tensor_type don't support this type" << std::endl; + + return hbDNNDataType::HB_DNN_TENSOR_TYPE_MAX; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h new file mode 100755 index 0000000000..1073b2e2d0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "dnn/hb_dnn.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" + +namespace ultrainfer { +class HorizonBackend : public BaseBackend { +public: + HorizonBackend() = default; + ~HorizonBackend(); + + // Horizon Backend implementation. + bool Init(const RuntimeOption &runtime_option); + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + hbPackedDNNHandle_t packed_dnn_handle_; + hbDNNHandle_t dnn_handle_; + hbDNNTensorProperties *input_properties_ = nullptr; + hbDNNTensorProperties *output_properties_ = nullptr; + hbDNNTensor *input_mems_; + hbDNNTensor *output_mems_; + + bool infer_init_ = false; + std::vector inputs_desc_; + std::vector outputs_desc_; + bool GetModelInputOutputInfos(); + bool LoadModel(const char *model); + + static FDDataType HorizonTensorTypeToFDDataType(int32_t type); + static hbDNNDataType FDDataTypeToHorizonTensorType(FDDataType type); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc new file mode 100755 index 0000000000..183e65de4a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/lite/lite_backend.h" + +#include + +namespace ultrainfer { + +#if defined(__arm__) || defined(__aarch64__) +#define FD_LITE_HOST TARGET(kARM) +#elif defined(__x86_64__) +#define FD_LITE_HOST TARGET(kX86) +#endif + +std::vector +GetPlacesForCpu(const LiteBackendOption &option) { + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)}); + if (option.enable_fp16) { + paddle::lite_api::MobileConfig check_fp16_config; + if (check_fp16_config.check_fp16_valid()) { + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)}); + } else { + FDWARNING << "Current CPU doesn't support float16 precision, will " + "fallback to float32." + << std::endl; + } + } + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)}); + return valid_places; +} + +void LiteBackend::ConfigureCpu(const LiteBackendOption &option) { + config_.set_valid_places(GetPlacesForCpu(option)); +} + +void LiteBackend::ConfigureGpu(const LiteBackendOption &option) { + std::vector valid_places; + if (option.enable_fp16) { + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageFolder)}); + } + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kFloat)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageFolder)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kAny)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kInt32)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}); + config_.set_valid_places(valid_places); +} + +void LiteBackend::ConfigureKunlunXin(const LiteBackendOption &option) { + std::vector valid_places; + // TODO(yeliang): Placing kInt8 first may cause accuracy issues of some model + // valid_places.push_back( + // paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)}); + if (option.enable_fp16) { + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)}); + } + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)}); + + config_.set_xpu_dev_per_thread(option.device_id); + config_.set_xpu_workspace_l3_size_per_thread( + option.kunlunxin_l3_workspace_size); + config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size, + option.kunlunxin_locked); + config_.set_xpu_l3_cache_autotune(option.kunlunxin_autotune); + config_.set_xpu_conv_autotune(option.kunlunxin_autotune, + option.kunlunxin_autotune_file); + config_.set_xpu_multi_encoder_method(option.kunlunxin_precision, + option.kunlunxin_adaptive_seqlen); + config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size); + if (option.kunlunxin_enable_multi_stream) { + config_.enable_xpu_multi_stream(); + } + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); +} + +void LiteBackend::ConfigureTimvx(const LiteBackendOption &option) { + 
config_.set_nnadapter_device_names({"verisilicon_timvx"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureAscend(const LiteBackendOption &option) { + config_.set_nnadapter_device_names({"huawei_ascend_npu"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureNNAdapter(const LiteBackendOption &option) { + if (!option.nnadapter_subgraph_partition_config_path.empty()) { + std::vector nnadapter_subgraph_partition_config_buffer; + if (ReadFile(option.nnadapter_subgraph_partition_config_path, + &nnadapter_subgraph_partition_config_buffer, false)) { + if (!nnadapter_subgraph_partition_config_buffer.empty()) { + std::string nnadapter_subgraph_partition_config_string( + nnadapter_subgraph_partition_config_buffer.data(), + nnadapter_subgraph_partition_config_buffer.size()); + config_.set_nnadapter_subgraph_partition_config_buffer( + nnadapter_subgraph_partition_config_string); + } + } + } + + if (!option.nnadapter_context_properties.empty()) { + config_.set_nnadapter_context_properties( + option.nnadapter_context_properties); + } + + if (!option.nnadapter_model_cache_dir.empty()) { + config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir); + } + + if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) { + config_.set_nnadapter_mixed_precision_quantization_config_path( + option.nnadapter_mixed_precision_quantization_config_path); + } + + if (!option.nnadapter_subgraph_partition_config_path.empty()) { + config_.set_nnadapter_subgraph_partition_config_path( + option.nnadapter_subgraph_partition_config_path); + } + + config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc new file mode 100755 index 0000000000..9f7167fceb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc @@ -0,0 +1,298 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
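Paddle Lite selects kernels following the order of the places passed to set_valid_places, which is why the Configure* helpers above push the quantized / FP16 places first and always append a kFloat place as the fallback (see also the TODO about kInt8 ordering in ConfigureKunlunXin). A condensed sketch of that convention, assuming the Paddle Lite full API headers and the FD_LITE_HOST macro defined above:

std::vector<paddle::lite_api::Place> places;
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});   // preferred: quantized kernels
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});   // only when check_fp16_valid() passes
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});  // FP32 fallback always last
config.set_valid_places(places);  // `config` is a paddle::lite_api::CxxConfig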
+ +#include "ultrainfer/runtime/backends/lite/lite_backend.h" +// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290 +// When compiling the UltraInfer dynamic library, namely, +// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite +// static library, you need to include the fake registration +// codes of Paddle Lite. When you compile the UltraInfer static +// library and depends on the Paddle Lite static library, +// WITH_STATIC_LIB=ON, you do not need to include the fake +// registration codes for Paddle Lite, but wait until you +// use the UltraInfer static library. +#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB))) +#warning You are compiling the UltraInfer dynamic library with \ +Paddle Lite static lib We will automatically add some registration \ +codes for ops, kernels and passes for Paddle Lite. +#include "paddle_use_kernels.h" // NOLINT +#include "paddle_use_ops.h" // NOLINT +#include "paddle_use_passes.h" // NOLINT +#endif + +#include + +namespace ultrainfer { + +void LiteBackend::BuildOption(const LiteBackendOption &option) { + option_ = option; + + if (option_.device == Device::CPU) { + ConfigureCpu(option_); + } else if (option_.device == Device::GPU) { + ConfigureGpu(option_); + } else if (option_.device == Device::TIMVX) { + ConfigureTimvx(option_); + } else if (option_.device == Device::KUNLUNXIN) { + ConfigureKunlunXin(option_); + } else if (option_.device == Device::ASCEND) { + ConfigureAscend(option_); + } + if (option_.cpu_threads > 0) { + config_.set_threads(option_.cpu_threads); + } + if (option_.power_mode > 0) { + config_.set_power_mode( + static_cast(option_.power_mode)); + } +} + +bool LiteBackend::Init(const RuntimeOption &runtime_option) { + if (initialized_) { + FDERROR << "LiteBackend is already initialized, cannot initialize again." + << std::endl; + return false; + } + + if (runtime_option.model_format != ModelFormat::PADDLE) { + FDERROR + << "PaddleLiteBackend only supports model format PADDLE, but now it's " + << runtime_option.model_format << "." << std::endl; + return false; + } + if (runtime_option.device != Device::CPU && + runtime_option.device != Device::GPU && + runtime_option.device != Device::KUNLUNXIN && + runtime_option.device != Device::ASCEND && + runtime_option.device != Device::TIMVX) { + FDERROR << "PaddleLiteBackend only supports " + "Device::CPU/Device::GPU/Device::TIMVX/Device::KUNLUNXIN/" + "Device::ASCEND, " + "but now it's " + << runtime_option.device << "." << std::endl; + return false; + } + if (runtime_option.device == Device::GPU && + !paddle::lite_api::IsOpenCLBackendValid()) { + FDERROR << "PaddleLiteBackend GPU (OpenCL) is not supported by the current " + "device." + << std::endl; + } + if (runtime_option.model_from_memory_) { + FDERROR << "PaddleLiteBackend doesn't support load model from memory, " + "please load model from disk." + << std::endl; + return false; + } + + if (runtime_option.params_file == "") { + // Use light api for Arm CPU via MobileConfig. + FDASSERT( + runtime_option.device == Device::CPU, + "In UltraInfer, Paddle Lite light API is only support for Arm CPU now!") + mobile_config_.set_model_from_file(runtime_option.model_file); + mobile_config_.set_threads(runtime_option.paddle_lite_option.cpu_threads); + mobile_config_.set_power_mode(static_cast( + runtime_option.paddle_lite_option.power_mode)); + // TODO(qiuyanjun): Add OpenCL support for mobile gpu. 
+ // Paddle-Lite/blob/develop/lite/api/tools/benchmark/benchmark.h#L265 + // mobile_config_.set_opencl_tune( + // tune_mode, opencl_cache_dir, opencl_tuned_file); + // mobile_config_.set_opencl_precision(gpu_precision); + predictor_ = + paddle::lite_api::CreatePaddlePredictor( + mobile_config_); + } else { + // Use full api for many hardwares via CxxConfig. + config_.set_model_file(runtime_option.model_file); + config_.set_param_file(runtime_option.params_file); + BuildOption(runtime_option.paddle_lite_option); + predictor_ = + paddle::lite_api::CreatePaddlePredictor( + config_); + if (option_.optimized_model_dir != "") { + FDINFO + << "Optimzed model dir is not empty, will save optimized model to: " + << option_.optimized_model_dir << std::endl; + predictor_->SaveOptimizedModel( + option_.optimized_model_dir, + paddle::lite_api::LiteModelType::kNaiveBuffer); + } + } + + inputs_desc_.clear(); + outputs_desc_.clear(); + inputs_order_.clear(); + std::vector input_names = predictor_->GetInputNames(); + std::vector output_names = predictor_->GetOutputNames(); + for (size_t i = 0; i < input_names.size(); ++i) { + inputs_order_[input_names[i]] = i; + TensorInfo info; + auto tensor = predictor_->GetInput(i); + auto shape = tensor->shape(); + info.shape.assign(shape.begin(), shape.end()); + info.name = input_names[i]; + info.dtype = LiteDataTypeToFD(tensor->precision()); + inputs_desc_.emplace_back(info); + } + for (size_t i = 0; i < output_names.size(); ++i) { + TensorInfo info; + auto tensor = predictor_->GetOutput(i); + auto shape = tensor->shape(); + info.shape.assign(shape.begin(), shape.end()); + info.name = output_names[i]; + if (option_.device != Device::KUNLUNXIN) { + info.dtype = LiteDataTypeToFD(tensor->precision()); + } + outputs_desc_.emplace_back(info); + } + + initialized_ = true; + return true; +} + +TensorInfo LiteBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector LiteBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo LiteBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()); + return outputs_desc_[index]; +} + +std::vector LiteBackend::GetOutputInfos() { return outputs_desc_; } + +bool LiteBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[LiteBackend] Size of inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto iter = inputs_order_.find(inputs[i].name); + if (iter == inputs_order_.end()) { + FDERROR << "Cannot find input with name:" << inputs[i].name + << " in loaded model." << std::endl; + return false; + } + + auto tensor = predictor_->GetInput(iter->second); + // Adjust dims only, allocate lazy. 
+ tensor->Resize(inputs[i].shape); + if (inputs[i].dtype == FDDataType::FP32) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT32) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT8) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::UINT8) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT64) { +#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_ARM64)) + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); +#else + FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!"); +#endif + } else { + FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype); + } + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + predictor_->Run(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + auto tensor = predictor_->GetOutput(i); + if (outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())) { + outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision()); + } + (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), tensor->data(), + (*outputs)[i].Nbytes()); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +bool ReadFile(const std::string &filename, std::vector *contents, + bool binary) { + FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r"); + if (!fp) { + FDERROR << "Cannot open file " << filename << "." << std::endl; + return false; + } + fseek(fp, 0, SEEK_END); + size_t size = ftell(fp); + fseek(fp, 0, SEEK_SET); + contents->clear(); + contents->resize(size); + size_t offset = 0; + char *ptr = reinterpret_cast(&(contents->at(0))); + while (offset < size) { + size_t already_read = fread(ptr, 1, size - offset, fp); + offset += already_read; + ptr += already_read; + } + fclose(fp); + return true; +} + +// Convert data type from paddle lite to ultrainfer +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType &dtype) { + if (dtype == paddle::lite_api::PrecisionType::kFloat) { + return FDDataType::FP32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt8) { + return FDDataType::INT8; + } else if (dtype == paddle::lite_api::PrecisionType::kInt32) { + return FDDataType::INT32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt64) { + return FDDataType::INT64; + } else if (dtype == paddle::lite_api::PrecisionType::kInt16) { + return FDDataType::INT16; + } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) { + return FDDataType::UINT8; + } else if (dtype == paddle::lite_api::PrecisionType::kFP64) { + return FDDataType::FP64; + } + FDASSERT(false, "Unexpected data type of %s.", + paddle::lite_api::PrecisionToStr(dtype).c_str()); + return FDDataType::FP32; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h new file mode 100755 index 0000000000..11873eada4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "paddle_api.h" // NOLINT + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/lite/option.h" +#include "ultrainfer/runtime/runtime_option.h" + +namespace ultrainfer { + +class LiteBackend : public BaseBackend { +public: + LiteBackend() {} + virtual ~LiteBackend() = default; + + bool Init(const RuntimeOption &option) override; + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; // NOLINT + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + // Build CxxConfig from option for Paddle Lite full api. + void BuildOption(const LiteBackendOption &option); + // Configure many hardwares for Paddle Lite full api. + void ConfigureCpu(const LiteBackendOption &option); + void ConfigureGpu(const LiteBackendOption &option); + void ConfigureTimvx(const LiteBackendOption &option); + void ConfigureAscend(const LiteBackendOption &option); + void ConfigureKunlunXin(const LiteBackendOption &option); + void ConfigureNNAdapter(const LiteBackendOption &option); + + paddle::lite_api::CxxConfig config_; + std::shared_ptr predictor_; + paddle::lite_api::MobileConfig mobile_config_; + std::vector inputs_desc_; + std::vector outputs_desc_; + std::map inputs_order_; + LiteBackendOption option_; +}; + +// Convert data type from paddle lite to ultrainfer +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType &dtype); + +// Helper function to read file +bool ReadFile(const std::string &filename, std::vector *contents, + bool binary = true); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h new file mode 100755 index 0000000000..d3530be27d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
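A minimal end-to-end usage sketch of the backend declared above, assuming the ultrainfer headers are on the include path and that RuntimeOption exposes the same fields lite_backend.cc reads; the model file names are hypothetical.

#include <vector>
#include "ultrainfer/runtime/backends/lite/lite_backend.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.model_file = "inference.pdmodel";     // hypothetical Paddle model files
  option.params_file = "inference.pdiparams";
  option.model_format = ultrainfer::ModelFormat::PADDLE;
  option.device = ultrainfer::Device::CPU;
  option.paddle_lite_option.cpu_threads = 4;

  ultrainfer::LiteBackend backend;
  if (!backend.Init(option)) return 1;

  std::vector<ultrainfer::FDTensor> inputs(backend.NumInputs());
  std::vector<ultrainfer::FDTensor> outputs;
  // Fill each input's name/shape/dtype/data to match backend.GetInputInfos()
  // before calling Infer; omitted here for brevity.
  return backend.Infer(inputs, &outputs) ? 0 : 1;
}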
+ +#pragma once + +#include "ultrainfer/core/fd_type.h" +// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290 +#if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB)) +// Whether to output some warning messages when using the +// FastDepoy static library, default OFF. These messages +// are only reserve for debugging. +#if defined(WITH_STATIC_WARNING) +#warning You are using the UltraInfer static library. We will automatically add some registration codes for ops, kernels and passes for Paddle Lite. // NOLINT +#endif +#if !defined(WITH_STATIC_LIB_AT_COMPILING) +#include "paddle_use_kernels.h" // NOLINT +#include "paddle_use_ops.h" // NOLINT +#include "paddle_use_passes.h" // NOLINT +#endif +#endif + +#include +#include +#include +#include +#include + +namespace ultrainfer { + +/*! Paddle Lite power mode for mobile device. */ +enum LitePowerMode { + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +}; + +/*! @brief Option object to configure Paddle Lite backend + */ +struct LiteBackendOption { + /// Paddle Lite power mode for mobile device. + int power_mode = 3; + // Number of threads while use CPU + int cpu_threads = 1; + /// Enable use half precision + bool enable_fp16 = false; + // Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND + Device device = Device::CPU; + // Index of inference device + int device_id = 0; + // TODO(qiuyanjun): add opencl binary path and cache settings. 
+ std::string opencl_cache_dir = "/data/local/tmp/"; + std::string opencl_tuned_file = "/data/local/tmp/opencl_tuned_kernels.bin"; + + /// kunlunxin_l3_workspace_size + int kunlunxin_l3_workspace_size = 0xfffc00; + /// kunlunxin_locked + bool kunlunxin_locked = false; + /// kunlunxin_autotune + bool kunlunxin_autotune = true; + /// kunlunxin_autotune_file + std::string kunlunxin_autotune_file = ""; + /// kunlunxin_precision + std::string kunlunxin_precision = "int16"; + /// kunlunxin_adaptive_seqlen + bool kunlunxin_adaptive_seqlen = false; + /// kunlunxin_enable_multi_stream + bool kunlunxin_enable_multi_stream = false; + /// kunlunxin_gm_default_size + int64_t kunlunxin_gm_default_size = 0; + + /// Optimized model dir for CxxConfig + std::string optimized_model_dir = ""; + /// nnadapter_subgraph_partition_config_path + std::string nnadapter_subgraph_partition_config_path = ""; + /// nnadapter_subgraph_partition_config_buffer + std::string nnadapter_subgraph_partition_config_buffer = ""; + /// nnadapter_context_properties + std::string nnadapter_context_properties = ""; + /// nnadapter_model_cache_dir + std::string nnadapter_model_cache_dir = ""; + /// nnadapter_mixed_precision_quantization_config_path + std::string nnadapter_mixed_precision_quantization_config_path = ""; + /// nnadapter_dynamic_shape_info + std::map>> + nnadapter_dynamic_shape_info = {{"", {{0}}}}; + /// nnadapter_device_names + std::vector nnadapter_device_names = {}; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc new file mode 100755 index 0000000000..d66c65d507 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
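The integer power_mode above is forwarded to Paddle Lite as a LitePowerMode (see BuildOption in lite_backend.cc), so it can be assigned from the enum directly. A short sketch of filling the most common fields, assuming this header is included:

ultrainfer::LiteBackendOption lite_option;
lite_option.cpu_threads = 2;
lite_option.power_mode = ultrainfer::LITE_POWER_HIGH;   // default is 3 (LITE_POWER_NO_BIND)
lite_option.enable_fp16 = true;                         // prefer FP16 kernels where the target supports them
lite_option.device = ultrainfer::Device::KUNLUNXIN;     // routed to ConfigureKunlunXin() at init time
lite_option.kunlunxin_l3_workspace_size = 0xfffc00;     // per-thread L3 workspace, as defaulted above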
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/lite/option.h" + +namespace ultrainfer { + +void BindLiteOption(pybind11::module &m) { + pybind11::class_(m, "LiteBackendOption") + .def(pybind11::init()) + .def_readwrite("power_mode", &LiteBackendOption::power_mode) + .def_readwrite("cpu_threads", &LiteBackendOption::cpu_threads) + .def_readwrite("enable_fp16", &LiteBackendOption::enable_fp16) + .def_readwrite("device", &LiteBackendOption::device) + .def_readwrite("optimized_model_dir", + &LiteBackendOption::optimized_model_dir) + .def_readwrite( + "nnadapter_subgraph_partition_config_path", + &LiteBackendOption::nnadapter_subgraph_partition_config_path) + .def_readwrite( + "nnadapter_subgraph_partition_config_buffer", + &LiteBackendOption::nnadapter_subgraph_partition_config_buffer) + .def_readwrite("nnadapter_context_properties", + &LiteBackendOption::nnadapter_context_properties) + .def_readwrite("nnadapter_model_cache_dir", + &LiteBackendOption::nnadapter_model_cache_dir) + .def_readwrite("nnadapter_mixed_precision_quantization_config_path", + &LiteBackendOption:: + nnadapter_mixed_precision_quantization_config_path) + .def_readwrite("nnadapter_dynamic_shape_info", + &LiteBackendOption::nnadapter_dynamic_shape_info) + .def_readwrite("nnadapter_device_names", + &LiteBackendOption::nnadapter_device_names) + .def_readwrite("device_id", &LiteBackendOption::device_id) + .def_readwrite("kunlunxin_l3_workspace_size", + &LiteBackendOption::kunlunxin_l3_workspace_size) + .def_readwrite("kunlunxin_locked", &LiteBackendOption::kunlunxin_locked) + .def_readwrite("kunlunxin_autotune", + &LiteBackendOption::kunlunxin_autotune) + .def_readwrite("kunlunxin_autotune_file", + &LiteBackendOption::kunlunxin_autotune_file) + .def_readwrite("kunlunxin_precision", + &LiteBackendOption::kunlunxin_precision) + .def_readwrite("kunlunxin_gm_default_size", + &LiteBackendOption::kunlunxin_gm_default_size) + .def_readwrite("kunlunxin_adaptive_seqlen", + &LiteBackendOption::kunlunxin_adaptive_seqlen) + .def_readwrite("kunlunxin_enable_multi_stream", + &LiteBackendOption::kunlunxin_enable_multi_stream); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h new file mode 100755 index 0000000000..55808e1ce2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include +#include +#include +namespace ultrainfer { + +/*! 
@brief Option object to configure OpenVINO backend + */ +struct OpenVINOBackendOption { + std::string device = "CPU"; + int cpu_thread_num = -1; + + /// Number of streams while use OpenVINO + int num_streams = 1; + + /// Affinity mode + std::string affinity = "YES"; + + /// Performance hint mode + std::string hint = "UNDEFINED"; + + /** + * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', + * 'GPU', 'GPU.1'.... + */ + void SetDevice(const std::string &name = "CPU") { device = name; } + + /** + * @brief Set shape info for OpenVINO + */ + void SetShapeInfo( + const std::map> &_shape_infos) { + shape_infos = _shape_infos; + } + + /** + * @brief While use OpenVINO backend with intel GPU, use this interface to + * specify operators run on CPU + */ + void SetCpuOperators(const std::vector &operators) { + for (const auto &op : operators) { + cpu_operators.insert(op); + } + } + + /** + * @brief Set Affinity mode + */ + void SetAffinity(const std::string &_affinity) { + FDASSERT(_affinity == "YES" || _affinity == "NO" || _affinity == "NUMA" || + _affinity == "HYBRID_AWARE", + "The affinity mode should be one of the list " + "['YES', 'NO', 'NUMA', " + "'HYBRID_AWARE'] "); + affinity = _affinity; + } + + /** + * @brief Set the Performance Hint + */ + void SetPerformanceHint(const std::string &_hint) { + FDASSERT(_hint == "LATENCY" || _hint == "THROUGHPUT" || + _hint == "CUMULATIVE_THROUGHPUT" || _hint == "UNDEFINED", + "The performance hint should be one of the list " + "['LATENCY', 'THROUGHPUT', 'CUMULATIVE_THROUGHPUT', " + "'UNDEFINED'] "); + hint = _hint; + } + + /** + * @brief Set the number of streams + */ + void SetStreamNum(int _num_streams) { + FDASSERT(_num_streams > 0, "The stream_num must be greater than 0."); + num_streams = _num_streams; + } + + std::map> shape_infos; + std::set cpu_operators{"MulticlassNms"}; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc new file mode 100755 index 0000000000..50f53c3692 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
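A minimal sketch of configuring the options declared above; the input name "x" and its shape are hypothetical. Note that cpu_thread_num, num_streams and affinity only take effect while hint stays "UNDEFINED", since the backend applies them in that branch only.

ultrainfer::OpenVINOBackendOption ov_option;
ov_option.SetDevice("CPU");                         // or "AUTO", "GPU", "GPU.1", a HETERO combination, ...
ov_option.cpu_thread_num = 8;                       // mapped to INFERENCE_NUM_THREADS
ov_option.SetStreamNum(4);                          // mapped to NUM_STREAMS
ov_option.SetAffinity("NUMA");                      // one of YES / NO / NUMA / HYBRID_AWARE
ov_option.SetShapeInfo({{"x", {1, 3, 224, 224}}});  // pin shapes before the model is compiled
// ov_option.SetPerformanceHint("THROUGHPUT");      // would take precedence over the settings above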
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/openvino/option.h" + +namespace ultrainfer { + +void BindOpenVINOOption(pybind11::module &m) { + pybind11::class_(m, "OpenVINOBackendOption") + .def(pybind11::init()) + .def_readwrite("cpu_thread_num", &OpenVINOBackendOption::cpu_thread_num) + .def_readwrite("num_streams", &OpenVINOBackendOption::num_streams) + .def_readwrite("affinity", &OpenVINOBackendOption::affinity) + .def_readwrite("hint", &OpenVINOBackendOption::hint) + .def("set_device", &OpenVINOBackendOption::SetDevice) + .def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo) + .def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators) + .def("set_affinity", &OpenVINOBackendOption::SetAffinity) + .def("set_performance_hint", &OpenVINOBackendOption::SetPerformanceHint) + .def("set_stream_num", &OpenVINOBackendOption::SetStreamNum); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc new file mode 100755 index 0000000000..5d4163acfb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc @@ -0,0 +1,457 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/openvino/ov_backend.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif + +namespace ultrainfer { + +std::vector PartialShapeToVec(const ov::PartialShape &shape) { + std::vector res; + for (int i = 0; i < shape.size(); ++i) { + auto dim = shape[i]; + if (dim.is_dynamic()) { + res.push_back(-1); + } else { + res.push_back(dim.get_length()); + } + } + return res; +} + +ov::PartialShape VecToPartialShape(const std::vector &shape) { + std::vector dims; + for (size_t i = 0; i < shape.size(); ++i) { + dims.emplace_back(ov::Dimension(shape[i])); + } + return ov::PartialShape(dims); +} + +FDDataType OpenVINODataTypeToFD(const ov::element::Type &type) { + if (type == ov::element::f32) { + return FDDataType::FP32; + } else if (type == ov::element::f16) { + return FDDataType::FP16; + } else if (type == ov::element::f64) { + return FDDataType::FP64; + } else if (type == ov::element::i8) { + return FDDataType::INT8; + } else if (type == ov::element::u8) { + return FDDataType::UINT8; + } else if (type == ov::element::i32) { + return FDDataType::INT32; + } else if (type == ov::element::i64) { + return FDDataType::INT64; + } else { + FDASSERT(false, "Only support float/double/int8/int32/int64/float16 now."); + } + return FDDataType::FP32; +} + +ov::element::Type FDDataTypeToOV(const FDDataType &type) { + if (type == FDDataType::FP32) { + return ov::element::f32; + } else if (type == FDDataType::FP64) { + return ov::element::f64; + } else if (type == FDDataType::INT8) { + return ov::element::i8; + } else if (type == FDDataType::UINT8) { + return ov::element::u8; + } else if (type == FDDataType::INT32) { + return ov::element::i32; + } else if (type == FDDataType::INT64) { + return ov::element::i64; + } else if (type == FDDataType::FP16) { + return ov::element::f16; + } + FDASSERT(false, + "Only support float/double/int8/uint8/int32/int64/float16 now."); + return ov::element::f32; +} + +ov::Core OpenVINOBackend::core_; + +void OpenVINOBackend::InitTensorInfo( + const std::vector> &ov_outputs, + std::map *tensor_infos) { + for (size_t i = 0; i < ov_outputs.size(); ++i) { + TensorInfo info; + auto partial_shape = PartialShapeToVec(ov_outputs[i].get_partial_shape()); + info.shape.assign(partial_shape.begin(), partial_shape.end()); + info.name = ov_outputs[i].get_any_name(); + info.dtype = OpenVINODataTypeToFD(ov_outputs[i].get_element_type()); + tensor_infos->insert(std::make_pair(info.name, info)); + } +} + +bool OpenVINOBackend::Init(const RuntimeOption &option) { + if (option.model_from_memory_) { + FDERROR << "OpenVINOBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + if (option.device != Device::CPU) { + FDERROR << "OpenVINOBackend only supports Device::CPU, but now its " + << option.device << "." 
<< std::endl; + return false; + } + + if (option.model_format == ModelFormat::PADDLE) { + return InitFromPaddle(option.model_file, option.params_file, + option.openvino_option); + } else if (option.model_format == ModelFormat::ONNX) { + return InitFromOnnx(option.model_file, option.openvino_option); + } else { + FDERROR << "OpenVINOBackend only supports model format Paddle/ONNX, but " + "now its " + << option.model_format << std::endl; + return false; + } + return false; +} + +bool OpenVINOBackend::InitFromPaddle(const std::string &model_file, + const std::string ¶ms_file, + const OpenVINOBackendOption &option) { + if (initialized_) { + FDERROR << "OpenVINOBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + option_ = option; + + std::shared_ptr model = core_.read_model(model_file, params_file); + if (option_.shape_infos.size() > 0) { + std::map shape_infos; + for (const auto &item : option_.shape_infos) { + shape_infos[item.first] = VecToPartialShape(item.second); + } + model->reshape(shape_infos); + } + + if (option_.device.find("HETERO") != std::string::npos) { + auto supported_ops = core_.query_model(model, option_.device); + for (auto &&op : model->get_ops()) { + auto &affinity = supported_ops[op->get_friendly_name()]; + if (option_.cpu_operators.find(op->description()) != + option_.cpu_operators.end()) { + op->get_rt_info()["affinity"] = "CPU"; + } else { + op->get_rt_info()["affinity"] = affinity; + } + } + } + + // Get inputs/outputs information from loaded model + const std::vector> inputs = model->inputs(); + std::map input_infos; + InitTensorInfo(inputs, &input_infos); + + const std::vector> outputs = model->outputs(); + std::map output_infos; + InitTensorInfo(outputs, &output_infos); + + // OpenVINO model may not keep the same order with original model + // So here will reorder it's inputs and outputs + std::string model_content; + ReadBinaryFromFile(model_file, &model_content); + auto reader = + paddle2onnx::PaddleReader(model_content.c_str(), model_content.size()); + if (reader.num_inputs != input_infos.size()) { + FDERROR << "The number of inputs from PaddleReader:" << reader.num_inputs + << " not equal to the number of inputs from OpenVINO:" + << input_infos.size() << "." << std::endl; + return false; + } + if (reader.num_outputs != output_infos.size()) { + FDERROR << "The number of outputs from PaddleReader:" << reader.num_outputs + << " not equal to the number of outputs from OpenVINO:" + << output_infos.size() << "." << std::endl; + return false; + } + for (int i = 0; i < reader.num_inputs; ++i) { + auto iter = input_infos.find(std::string(reader.inputs[i].name)); + if (iter == input_infos.end()) { + FDERROR << "Cannot find input name:" << reader.inputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + input_infos_.push_back(iter->second); + } + for (int i = 0; i < reader.num_outputs; ++i) { + auto iter = output_infos.find(std::string(reader.outputs[i].name)); + if (iter == output_infos.end()) { + FDERROR << "Cannot find output name:" << reader.outputs[i].name + << " from OpenVINO model." 
<< std::endl; + return false; + } + output_infos_.push_back(iter->second); + } + + ov::AnyMap properties; + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } + if (option_.num_streams == -1) { + properties["NUM_STREAMS"] = ov::streams::AUTO; + } else if (option_.num_streams == -2) { + properties["NUM_STREAMS"] = ov::streams::NUMA; + } else if (option_.num_streams > 0) { + properties["NUM_STREAMS"] = option_.num_streams; + } + + FDINFO << "number of streams:" << option_.num_streams << "." << std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; + } + FDINFO << "affinity:" << option_.affinity << "." << std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + } + + FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." + << std::endl; + + compiled_model_ = core_.compile_model(model, option.device, properties); + + request_ = compiled_model_.create_infer_request(); + initialized_ = true; + return true; +} + +TensorInfo OpenVINOBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + return input_infos_[index]; +} + +std::vector OpenVINOBackend::GetInputInfos() { + return input_infos_; +} + +std::vector OpenVINOBackend::GetOutputInfos() { + return output_infos_; +} + +TensorInfo OpenVINOBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + return output_infos_[index]; +} + +bool OpenVINOBackend::InitFromOnnx(const std::string &model_file, + const OpenVINOBackendOption &option) { + if (initialized_) { + FDERROR << "OpenVINOBackend is already initlized, cannot initialize again." 
+ << std::endl; + return false; + } + option_ = option; + + std::shared_ptr model = core_.read_model(model_file); + if (option_.shape_infos.size() > 0) { + std::map shape_infos; + for (const auto &item : option_.shape_infos) { + shape_infos[item.first] = VecToPartialShape(item.second); + } + model->reshape(shape_infos); + } + + if (option_.device.find("HETERO") != std::string::npos) { + auto supported_ops = core_.query_model(model, option_.device); + for (auto &&op : model->get_ops()) { + auto &affinity = supported_ops[op->get_friendly_name()]; + if (option_.cpu_operators.find(op->description()) != + option_.cpu_operators.end()) { + op->get_rt_info()["affinity"] = "CPU"; + } else { + op->get_rt_info()["affinity"] = affinity; + } + } + } + + // Get inputs/outputs information from loaded model + const std::vector> inputs = model->inputs(); + std::map input_infos; + InitTensorInfo(inputs, &input_infos); + + const std::vector> outputs = model->outputs(); + std::map output_infos; + InitTensorInfo(outputs, &output_infos); + + // OpenVINO model may not keep the same order with original model + // So here will reorder it's inputs and outputs + std::string model_content; + ReadBinaryFromFile(model_file, &model_content); + auto reader = + paddle2onnx::OnnxReader(model_content.c_str(), model_content.size()); + if (reader.num_inputs != input_infos.size()) { + FDERROR << "The number of inputs from OnnxReader:" << reader.num_inputs + << " not equal to the number of inputs from OpenVINO:" + << input_infos.size() << "." << std::endl; + return false; + } + if (reader.num_outputs != output_infos.size()) { + FDERROR << "The number of outputs from OnnxReader:" << reader.num_outputs + << " not equal to the number of outputs from OpenVINO:" + << output_infos.size() << "." << std::endl; + return false; + } + for (int i = 0; i < reader.num_inputs; ++i) { + auto iter = input_infos.find(std::string(reader.inputs[i].name)); + if (iter == input_infos.end()) { + FDERROR << "Cannot find input name:" << reader.inputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + input_infos_.push_back(iter->second); + } + for (int i = 0; i < reader.num_outputs; ++i) { + auto iter = output_infos.find(std::string(reader.outputs[i].name)); + if (iter == output_infos.end()) { + FDERROR << "Cannot find output name:" << reader.outputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + output_infos_.push_back(iter->second); + } + + ov::AnyMap properties; + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } + if (option_.num_streams == -1) { + properties["NUM_STREAMS"] = ov::streams::AUTO; + } else if (option_.num_streams == -2) { + properties["NUM_STREAMS"] = ov::streams::NUMA; + } else if (option_.num_streams > 0) { + properties["NUM_STREAMS"] = option_.num_streams; + } + + FDINFO << "number of streams:" << option_.num_streams << "." << std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; + } + FDINFO << "affinity:" << option_.affinity << "." 
<< std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + } + + FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." + << std::endl; + compiled_model_ = core_.compile_model(model, option.device, properties); + + request_ = compiled_model_.create_infer_request(); + + initialized_ = true; + return true; +} + +int OpenVINOBackend::NumInputs() const { return input_infos_.size(); } + +int OpenVINOBackend::NumOutputs() const { return output_infos_.size(); } + +bool OpenVINOBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != input_infos_.size()) { + FDERROR << "[OpenVINOBackend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << input_infos_.size() << ")." << std::endl; + return false; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + ov::Shape shape(inputs[i].shape.begin(), inputs[i].shape.end()); + ov::Tensor ov_tensor(FDDataTypeToOV(inputs[i].dtype), shape, + inputs[i].Data()); + request_.set_tensor(inputs[i].name, ov_tensor); + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + request_.start_async(); + request_.wait(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(output_infos_.size()); + for (size_t i = 0; i < output_infos_.size(); ++i) { + auto out_tensor = request_.get_output_tensor(i); + auto out_tensor_shape = out_tensor.get_shape(); + std::vector shape(out_tensor_shape.begin(), + out_tensor_shape.end()); + if (copy_to_fd) { + (*outputs)[i].Resize(shape, + OpenVINODataTypeToFD(out_tensor.get_element_type()), + output_infos_[i].name, Device::CPU); + memcpy((*outputs)[i].MutableData(), out_tensor.data(), + (*outputs)[i].Nbytes()); + } else { + (*outputs)[i].name = output_infos_[i].name; + (*outputs)[i].SetExternalData( + shape, OpenVINODataTypeToFD(out_tensor.get_element_type()), + out_tensor.data(), Device::CPU); + } + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +std::unique_ptr +OpenVINOBackend::Clone(RuntimeOption &runtime_option, void *stream, + int device_id) { + std::unique_ptr new_backend = + utils::make_unique(); + auto casted_backend = dynamic_cast(new_backend.get()); + casted_backend->option_ = option_; + casted_backend->request_ = compiled_model_.create_infer_request(); + casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end()); + casted_backend->output_infos_.assign(output_infos_.begin(), + output_infos_.end()); + return new_backend; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h new file mode 100755 index 0000000000..7119d60549 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "openvino/openvino.hpp" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/openvino/option.h" +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { + +class OpenVINOBackend : public BaseBackend { +public: + static ov::Core core_; + OpenVINOBackend() {} + virtual ~OpenVINOBackend() = default; + + bool Init(const RuntimeOption &option); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override; + + int NumOutputs() const override; + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + + std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) override; + +private: + bool + InitFromPaddle(const std::string &model_file, const std::string ¶ms_file, + const OpenVINOBackendOption &option = OpenVINOBackendOption()); + + bool + InitFromOnnx(const std::string &model_file, + const OpenVINOBackendOption &option = OpenVINOBackendOption()); + + void InitTensorInfo(const std::vector> &ov_outputs, + std::map *tensor_infos); + + ov::CompiledModel compiled_model_; + ov::InferRequest request_; + OpenVINOBackendOption option_; + std::vector input_infos_; + std::vector output_infos_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc new file mode 100755 index 0000000000..c09a2288a2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
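A brief sketch of the Clone() contract implemented in ov_backend.cc above: a clone copies the option and tensor descriptions and creates a fresh ov::InferRequest on the already compiled model, so compilation is not repeated. Here backend and runtime_option are assumed to come from an earlier successful Init, and inputs is a prepared std::vector<ultrainfer::FDTensor>.

// One request per worker; the compiled model itself is reused by the clone.
std::unique_ptr<ultrainfer::BaseBackend> worker =
    backend.Clone(runtime_option);           // stream / device_id keep their defaults
std::vector<ultrainfer::FDTensor> worker_outputs;
worker->Infer(inputs, &worker_outputs);      // independent request_, same compiled_model_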
+ +#ifndef NON_64_PLATFORM + +#include "adaptive_pool2d.h" + +namespace ultrainfer { + +void AdaptivePool2dKernel::CpuAdaptivePool( + const std::vector &input_size, + const std::vector &output_size, const float *input_data, + float *output_data) { + int64_t in_bc_offset = input_size[2] * input_size[3]; + int64_t out_bc_offset = output_size[2] * output_size[3]; + for (int64_t b = 0; b < output_size[0]; b++) { + for (int64_t c = 0; c < output_size[1]; c++) { + for (int64_t h = 0; h < output_size[2]; h++) { + int64_t hstart = + std::floor(static_cast(h * input_size[2]) / output_size[2]); + int64_t hend = std::ceil(static_cast((h + 1) * input_size[2]) / + output_size[2]); + for (int64_t w = 0; w < output_size[3]; w++) { + int64_t wstart = std::floor(static_cast(w * input_size[3]) / + output_size[3]); + int64_t wend = std::ceil(static_cast((w + 1) * input_size[3]) / + output_size[3]); + int64_t out_offset = h * output_size[3] + w; + output_data[out_offset] = 0; + for (auto i = hstart; i < hend; i++) { + for (auto j = wstart; j < wend; j++) { + if (pooling_type_ == "avg") { + output_data[out_offset] += input_data[i * input_size[3] + j]; + } + if (pooling_type_ == "max") { + output_data[out_offset] = std::max( + output_data[out_offset], input_data[i * input_size[3] + j]); + } + } + } + if (pooling_type_ == "avg") { + output_data[out_offset] /= ((hend - hstart) * (wend - wstart)); + } + } + } + output_data += out_bc_offset; + input_data += in_bc_offset; + } + } +} + +void AdaptivePool2dKernel::Compute(OrtKernelContext *context) { +#if ORT_API_VERSION >= 14 + Ort::KernelContext ort_context{context}; + Ort::ConstValue input = ort_context.GetInput(0); +#else + Ort::CustomOpApi api{ort_}; + Ort::Unowned input{ + const_cast(api.KernelContext_GetInput(context, 0))}; +#endif + auto input_data = input.GetTensorData(); + auto input_dim = input.GetTensorTypeAndShapeInfo().GetShape(); + + output_size_[0] = input_dim[0]; + std::vector input_size; + for (auto i : input_dim) { + input_size.push_back(i); + } + +#if ORT_API_VERSION >= 14 + auto output = ort_context.GetOutput(0, output_size_); +#else + Ort::Unowned output{api.KernelContext_GetOutput( + context, 0, output_size_.data(), output_size_.size())}; +#endif + float *output_data = output.GetTensorMutableData(); + if (!strcmp(this->provider_, "CUDAExecutionProvider")) { +#ifdef WITH_GPU + auto compute_stream = +#if ORT_API_VERSION >= 14 + ort_context.GetGPUComputeStream(); +#else + api.KernelContext_GetGPUComputeStream(context); +#endif + CudaAdaptivePool(input_size, output_size_, output_data, input_data, + compute_stream, pooling_type_); +#else + FDWARNING << "UltraInfer didn't compile with WITH_GPU. " + << "Will force to use CPU to run." 
<< std::endl; + CpuAdaptivePool(input_size, output_size_, input_data, output_data); +#endif + } else { + CpuAdaptivePool(input_size, output_size_, input_data, output_data); + } +} + +void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo *info) { +#if ORT_API_VERSION >= 14 + Ort::ConstKernelInfo ort_info{info}; + pooling_type_ = ort_info.GetAttribute("pooling_type"); + output_size_ = ort_info.GetAttributes("output_size"); +#else + Ort::CustomOpApi api{ort_}; + pooling_type_ = api.KernelInfoGetAttribute(info, "pooling_type"); + output_size_ = + api.KernelInfoGetAttribute>(info, "output_size"); +#endif + FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 && + output_size_[3] > 0, + "The output size of adaptive pool must be positive."); +} +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h new file mode 100755 index 0000000000..643d0b7537 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include +#include +#include +#include + +#ifndef NON_64_PLATFORM +#include "onnxruntime_cxx_api.h" // NOLINT + +#ifdef WITH_GPU +#include "ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h" +#endif + +namespace ultrainfer { +struct AdaptivePool2dKernel { +protected: + std::string pooling_type_ = "avg"; + std::vector output_size_ = {}; + OrtApi ort_; + void *compute_stream_; + const char *provider_; + +public: + AdaptivePool2dKernel(OrtApi ort, const OrtKernelInfo *info, + const char *provider) + : ort_(ort) { + GetAttribute(info); + provider_ = provider; + } + + void GetAttribute(const OrtKernelInfo *info); + + void Compute(OrtKernelContext *context); + + void CpuAdaptivePool(const std::vector &input_size, + const std::vector &output_size, + const float *input_data, float *output_data); +}; + +struct AdaptivePool2dOp + : Ort::CustomOpBase { + explicit AdaptivePool2dOp(const char *provider) : provider_(provider) {} + void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new AdaptivePool2dKernel(api, info, provider_); + } + + const char *GetName() const { return "AdaptivePool2d"; } + + size_t GetInputTypeCount() const { return 1; } + + ONNXTensorElementDataType GetInputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + size_t GetOutputTypeCount() const { return 1; } + + ONNXTensorElementDataType GetOutputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + const char *GetExecutionProviderType() const { return provider_; } + +private: + const char *provider_; +}; + +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc 
b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc new file mode 100755 index 0000000000..26f0686edc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc @@ -0,0 +1,287 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NON_64_PLATFORM + +#include "ultrainfer/runtime/backends/ort/ops/multiclass_nms.h" + +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +template +bool SortScorePairDescend(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndex(const float *scores, const int &score_size, + const float &threshold, const int &top_k, + std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +float BBoxArea(const float *box, const bool &normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return 0.f; + } else { + const float w = box[2] - box[0]; + const float h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +float JaccardOverlap(const float *box1, const float *box2, + const bool &normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return 0.f; + } else { + const float inter_xmin = std::max(box1[0], box2[0]); + const float inter_ymin = std::max(box1[1], box2[1]); + const float inter_xmax = std::min(box1[2], box2[2]); + const float inter_ymax = std::min(box1[3], box2[3]); + float norm = normalized ? 
0.0f : 1.0f; + float inter_w = inter_xmax - inter_xmin + norm; + float inter_h = inter_ymax - inter_ymin + norm; + const float inter_area = inter_w * inter_h; + const float bbox1_area = BBoxArea(box1, normalized); + const float bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +void MultiClassNmsKernel::FastNMS(const float *boxes, const float *scores, + const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int MultiClassNmsKernel::NMSForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void MultiClassNmsKernel::Compute(OrtKernelContext *context) { +#if ORT_API_VERSION >= 14 + Ort::KernelContext ort_context{context}; + Ort::ConstValue boxes = ort_context.GetInput(0); + Ort::ConstValue scores = ort_context.GetInput(1); +#else + Ort::CustomOpApi api{ort_}; + Ort::Unowned boxes{ + const_cast(api.KernelContext_GetInput(context, 0))}; + Ort::Unowned scores{ + const_cast(api.KernelContext_GetInput(context, 1))}; +#endif + + auto boxes_data = boxes.GetTensorData(); + auto scores_data = scores.GetTensorData(); + + auto boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape(); + auto scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape(); + + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 4, + 
"Require the 3-dimension of input boxes be 4, but now it's %ld.", + box_dim); + std::vector out_num_rois_dims = {batch_size}; +#if ORT_API_VERSION >= 14 + auto out_num_rois = ort_context.GetOutput(2, out_num_rois_dims); +#else + Ort::Unowned out_num_rois{api.KernelContext_GetOutput( + context, 2, out_num_rois_dims.data(), out_num_rois_dims.size())}; +#endif + int32_t *out_num_rois_data = out_num_rois.GetTensorMutableData(); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 6}; + std::vector out_index_dims = {num_nmsed_out, 1}; + +#if ORT_API_VERSION >= 14 + auto out_box = ort_context.GetOutput(0, out_box_dims); + auto out_index = ort_context.GetOutput(1, out_index_dims); +#else + Ort::Unowned out_box{api.KernelContext_GetOutput( + context, 0, out_box_dims.data(), out_box_dims.size())}; + Ort::Unowned out_index{api.KernelContext_GetOutput( + context, 1, out_index_dims.data(), out_index_dims.size())}; +#endif + + if (num_nmsed_out == 0) { + int32_t *out_num_rois_data = out_num_rois.GetTensorMutableData(); + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + float *out_box_data = out_box.GetTensorMutableData(); + int32_t *out_index_data = out_index.GetTensorMutableData(); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 6; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + + out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; + out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; + out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; + + out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} + +void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo *info) { +#if ORT_API_VERSION >= 14 + Ort::ConstKernelInfo ort_info{info}; + background_label = ort_info.GetAttribute("background_label"); + keep_top_k = ort_info.GetAttribute("keep_top_k"); + nms_eta = ort_info.GetAttribute("nms_eta"); + nms_threshold = ort_info.GetAttribute("nms_threshold"); + nms_top_k = ort_info.GetAttribute("nms_top_k"); + normalized = ort_info.GetAttribute("normalized"); + score_threshold = ort_info.GetAttribute("score_threshold"); +#else + Ort::CustomOpApi api{ort_}; + background_label = + api.KernelInfoGetAttribute(info, "background_label"); + keep_top_k = api.KernelInfoGetAttribute(info, "keep_top_k"); + nms_eta = api.KernelInfoGetAttribute(info, "nms_eta"); + nms_threshold = api.KernelInfoGetAttribute(info, "nms_threshold"); + nms_top_k = api.KernelInfoGetAttribute(info, "nms_top_k"); 
+ normalized = api.KernelInfoGetAttribute(info, "normalized"); + score_threshold = api.KernelInfoGetAttribute(info, "score_threshold"); +#endif +} +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h new file mode 100755 index 0000000000..1225467f80 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#ifndef NON_64_PLATFORM +#include "onnxruntime_cxx_api.h" // NOLINT + +namespace ultrainfer { + +struct MultiClassNmsKernel { +protected: + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.7; + int64_t nms_top_k; + bool normalized; + float score_threshold; + OrtApi ort_; + +public: + MultiClassNmsKernel(OrtApi ort, const OrtKernelInfo *info) : ort_(ort) { + GetAttribute(info); + } + + void GetAttribute(const OrtKernelInfo *info); + + void Compute(OrtKernelContext *context); + void FastNMS(const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices); + int NMSForEachSample(const float *boxes, const float *scores, int num_boxes, + int num_classes, + std::map> *keep_indices); +}; + +struct MultiClassNmsOp + : Ort::CustomOpBase { + void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new MultiClassNmsKernel(api, info); + } + + const char *GetName() const { return "MultiClassNMS"; } + + size_t GetInputTypeCount() const { return 2; } + + ONNXTensorElementDataType GetInputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + size_t GetOutputTypeCount() const { return 3; } + + ONNXTensorElementDataType GetOutputType(size_t index) const { + if (index == 0) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + } + + const char *GetExecutionProviderType() const { + return "CPUExecutionProvider"; + } +}; + +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h new file mode 100755 index 0000000000..3916b7613b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/enum_variables.h" +#include +#include +#include +#include +#include +namespace ultrainfer { + +/*! @brief Option object to configure ONNX Runtime backend + */ +struct OrtBackendOption { + /// Level of graph optimization, + /// /-1: mean default(Enable all the optimization strategy) + /// /0: disable all the optimization strategy/1: enable basic strategy + /// /2:enable extend strategy/99: enable all + int graph_optimization_level = -1; + /// Number of threads to execute the operator, -1: default + int intra_op_num_threads = -1; + /// Number of threads to execute the graph, + /// -1: default. This parameter only will bring effects + /// while the `OrtBackendOption::execution_mode` set to 1. + int inter_op_num_threads = -1; + /// Execution mode for the graph, -1: default(Sequential mode) + /// /0: Sequential mode, execute the operators in graph one by one. + /// /1: Parallel mode, execute the operators in graph parallelly. + int execution_mode = -1; + /// Inference device, OrtBackend supports CPU/GPU + Device device = Device::CPU; + /// Inference device id + int device_id = 0; + void *external_stream_ = nullptr; + /// Use fp16 to infer + bool enable_fp16 = false; + + std::vector ort_disabled_ops_{}; + void DisableOrtFP16OpTypes(const std::vector &ops) { + ort_disabled_ops_.insert(ort_disabled_ops_.end(), ops.begin(), ops.end()); + } +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc new file mode 100755 index 0000000000..be96da8abf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
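The OrtBackendOption struct above collects all tunables for the ONNX Runtime backend. As a minimal sketch (not part of the patch) of how such an option might be filled in before handing it to the backend; field names follow the struct above, and the FP16-exempt op names are placeholders:

#include "ultrainfer/runtime/backends/ort/option.h"

ultrainfer::OrtBackendOption MakeGpuFp16Option() {
  ultrainfer::OrtBackendOption opt;
  opt.graph_optimization_level = 99;      // enable all graph optimizations
  opt.intra_op_num_threads = 4;           // threads used inside an operator
  opt.execution_mode = 0;                 // sequential operator execution
  opt.device = ultrainfer::Device::GPU;
  opt.device_id = 0;
  opt.enable_fp16 = true;
  // Keep numerically sensitive op types in FP32 during FP16 conversion
  // (the op names here are illustrative placeholders).
  opt.DisableOrtFP16OpTypes({"Softmax", "LayerNormalization"});
  return opt;
}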
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/ort/option.h" + +namespace ultrainfer { + +void BindOrtOption(pybind11::module &m) { + pybind11::class_(m, "OrtBackendOption") + .def(pybind11::init()) + .def_readwrite("graph_optimization_level", + &OrtBackendOption::graph_optimization_level) + .def_readwrite("intra_op_num_threads", + &OrtBackendOption::intra_op_num_threads) + .def_readwrite("inter_op_num_threads", + &OrtBackendOption::inter_op_num_threads) + .def_readwrite("execution_mode", &OrtBackendOption::execution_mode) + .def_readwrite("device", &OrtBackendOption::device) + .def_readwrite("device_id", &OrtBackendOption::device_id) + .def_readwrite("enable_fp16", &OrtBackendOption::enable_fp16) + .def("disable_ort_fp16_op_types", + &OrtBackendOption::DisableOrtFP16OpTypes); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc new file mode 100755 index 0000000000..c6d48b1970 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc @@ -0,0 +1,455 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/ort/ort_backend.h" + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h" +#include "ultrainfer/runtime/backends/ort/ops/multiclass_nms.h" +#include "ultrainfer/runtime/backends/ort/utils.h" +#include "ultrainfer/utils/utils.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif + +#include + +namespace ultrainfer { + +std::vector OrtBackend::custom_operators_ = + std::vector(); + +bool OrtBackend::BuildOption(const OrtBackendOption &option) { + option_ = option; + if (option.graph_optimization_level >= 0) { + session_options_.SetGraphOptimizationLevel( + GraphOptimizationLevel(option.graph_optimization_level)); + } + if (option.intra_op_num_threads > 0) { + session_options_.SetIntraOpNumThreads(option.intra_op_num_threads); + } + if (option.inter_op_num_threads > 0) { + session_options_.SetInterOpNumThreads(option.inter_op_num_threads); + } + if (option.execution_mode >= 0) { + session_options_.SetExecutionMode(ExecutionMode(option.execution_mode)); + } + +#ifdef WITH_DIRECTML + // If use DirectML + if (option.device == Device::DIRECTML) { + auto all_providers = Ort::GetAvailableProviders(); + bool support_dml = false; + std::string providers_msg = ""; + for (size_t i = 0; i < all_providers.size(); ++i) { + providers_msg = providers_msg + all_providers[i] + ", "; + if (all_providers[i] == "DmlExecutionProvider") { + support_dml = true; + } + } + + if (!support_dml) { + FDWARNING << "Compiled ultrainfer with onnxruntime doesn't " + "support DirectML, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << "Please check if DirectML is installed successfully." 
+ << std::endl; + option_.device = Device::CPU; + } else { + // Must set as below when use dml. + session_options_.DisableMemPattern(); + session_options_.SetExecutionMode(ExecutionMode(0)); + + // DML session_option + OrtApi const &ortApi = Ort::GetApi(); + const OrtDmlApi *ortDmlApi; + ortApi.GetExecutionProviderApi( + "DML", ORT_API_VERSION, reinterpret_cast(&ortDmlApi)); + OrtStatus *onnx_dml_status = + ortDmlApi->SessionOptionsAppendExecutionProvider_DML(session_options_, + 0); + if (onnx_dml_status != nullptr) { + FDERROR + << "DirectML is not support in your machine, the program will exit." + << std::endl; + ortApi.ReleaseStatus(onnx_dml_status); + return false; + } + } + return true; + } +#endif + + // CUDA + if (option.device == Device::GPU) { + auto all_providers = Ort::GetAvailableProviders(); + bool support_cuda = false; + std::string providers_msg = ""; + for (size_t i = 0; i < all_providers.size(); ++i) { + providers_msg = providers_msg + all_providers[i] + ", "; + if (all_providers[i] == "CUDAExecutionProvider") { + support_cuda = true; + } + } + if (!support_cuda) { + FDWARNING << "Compiled ultrainfer with onnxruntime doesn't " + "support GPU, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << std::endl; + option_.device = Device::CPU; + } else { + OrtCUDAProviderOptions cuda_options; + cuda_options.device_id = option.device_id; + if (option.external_stream_) { + cuda_options.has_user_compute_stream = 1; + cuda_options.user_compute_stream = option.external_stream_; + } + session_options_.AppendExecutionProvider_CUDA(cuda_options); + } + return true; + } + return true; +} + +bool OrtBackend::Init(const RuntimeOption &option) { + if (option.device != Device::CPU && option.device != Device::GPU && + option.device != Device::DIRECTML) { + FDERROR + << "Backend::ORT only supports Device::CPU/Device::GPU, but now its " + << option.device << "." << std::endl; + return false; + } + OrtBackendOption ort_option = option.ort_option; + ort_option.device = option.device; + ort_option.device_id = option.device_id; + ort_option.external_stream_ = option.external_stream_; + + if (option.model_format == ModelFormat::PADDLE) { + if (option.model_from_memory_) { + return InitFromPaddle(option.model_file, option.params_file, ort_option); + } + std::string model_buffer, params_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + FDASSERT(ReadBinaryFromFile(option.params_file, ¶ms_buffer), + "Failed to read parameters file."); + return InitFromPaddle(model_buffer, params_buffer, ort_option); + } else if (option.model_format == ModelFormat::ONNX) { + if (option.model_from_memory_) { + return InitFromOnnx(option.model_file, ort_option); + } + std::string model_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + return InitFromOnnx(model_buffer, ort_option); + } else { + FDERROR << "Only support Paddle/ONNX model format for OrtBackend." + << std::endl; + return false; + } + return false; +} + +bool OrtBackend::InitFromPaddle(const std::string &model_buffer, + const std::string ¶ms_buffer, + const OrtBackendOption &option, bool verbose) { + if (initialized_) { + FDERROR << "OrtBackend is already initlized, cannot initialize again." 
+ << std::endl; + return false; + } + char *model_content_ptr; + int model_content_size = 0; + bool save_external = false; +#ifdef ENABLE_PADDLE2ONNX + std::vector ops; + ops.resize(2); + strcpy(ops[0].op_name, "multiclass_nms3"); + strcpy(ops[0].export_op_name, "MultiClassNMS"); + strcpy(ops[1].op_name, "pool2d"); + strcpy(ops[1].export_op_name, "AdaptivePool2d"); + converted_to_fp16 = option.enable_fp16; + + std::vector disable_fp16_ops; + for (auto i = 0; i < option.ort_disabled_ops_.size(); i++) { + auto one_type = option.ort_disabled_ops_[i]; + char *charStr = new char[one_type.size() + 1]; + std::strcpy(charStr, one_type.c_str()); + disable_fp16_ops.push_back(charStr); + } + if (!paddle2onnx::Export( + model_buffer.c_str(), model_buffer.size(), params_buffer.c_str(), + params_buffer.size(), &model_content_ptr, &model_content_size, 11, + true, verbose, true, true, true, ops.data(), 2, "onnxruntime", + nullptr, 0, "", &save_external, option.enable_fp16, + disable_fp16_ops.data(), option.ort_disabled_ops_.size())) { + FDERROR << "Error occured while export PaddlePaddle to ONNX format." + << std::endl; + return false; + } + + std::string onnx_model_proto(model_content_ptr, + model_content_ptr + model_content_size); + delete[] model_content_ptr; + model_content_ptr = nullptr; + if (save_external) { + model_file_name = "model.onnx"; + std::fstream f(model_file_name, std::ios::out); + FDASSERT(f.is_open(), "Can not open file: %s to save model.", + model_file_name.c_str()); + f << onnx_model_proto; + f.close(); + } + return InitFromOnnx(onnx_model_proto, option); +#else + FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to " + "call `InitFromOnnx` instead." + << std::endl; +#endif + return false; +} + +bool OrtBackend::InitFromOnnx(const std::string &model_file, + const OrtBackendOption &option) { + if (initialized_) { + FDERROR << "OrtBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + std::string onnx_model_buffer; + if (!converted_to_fp16 && option.enable_fp16) { + if (option.device == Device::CPU) { + FDWARNING << "Turning on FP16 on CPU may result in slower inference." + << std::endl; + } + char *model_content_ptr; + int model_content_size = 0; + paddle2onnx::ConvertFP32ToFP16(model_file.c_str(), model_file.size(), + &model_content_ptr, &model_content_size); + std::string onnx_model_proto(model_content_ptr, + model_content_ptr + model_content_size); + onnx_model_buffer = onnx_model_proto; + } else { + onnx_model_buffer = model_file; + } + + if (!BuildOption(option)) { + FDERROR << "Create Ort option fail." 
<< std::endl; + return false; + } + + InitCustomOperators(); + if (model_file_name.size()) { +#ifdef WIN32 + std::wstring widestr = + std::wstring(model_file_name.begin(), model_file_name.end()); + session_ = {env_, widestr.c_str(), session_options_}; +#else + session_ = {env_, model_file_name.c_str(), session_options_}; +#endif + } else { + session_ = {env_, onnx_model_buffer.data(), onnx_model_buffer.size(), + session_options_}; + } + + binding_ = std::make_shared(session_); + + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + Ort::Allocator allocator(session_, memory_info); + size_t n_inputs = session_.GetInputCount(); + for (size_t i = 0; i < n_inputs; ++i) { + auto input_name_ptr = session_.GetInputNameAllocated(i, allocator); + auto type_info = session_.GetInputTypeInfo(i); + std::vector shape = + type_info.GetTensorTypeAndShapeInfo().GetShape(); + ONNXTensorElementDataType data_type = + type_info.GetTensorTypeAndShapeInfo().GetElementType(); + inputs_desc_.emplace_back( + OrtValueInfo{input_name_ptr.get(), shape, data_type}); + } + + size_t n_outputs = session_.GetOutputCount(); + for (size_t i = 0; i < n_outputs; ++i) { + auto output_name_ptr = session_.GetOutputNameAllocated(i, allocator); + auto type_info = session_.GetOutputTypeInfo(i); + std::vector shape = + type_info.GetTensorTypeAndShapeInfo().GetShape(); + ONNXTensorElementDataType data_type = + type_info.GetTensorTypeAndShapeInfo().GetElementType(); + outputs_desc_.emplace_back( + OrtValueInfo{output_name_ptr.get(), shape, data_type}); + + Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + binding_->BindOutput(output_name_ptr.get(), out_memory_info); + } + initialized_ = true; + return true; +} + +void OrtBackend::OrtValueToFDTensor(const Ort::Value &value, FDTensor *tensor, + const std::string &name, bool copy_to_fd) { + const auto info = value.GetTensorTypeAndShapeInfo(); + const auto data_type = info.GetElementType(); + size_t numel = info.GetElementCount(); + auto shape = info.GetShape(); + FDDataType dtype; + + if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { + dtype = FDDataType::FP32; + numel *= sizeof(float); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { + dtype = FDDataType::INT32; + numel *= sizeof(int32_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { + dtype = FDDataType::INT64; + numel *= sizeof(int64_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { + dtype = FDDataType::FP64; + numel *= sizeof(double); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { + dtype = FDDataType::FP16; + numel *= sizeof(float16); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { + dtype = FDDataType::UINT8; + numel *= sizeof(uint8_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { + dtype = FDDataType::INT8; + numel *= sizeof(int8_t); + } else { + FDASSERT( + false, + "Unrecognized data type of %d while calling OrtBackend::CopyToCpu().", + data_type); + } + const void *value_ptr = value.GetTensorData(); + if (copy_to_fd) { + tensor->Resize(shape, dtype, name); + memcpy(tensor->MutableData(), value_ptr, numel); + } else { + tensor->name = name; + tensor->SetExternalData(shape, dtype, const_cast(value_ptr), + Device::CPU); + } +} + +bool OrtBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size() + << ") should keep same with 
the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + // from FDTensor to Ort Inputs + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU); + binding_->BindInput(inputs[i].name.c_str(), ort_value); + } + + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info); + } + + // Inference with inputs + RUNTIME_PROFILE_LOOP_BEGIN(1) + try { + session_.Run({}, *(binding_.get())); + } catch (const std::exception &e) { + FDERROR << "Failed to Infer: " << e.what() << std::endl; + return false; + } + RUNTIME_PROFILE_LOOP_END + + // Convert result after inference + std::vector ort_outputs = binding_->GetOutputValues(); + outputs->resize(ort_outputs.size()); + for (size_t i = 0; i < ort_outputs.size(); ++i) { + OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name, + copy_to_fd); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +TensorInfo OrtBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + TensorInfo info; + info.name = inputs_desc_[index].name; + info.shape.assign(inputs_desc_[index].shape.begin(), + inputs_desc_[index].shape.end()); + info.dtype = GetFdDtype(inputs_desc_[index].dtype); + return info; +} + +std::vector OrtBackend::GetInputInfos() { + auto size = inputs_desc_.size(); + std::vector infos; + infos.reserve(size); + for (auto i = 0; i < size; i++) { + infos.emplace_back(GetInputInfo(i)); + } + return infos; +} + +TensorInfo OrtBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + TensorInfo info; + info.name = outputs_desc_[index].name; + info.shape.assign(outputs_desc_[index].shape.begin(), + outputs_desc_[index].shape.end()); + info.dtype = GetFdDtype(outputs_desc_[index].dtype); + return info; +} + +std::vector OrtBackend::GetOutputInfos() { + std::vector infos; + for (auto i = 0; i < outputs_desc_.size(); i++) { + infos.emplace_back(GetOutputInfo(i)); + } + return infos; +} + +void OrtBackend::InitCustomOperators() { +#ifndef NON_64_PLATFORM + if (custom_operators_.size() == 0) { + MultiClassNmsOp *multiclass_nms = new MultiClassNmsOp{}; + custom_operators_.push_back(multiclass_nms); + if (option_.device == Device::GPU) { + AdaptivePool2dOp *adaptive_pool2d = + new AdaptivePool2dOp{"CUDAExecutionProvider"}; + custom_operators_.push_back(adaptive_pool2d); + } else { + AdaptivePool2dOp *adaptive_pool2d = + new AdaptivePool2dOp{"CPUExecutionProvider"}; + custom_operators_.push_back(adaptive_pool2d); + } + } + for (size_t i = 0; i < custom_operators_.size(); ++i) { + custom_op_domain_.Add(custom_operators_[i]); + } + session_options_.Add(custom_op_domain_); +#endif +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h new file mode 100755 index 0000000000..058f30e5ed --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/ort/option.h" + +#ifdef WITH_DIRECTML +#include "dml_provider_factory.h" // NOLINT +#endif + +namespace ultrainfer { + +struct OrtValueInfo { + std::string name; + std::vector shape; + ONNXTensorElementDataType dtype; +}; + +class OrtBackend : public BaseBackend { +public: + OrtBackend() {} + virtual ~OrtBackend() = default; + + bool BuildOption(const OrtBackendOption &option); + + bool Init(const RuntimeOption &option); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + static std::vector custom_operators_; + void InitCustomOperators(); + +private: + bool InitFromPaddle(const std::string &model_buffer, + const std::string ¶ms_buffer, + const OrtBackendOption &option = OrtBackendOption(), + bool verbose = false); + + bool InitFromOnnx(const std::string &model_buffer, + const OrtBackendOption &option = OrtBackendOption()); + + Ort::Env env_; + Ort::Session session_{nullptr}; + Ort::SessionOptions session_options_; + std::shared_ptr binding_; + std::vector inputs_desc_; + std::vector outputs_desc_; + + // the ONNX model file name, + // when ONNX is bigger than 2G, we will set this name + std::string model_file_name; + // recored if the model has been converted to fp16 + bool converted_to_fp16 = false; + +#ifndef NON_64_PLATFORM + Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle"); +#endif + OrtBackendOption option_; + void OrtValueToFDTensor(const Ort::Value &value, FDTensor *tensor, + const std::string &name, bool copy_to_fd); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc new file mode 100755 index 0000000000..2892e449dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
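As a rough usage sketch (not part of the patch), the OrtBackend declared above is driven through Init() with a RuntimeOption and then Infer() on FDTensors. The option fields used below are the ones read in OrtBackend::Init(); the model path is a placeholder:

#include "ultrainfer/runtime/backends/ort/ort_backend.h"

bool RunOnnxModelOnce(std::vector<ultrainfer::FDTensor>& inputs,
                      std::vector<ultrainfer::FDTensor>* outputs) {
  ultrainfer::RuntimeOption option;
  option.model_file = "model.onnx";                     // placeholder path
  option.model_format = ultrainfer::ModelFormat::ONNX;  // PADDLE is also accepted
  option.device = ultrainfer::Device::CPU;

  ultrainfer::OrtBackend backend;
  if (!backend.Init(option)) {
    return false;  // Init() already reports the reason via FDERROR
  }
  // copy_to_fd=true copies results back into CPU-resident FDTensors.
  return backend.Infer(inputs, outputs, /*copy_to_fd=*/true);
}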
+ +#include "ultrainfer/runtime/backends/ort/utils.h" + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +ONNXTensorElementDataType GetOrtDtype(const FDDataType &fd_dtype) { + if (fd_dtype == FDDataType::FP32) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } else if (fd_dtype == FDDataType::FP64) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; + } else if (fd_dtype == FDDataType::INT32) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + } else if (fd_dtype == FDDataType::INT64) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; + } else if (fd_dtype == FDDataType::UINT8) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; + } else if (fd_dtype == FDDataType::INT8) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; + } else if (fd_dtype == FDDataType::FP16) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + } + FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." + << std::endl; + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; +} + +FDDataType GetFdDtype(const ONNXTensorElementDataType &ort_dtype) { + if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { + return FDDataType::FP32; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { + return FDDataType::FP64; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { + return FDDataType::INT32; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { + return FDDataType::INT64; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { + return FDDataType::FP16; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { + return FDDataType::UINT8; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { + return FDDataType::INT8; + } + FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl; + return FDDataType::FP32; +} + +Ort::Value CreateOrtValue(FDTensor &tensor, bool is_backend_cuda) { + FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, + "Only support tensor which device is CPU or GPU for OrtBackend."); + if (tensor.device == Device::GPU && is_backend_cuda) { + Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + auto ort_value = Ort::Value::CreateTensor( + memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(), + tensor.shape.size(), GetOrtDtype(tensor.dtype)); + return ort_value; + } + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + auto ort_value = Ort::Value::CreateTensor( + memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(), + tensor.shape.size(), GetOrtDtype(tensor.dtype)); + return ort_value; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h new file mode 100755 index 0000000000..9cce53b57a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
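A short sketch of how the helpers defined above are typically combined when wrapping an FDTensor for ONNX Runtime; CreateOrtValue shares the tensor's buffer rather than copying it (a CPU tensor is assumed here):

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/runtime/backends/ort/utils.h"

Ort::Value WrapCpuTensor(ultrainfer::FDTensor& tensor) {
  // The FD dtype maps onto the ONNX element type via GetOrtDtype().
  ONNXTensorElementDataType ort_dtype = ultrainfer::GetOrtDtype(tensor.dtype);
  (void)ort_dtype;  // shown only to illustrate the mapping
  // Zero-copy wrap: the returned Ort::Value points at the tensor's CPU buffer.
  return ultrainfer::CreateOrtValue(tensor, /*is_backend_cuda=*/false);
}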
+ +#pragma once + +#include +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "ultrainfer/runtime/backends/backend.h" + +namespace ultrainfer { + +// Convert FDDataType to OrtDataType +ONNXTensorElementDataType GetOrtDtype(const FDDataType &fd_dtype); + +// Convert OrtDataType to FDDataType +FDDataType GetFdDtype(const ONNXTensorElementDataType &ort_dtype); + +// Create Ort::Value +// is_backend_cuda specify if the onnxruntime use CUDAExectionProvider +// While is_backend_cuda = true, and tensor.device = Device::GPU +// Will directly share the cuda data in tensor to OrtValue +Ort::Value CreateOrtValue(FDTensor &tensor, bool is_backend_cuda = false); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc new file mode 100755 index 0000000000..baaa72ed33 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if defined(WITH_GPU) + +#include +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector +postprocess_gpu(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity); + +std::vector +centerpoint_postprocess(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, + const int down_ratio, const float score_threshold, + const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity) { + if (hm[0].is_gpu()) { + return postprocess_gpu(hm, reg, height, dim, vel, rot, voxel_size, + point_cloud_range, post_center_range, num_classes, + down_ratio, score_threshold, nms_iou_threshold, + nms_pre_max_size, nms_post_max_size, with_velocity); + } else { + PD_THROW("Unsupported device type for centerpoint postprocess " + "operator."); + } +} + +std::vector> +PostProcessInferShape(const std::vector> &hm_shape, + const std::vector> ®_shape, + const std::vector> 
&height_shape, + const std::vector> &dim_shape, + const std::vector> &vel_shape, + const std::vector> &rot_shape, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, + const float nms_iou_threshold, const int nms_pre_max_size, + const int nms_post_max_size, const bool with_velocity) { + if (with_velocity) { + return {{-1, 9}, {-1}, {-1}}; + } else { + return {{-1, 7}, {-1}, {-1}}; + } +} + +std::vector +PostProcessInferDtype(const std::vector &hm_dtype, + const std::vector ®_dtype, + const std::vector &height_dtype, + const std::vector &dim_dtype, + const std::vector &vel_dtype, + const std::vector &rot_dtype) { + return {reg_dtype[0], hm_dtype[0], paddle::DataType::INT64}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(centerpoint_postprocess) + .Inputs({paddle::Vec("HM"), paddle::Vec("REG"), paddle::Vec("HEIGHT"), + paddle::Vec("DIM"), paddle::Vec("VEL"), paddle::Vec("ROT")}) + .Outputs({"BBOXES", "SCORES", "LABELS"}) + .SetKernelFn( + PD_KERNEL(ultrainfer::paddle_custom_ops::centerpoint_postprocess)) + .Attrs({"voxel_size: std::vector", + "point_cloud_range: std::vector", + "post_center_range: std::vector", + "num_classes: std::vector", "down_ratio: int", + "score_threshold: float", "nms_iou_threshold: float", + "nms_pre_max_size: int", "nms_post_max_size: int", + "with_velocity: bool"}) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::PostProcessInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::PostProcessInferDtype)); + +#endif // WITH_GPU diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu new file mode 100755 index 0000000000..de92cbf9b7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu @@ -0,0 +1,295 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
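The shape and dtype functions registered above fix the output contract of the op: BBOXES carries 9 values per box (x, y, z, three box dims, vx, vy, yaw) when velocity is predicted and 7 otherwise, while SCORES and LABELS hold one value per kept box. A trivial host-side restatement of that convention, for reference only:

#include <cstdint>
#include <vector>

// Mirrors PostProcessInferShape: {-1, 9|7} boxes, {-1} scores, {-1} labels.
std::vector<std::vector<int64_t>> CenterPointOutShapes(bool with_velocity) {
  const int64_t box_width = with_velocity ? 9 : 7;
  return {{-1, box_width}, {-1}, {-1}};
}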
+ +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_CUDA(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +#define CHECK_INPUT_BATCHSIZE(x) \ + PD_CHECK(x.shape()[0] == 1, #x " batch size must be 1.") + +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +__host__ __device__ static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +static const int THREADS_PER_BLOCK_NMS = sizeof(int64_t) * 8; + +void NmsLauncher(const cudaStream_t &stream, const float *bboxes, + const int *index, const int64_t *sorted_index, + const int num_bboxes, const int num_bboxes_for_nms, + const float nms_overlap_thresh, const int decode_bboxes_dims, + int64_t *mask); + +__global__ void decode_kernel( + const float *score, const float *reg, const float *height, const float *dim, + const float *vel, const float *rot, const float score_threshold, + const int feat_w, const float down_ratio, const float voxel_size_x, + const float voxel_size_y, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float post_center_range_x_min, + const float post_center_range_y_min, const float post_center_range_z_min, + const float post_center_range_x_max, const float post_center_range_y_max, + const float post_center_range_z_max, const int num_bboxes, + const bool with_velocity, const int decode_bboxes_dims, float *bboxes, + bool *mask, int *score_idx) { + int box_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (box_idx == num_bboxes || box_idx > num_bboxes) { + return; + } + const int xs = box_idx % feat_w; + const int ys = box_idx / feat_w; + + float x = reg[box_idx]; + float y = reg[box_idx + num_bboxes]; + float z = height[box_idx]; + + bboxes[box_idx * decode_bboxes_dims] = + (x + xs) * down_ratio * voxel_size_x + point_cloud_range_x_min; + bboxes[box_idx * decode_bboxes_dims + 1] = + (y + ys) * down_ratio * voxel_size_y + point_cloud_range_y_min; + bboxes[box_idx * decode_bboxes_dims + 2] = z; + bboxes[box_idx * decode_bboxes_dims + 3] = dim[box_idx]; + bboxes[box_idx * decode_bboxes_dims + 4] = dim[box_idx + num_bboxes]; + bboxes[box_idx * decode_bboxes_dims + 5] = dim[box_idx + 2 * num_bboxes]; + if (with_velocity) { + bboxes[box_idx * decode_bboxes_dims + 6] = vel[box_idx]; + bboxes[box_idx * decode_bboxes_dims + 7] = vel[box_idx + num_bboxes]; + bboxes[box_idx * decode_bboxes_dims + 8] = + atan2f(rot[box_idx], rot[box_idx + num_bboxes]); + } else { + bboxes[box_idx * decode_bboxes_dims + 6] = + atan2f(rot[box_idx], rot[box_idx + num_bboxes]); + } + + if (score[box_idx] > score_threshold && x <= post_center_range_x_max && + y <= post_center_range_y_max && z <= post_center_range_z_max && + x >= post_center_range_x_min && y >= post_center_range_y_min && + z >= post_center_range_z_min) { + mask[box_idx] = true; + } + + score_idx[box_idx] = box_idx; +} + +void DecodeLauncher( + const cudaStream_t &stream, const float *score, const float *reg, + const float *height, const float *dim, const float *vel, const float *rot, + const float score_threshold, const int feat_w, const float down_ratio, + const float voxel_size_x, const float voxel_size_y, + const float point_cloud_range_x_min, const float point_cloud_range_y_min, + const float post_center_range_x_min, const float 
post_center_range_y_min, + const float post_center_range_z_min, const float post_center_range_x_max, + const float post_center_range_y_max, const float post_center_range_z_max, + const int num_bboxes, const bool with_velocity, + const int decode_bboxes_dims, float *bboxes, bool *mask, int *score_idx) { + dim3 blocks(DIVUP(num_bboxes, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + decode_kernel<<>>( + score, reg, height, dim, vel, rot, score_threshold, feat_w, down_ratio, + voxel_size_x, voxel_size_y, point_cloud_range_x_min, + point_cloud_range_y_min, post_center_range_x_min, post_center_range_y_min, + post_center_range_z_min, post_center_range_x_max, post_center_range_y_max, + post_center_range_z_max, num_bboxes, with_velocity, decode_bboxes_dims, + bboxes, mask, score_idx); +} + +std::vector +postprocess_gpu(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity) { + int num_tasks = hm.size(); + int decode_bboxes_dims = 9; + if (!with_velocity) { + decode_bboxes_dims = 7; + } + float voxel_size_x = voxel_size[0]; + float voxel_size_y = voxel_size[1]; + float point_cloud_range_x_min = point_cloud_range[0]; + float point_cloud_range_y_min = point_cloud_range[1]; + + float post_center_range_x_min = post_center_range[0]; + float post_center_range_y_min = post_center_range[1]; + float post_center_range_z_min = post_center_range[2]; + float post_center_range_x_max = post_center_range[3]; + float post_center_range_y_max = post_center_range[4]; + float post_center_range_z_max = post_center_range[5]; + std::vector scores; + std::vector labels; + std::vector bboxes; + for (int task_id = 0; task_id < num_tasks; ++task_id) { + CHECK_INPUT_BATCHSIZE(hm[0]); + + int feat_h = hm[0].shape()[2]; + int feat_w = hm[0].shape()[3]; + int num_bboxes = feat_h * feat_w; + + // score and label + auto sigmoid_hm_per_task = paddle::experimental::sigmoid(hm[task_id]); + auto label_per_task = + paddle::experimental::argmax(sigmoid_hm_per_task, 1, true, false); + auto score_per_task = + paddle::experimental::max(sigmoid_hm_per_task, {1}, true); + // dim + auto exp_dim_per_task = paddle::experimental::exp(dim[task_id]); + + // decode bboxed and get mask of bboxes for nms + const float *score_ptr = score_per_task.data(); + const float *reg_ptr = reg[task_id].data(); + const float *height_ptr = height[task_id].data(); + // const float* dim_ptr = dim[task_id].data(); + const float *exp_dim_per_task_ptr = exp_dim_per_task.data(); + const float *vel_ptr = vel[task_id].data(); + const float *rot_ptr = rot[task_id].data(); + auto decode_bboxes = + paddle::empty({num_bboxes, decode_bboxes_dims}, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + float *decode_bboxes_ptr = decode_bboxes.data(); + auto thresh_mask = paddle::full({num_bboxes}, 0, paddle::DataType::BOOL, + paddle::GPUPlace()); + bool *thresh_mask_ptr = thresh_mask.data(); + auto score_idx = paddle::empty({num_bboxes}, paddle::DataType::INT32, + paddle::GPUPlace()); + int *score_idx_ptr = score_idx.data(); + + DecodeLauncher(score_per_task.stream(), score_ptr, reg_ptr, height_ptr, + exp_dim_per_task_ptr, vel_ptr, rot_ptr, 
score_threshold, + feat_w, down_ratio, voxel_size_x, voxel_size_y, + point_cloud_range_x_min, point_cloud_range_y_min, + post_center_range_x_min, post_center_range_y_min, + post_center_range_z_min, post_center_range_x_max, + post_center_range_y_max, post_center_range_z_max, num_bboxes, + with_velocity, decode_bboxes_dims, decode_bboxes_ptr, + thresh_mask_ptr, score_idx_ptr); + + // select score by mask + auto selected_score_idx = + paddle::experimental::masked_select(score_idx, thresh_mask); + auto flattened_selected_score = + paddle::experimental::reshape(score_per_task, {num_bboxes}); + auto selected_score = paddle::experimental::masked_select( + flattened_selected_score, thresh_mask); + int num_selected = selected_score.numel(); + if (num_selected == 0 || num_selected < 0) { + auto fake_out_boxes = + paddle::full({1, decode_bboxes_dims}, 0., paddle::DataType::FLOAT32, + paddle::GPUPlace()); + auto fake_out_score = + paddle::full({1}, -1., paddle::DataType::FLOAT32, paddle::GPUPlace()); + auto fake_out_label = + paddle::full({1}, 0, paddle::DataType::INT64, paddle::GPUPlace()); + scores.push_back(fake_out_score); + labels.push_back(fake_out_label); + bboxes.push_back(fake_out_boxes); + continue; + } + + // sort score by descending + auto sort_out = paddle::experimental::argsort(selected_score, 0, true); + auto sorted_index = std::get<1>(sort_out); + int num_bboxes_for_nms = + num_selected > nms_pre_max_size ? nms_pre_max_size : num_selected; + + // nms + // in NmsLauncher, rot = - theta - pi / 2 + int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS); + auto nms_mask = paddle::empty({num_bboxes_for_nms * col_blocks}, + paddle::DataType::INT64, paddle::GPUPlace()); + int64_t *nms_mask_data = nms_mask.data(); + + NmsLauncher(score_per_task.stream(), decode_bboxes.data(), + selected_score_idx.data(), sorted_index.data(), + num_selected, num_bboxes_for_nms, nms_iou_threshold, + decode_bboxes_dims, nms_mask_data); + + const paddle::Tensor nms_mask_cpu_tensor = + nms_mask.copy_to(paddle::CPUPlace(), true); + const int64_t *nms_mask_cpu = nms_mask_cpu_tensor.data(); + + auto remv_cpu = paddle::full({col_blocks}, 0, paddle::DataType::INT64, + paddle::CPUPlace()); + int64_t *remv_cpu_data = remv_cpu.data(); + int num_to_keep = 0; + auto keep = paddle::empty({num_bboxes_for_nms}, paddle::DataType::INT32, + paddle::CPUPlace()); + int *keep_data = keep.data(); + + for (int i = 0; i < num_bboxes_for_nms; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu_data[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + const int64_t *p = &nms_mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu_data[j] |= p[j]; + } + } + } + + int num_for_gather = + num_to_keep > nms_post_max_size ? 
nms_post_max_size : num_to_keep; + auto keep_gpu = paddle::empty({num_for_gather}, paddle::DataType::INT32, + paddle::GPUPlace()); + int *keep_gpu_ptr = keep_gpu.data(); + cudaMemcpy(keep_gpu_ptr, keep_data, num_for_gather * sizeof(int), + cudaMemcpyHostToDevice); + + auto gather_sorted_index = + paddle::experimental::gather(sorted_index, keep_gpu, 0); + auto gather_index = paddle::experimental::gather(selected_score_idx, + gather_sorted_index, 0); + + auto gather_score = + paddle::experimental::gather(selected_score, gather_sorted_index, 0); + auto flattened_label = + paddle::experimental::reshape(label_per_task, {num_bboxes}); + auto gather_label = + paddle::experimental::gather(flattened_label, gather_index, 0); + auto gather_bbox = + paddle::experimental::gather(decode_bboxes, gather_index, 0); + auto start_label = paddle::full( + {1}, num_classes[task_id], paddle::DataType::INT64, paddle::GPUPlace()); + auto added_label = paddle::experimental::add(gather_label, start_label); + scores.push_back(gather_score); + labels.push_back(added_label); + bboxes.push_back(gather_bbox); + } + + auto out_scores = paddle::experimental::concat(scores, 0); + auto out_labels = paddle::experimental::concat(labels, 0); + auto out_bboxes = paddle::experimental::concat(bboxes, 0); + return {out_bboxes, out_scores, out_labels}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc new file mode 100755 index 0000000000..f8784fdb7a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
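The keep-loop above walks a per-box suppression bitmask produced by NmsLauncher: row i of nms_mask records, in 64-bit words, which boxes box i overlaps beyond the IoU threshold, and a box survives only if no previously kept box has marked it. A CPU-only sketch of that bookkeeping (unsigned words are used here for clarity):

#include <cstdint>
#include <vector>

std::vector<int> CollectKept(const std::vector<uint64_t>& nms_mask,
                             int num_boxes, int col_blocks) {
  std::vector<uint64_t> removed(col_blocks, 0);  // per-64-box suppression words
  std::vector<int> keep;
  for (int i = 0; i < num_boxes; ++i) {
    const int nblock = i / 64;
    const int inblock = i % 64;
    if (removed[nblock] & (1ULL << inblock)) continue;  // already suppressed
    keep.push_back(i);
    const uint64_t* row = nms_mask.data() + static_cast<size_t>(i) * col_blocks;
    for (int j = nblock; j < col_blocks; ++j) removed[j] |= row[j];
  }
  return keep;
}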
+ +#if defined(WITH_GPU) + +#include "grid_sample_3d.h" + +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector +GridSample3DCUDAForward(const paddle::Tensor &x, const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, bool align_corners); + +std::vector GridSample3DForward(const paddle::Tensor &x, + const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, + bool align_corners) { + return GridSample3DCUDAForward(x, grid, mode, padding_mode, align_corners); +} + +std::vector +GridSample3DCUDABackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, + const std::string &mode, + const std::string &padding_mode, bool align_corners); + +std::vector +GridSample3DBackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, const std::string &mode, + const std::string &padding_mode, bool align_corners) { + return GridSample3DCUDABackward(x, grid, grad_out, mode, padding_mode, + align_corners); +} + +std::vector> +GridSample3DInferShape(std::vector x_shape, + std::vector grid_shape) { + return { + {x_shape[0], x_shape[1], grid_shape[1], grid_shape[2], grid_shape[3]}}; +} + +std::vector> +GridSample3DInferBackShape(std::vector x_shape, + std::vector grid_shape) { + return {x_shape}; +} + +std::vector +GridSample3DInferDtype(paddle::DataType x_dtype, paddle::DataType grid_dtype) { + return {x_dtype}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(grid_sample_3d) + .Inputs({"x", "grid"}) + .Attrs({"mode: std::string", "padding_mode: std::string", + "align_corners: bool"}) + .Outputs({"out"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::GridSample3DForward)) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::GridSample3DInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::GridSample3DInferDtype)); + +PD_BUILD_GRAD_OP(grid_sample_3d) + .Inputs({"x", "grid", paddle::Grad("out")}) + .Attrs({"mode: std::string", "padding_mode: std::string", + "align_corners: bool"}) + .Outputs({paddle::Grad("x")}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::GridSample3DBackward)) + .SetInferShapeFn(PD_INFER_SHAPE( + ultrainfer::paddle_custom_ops::GridSample3DInferBackShape)); + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu new file mode 100755 index 0000000000..d8847de863 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu @@ -0,0 +1,658 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
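GridSample3DInferShape above encodes the usual grid-sample contract: the output keeps the batch and channel dims of x and takes its spatial extent from the sampling grid, which is laid out as [N, D_out, H_out, W_out, 3]. A standalone restatement of that rule, for reference:

#include <cstdint>
#include <vector>

// Output shape is {N, C, D_out, H_out, W_out}.
std::vector<int64_t> GridSample3DOutShape(const std::vector<int64_t>& x_shape,
                                          const std::vector<int64_t>& grid_shape) {
  return {x_shape[0], x_shape[1], grid_shape[1], grid_shape[2], grid_shape[3]};
}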
+ +#include + +#include "grid_sample_3d.h" + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_GPU(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +static __forceinline__ __device__ bool +InBounds3D(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) { + return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; +} + +#define CUDA_KERNEL_LOOP_TYPE(i, n, index_type) \ + index_type _i_n_d_e_x = blockIdx.x * blockDim.x + threadIdx.x; \ + for (index_type i = _i_n_d_e_x; _i_n_d_e_x < (n); \ + _i_n_d_e_x += blockDim.x * gridDim.x, i = _i_n_d_e_x) + +#define CUDA_KERNEL_LOOP(i, n) CUDA_KERNEL_LOOP_TYPE(i, n, int) + +template +static __forceinline__ __device__ T Unnormalize(T coord, int size, + bool align_corners) { + if (align_corners) { + return ((coord + 1.f) / 2) * (size - 1); + } else { + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T ClipIndexes(T in, int max_value) { + return min(static_cast(max_value), max(in, static_cast(0))); +} + +template +static __forceinline__ __device__ T ReflectIndexes(T in, int twice_low, + int twice_high) { + if (twice_low == twice_high) { + return static_cast(0); + } + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = fabs(in - min); + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + return extra + min; + } else { + return span - extra + min; + } +} + +template +static __forceinline__ __device__ T ComputePositions(T coord, int size, + PaddingMode padding_mode, + bool align_corners) { + coord = Unnormalize(coord, size, align_corners); + if (padding_mode == PaddingMode::border) { + coord = ClipIndexes(coord, size - 1); + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = ReflectIndexes(coord, 0, 2 * (size - 1)); + } else { + coord = ReflectIndexes(coord, -1, 2 * size - 1); + } + coord = ClipIndexes(coord, size - 1); + } + return coord; +} + +template +__global__ void +GridSample3DCudaKernel(const index_t nthreads, index_t out_c, index_t out_d, + index_t out_h, index_t out_w, index_t in_d, index_t in_h, + index_t in_w, const T *input, const T *grid, T *output, + const Mode interpolation_mode, + const PaddingMode padding_mode, bool align_corners) { + // printf("size: %d, %d, %d, %d, %d, %d \n", out_c, out_d, out_w, out_h, in_d, + // in_w); + index_t inp_sW = 1; + index_t inp_sH = in_w; + index_t inp_sD = in_h * in_w; + index_t inp_sC = in_d * inp_sD; + index_t inp_sN = out_c * inp_sC; + + index_t grid_sCoor = 1; + index_t grid_sW = 3; + index_t grid_sH = out_w * grid_sW; + index_t grid_sD = out_h * grid_sH; + index_t grid_sN = out_d * grid_sD; + + index_t out_sW = 1; + index_t out_sH = out_w; + index_t out_sD = out_h * out_w; + index_t out_sC = out_d * out_sD; + index_t out_sN = out_c * out_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) { + const index_t w = index % out_w; + const index_t h = (index / out_w) % out_h; + const index_t d = (index / (out_h * out_w)) % out_d; + const index_t n = index / (out_d * out_h * out_w); + const index_t grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + 
T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + ix = ComputePositions(ix, in_w, padding_mode, align_corners); + iy = ComputePositions(iy, in_h, padding_mode, align_corners); + iz = ComputePositions(iz, in_d, padding_mode, align_corners); + // printf("ix: %f, iy: %f, iz: %f \n", ix, iy, iz); + if (interpolation_mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + index_t ix_tnw = static_cast(std::floor(ix)); + index_t iy_tnw = static_cast(std::floor(iy)); + index_t iz_tnw = static_cast(std::floor(iz)); + + index_t ix_tne = ix_tnw + 1; + index_t iy_tne = iy_tnw; + index_t iz_tne = iz_tnw; + + index_t ix_tsw = ix_tnw; + index_t iy_tsw = iy_tnw + 1; + index_t iz_tsw = iz_tnw; + + index_t ix_tse = ix_tnw + 1; + index_t iy_tse = iy_tnw + 1; + index_t iz_tse = iz_tnw; + + index_t ix_bnw = ix_tnw; + index_t iy_bnw = iy_tnw; + index_t iz_bnw = iz_tnw + 1; + + index_t ix_bne = ix_tnw + 1; + index_t iy_bne = iy_tnw; + index_t iz_bne = iz_tnw + 1; + + index_t ix_bsw = ix_tnw; + index_t iy_bsw = iy_tnw + 1; + index_t iz_bsw = iz_tnw + 1; + + index_t ix_bse = ix_tnw + 1; + index_t iy_bse = iy_tnw + 1; + index_t iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (index_t c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + *out_ptr_NCDHW = static_cast(0); + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] * + tnw; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] * + tne; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] * + tsw; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] * + tse; + } + if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] * + bnw; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] * + bne; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] * + bsw; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] * + bse; + } + } + } else if (interpolation_mode == Mode::nearest) { + index_t ix_nearest = static_cast(std::round(ix)); + index_t iy_nearest = static_cast(std::round(iy)); + index_t iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value 
to output pixel + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (index_t c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + if (InBounds3D(iz_nearest, iy_nearest, ix_nearest, in_d, in_h, in_w)) { + *out_ptr_NCDHW = + inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH + + ix_nearest * inp_sW]; + } else { + *out_ptr_NCDHW = static_cast(0); + } + } + } + } +} + +std::vector +GridSample3DCUDAForward(const paddle::Tensor &x, const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, bool align_corners) { + CHECK_INPUT_GPU(x); + CHECK_INPUT_GPU(grid); + PaddingMode enum_padding_mode; + Mode enum_mode; + if (padding_mode == "border") { + enum_padding_mode = PaddingMode::border; + } else if (padding_mode == "reflection") { + enum_padding_mode = PaddingMode::reflect; + } else { + enum_padding_mode = PaddingMode::zeros; + } + + if (mode == "nearest") { + enum_mode = Mode::nearest; + } else { + enum_mode = Mode::bilinear; + } + const int n = grid.shape()[0]; + const int out_d = grid.shape()[1]; + const int out_h = grid.shape()[2]; + const int out_w = grid.shape()[3]; + const int c = x.shape()[1]; + const int in_d = x.shape()[2]; + const int in_h = x.shape()[3]; + const int in_w = x.shape()[4]; + + auto output = paddle::full({n, c, out_d, out_h, out_w}, 0, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + const int count = static_cast(n * out_d * out_h * out_w); + + int max_threads_per_block = 512; + int block_num = (count - 1) / max_threads_per_block + 1; + // printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count, + // block_num); + GridSample3DCudaKernel + <<>>( + count, c, out_d, out_h, out_w, in_d, in_h, in_w, x.data(), + grid.data(), output.data(), enum_mode, + enum_padding_mode, align_corners); + + cudaError_t error_check; + error_check = cudaGetLastError(); + if (error_check != cudaSuccess) { + printf("%s\n", cudaGetErrorString(error_check)); + } + // printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count, + // block_num); + return {output}; +} + +template +static __forceinline__ __device__ T UnnormalizeWithMask(T coord, int size, + bool align_corners, + T *grad_in) { + if (align_corners) { + *grad_in = static_cast(size - 1) / 2; + return ((coord + 1.f) / 2) * (size - 1); + } else { + *grad_in = static_cast(size) / 2; + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T ClipIndexesWithMask(T in, int clip_limit, + T *grad_in) { + if (in <= static_cast(0)) { + *grad_in = static_cast(0); + return static_cast(0); + } else { + T max = static_cast(clip_limit - 1); + if (in >= max) { + *grad_in = static_cast(0); + return max; + } else { + *grad_in = static_cast(1); + return in; + } + } +} + +template +static __forceinline__ __device__ T ReflectIndexesWithMask(T in, int twice_low, + int twice_high, + T *grad_in) { + if (twice_low == twice_high) { + *grad_in = static_cast(0); + return static_cast(0); + } + int grad_in_mult_; + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = in - min; + if (in < static_cast(0)) { + grad_in_mult_ = -1; + in = -in; + } else { + grad_in_mult_ = 1; + } + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + *grad_in = static_cast(grad_in_mult_); + return extra + min; + } else { + *grad_in = static_cast(-grad_in_mult_); + return span - extra + min; + } +} + +template +static 
__forceinline__ __device__ T +ComputePositionsWithMask(T coord, int size, PaddingMode padding_mode, + bool align_corners, T *grad_in) { + T grad_clip, grad_refl; + coord = UnnormalizeWithMask(coord, size, align_corners, grad_in); + if (padding_mode == PaddingMode::border) { + coord = ClipIndexesWithMask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_clip; + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = ReflectIndexesWithMask(coord, 0, 2 * (size - 1), &grad_refl); + } else { + coord = ReflectIndexesWithMask(coord, -1, 2 * size - 1, &grad_refl); + } + coord = ClipIndexesWithMask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_refl * grad_clip; + } + + return coord; +} + +template +static __forceinline__ __device__ void +AtomicAdd3D(T *data, int64_t d, int64_t h, int64_t w, int64_t sD, int64_t sH, + int64_t sW, int64_t D, int64_t H, int64_t W, T delta) { + if (InBounds3D(d, h, w, D, H, W)) { + atomicAdd(data + d * sD + h * sH + w * sW, delta); + } +} + +template +__global__ void GridSample3DCudaBackwardKernel( + const index_t nthreads, const T *grad_output, const T *input, const T *grid, + index_t out_c, index_t out_d, index_t out_h, index_t out_w, index_t in_d, + index_t in_h, index_t in_w, T *grad_input, T *grad_grid, const Mode mode, + const PaddingMode padding_mode, bool align_corners) { + index_t inp_sW = 1; + index_t inp_sH = in_w; + index_t inp_sD = in_h * in_w; + index_t inp_sC = in_d * inp_sD; + index_t inp_sN = out_c * inp_sC; + + index_t grid_sCoor = 1; + index_t grid_sW = 3; + index_t grid_sH = out_w * grid_sW; + index_t grid_sD = out_h * grid_sH; + index_t grid_sN = out_d * grid_sD; + + index_t gOut_sW = 1; + index_t gOut_sH = out_w; + index_t gOut_sD = out_h * out_w; + index_t gOut_sC = out_d * gOut_sD; + index_t gOut_sN = out_c * gOut_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) { + const index_t w = index % out_w; + const index_t h = (index / out_w) % out_h; + const index_t d = (index / (out_h * out_w)) % out_d; + const index_t n = index / (out_d * out_h * out_w); + const auto grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + + // multipliers for gradients on ix, iy, and iz + T gix_mult, giy_mult, giz_mult; + ix = ComputePositionsWithMask(ix, in_w, padding_mode, align_corners, + &gix_mult); + iy = ComputePositionsWithMask(iy, in_h, padding_mode, align_corners, + &giy_mult); + iz = ComputePositionsWithMask(iz, in_d, padding_mode, align_corners, + &giz_mult); + + if (mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + index_t ix_tnw = static_cast(std::floor(ix)); + index_t iy_tnw = static_cast(std::floor(iy)); + index_t iz_tnw = static_cast(std::floor(iz)); + + index_t ix_tne = ix_tnw + 1; + index_t iy_tne = iy_tnw; + index_t iz_tne = iz_tnw; + + index_t ix_tsw = ix_tnw; + index_t iy_tsw = iy_tnw + 1; + index_t iz_tsw = iz_tnw; + + index_t ix_tse = ix_tnw + 1; + index_t iy_tse = iy_tnw + 1; + index_t iz_tse = iz_tnw; + + index_t ix_bnw = ix_tnw; + index_t iy_bnw = iy_tnw; + index_t iz_bnw = iz_tnw + 1; + + index_t ix_bne = ix_tnw + 1; + index_t iy_bne = iy_tnw; + index_t iz_bne = iz_tnw + 1; + + index_t ix_bsw = ix_tnw; + index_t iy_bsw = iy_tnw + 1; + index_t iz_bsw = iz_tnw + 1; + + index_t ix_bse = ix_tnw + 1; + 
index_t iy_bse = iy_tnw + 1; + index_t iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + T gix = static_cast(0), giy = static_cast(0), + giz = static_cast(0); + index_t gOut_offset = + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + index_t inp_offset_NC = n * inp_sN; + T *gInp_ptr_NC = grad_input + n * inp_sN; + for (index_t c = 0; c < out_c; ++c, gOut_offset += gOut_sC, + gInp_ptr_NC += inp_sC, inp_offset_NC += inp_sC) { + T gOut = grad_output[gOut_offset]; + + AtomicAdd3D(gInp_ptr_NC, iz_tnw, iy_tnw, ix_tnw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tnw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tne, iy_tne, ix_tne, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tne * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tsw, iy_tsw, ix_tsw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tsw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tse, iy_tse, ix_tse, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tse * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bnw, iy_bnw, ix_bnw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bnw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bne, iy_bne, ix_bne, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bne * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bsw, iy_bsw, ix_bsw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bsw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bse, iy_bse, ix_bse, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bse * gOut); + + // calculate grad_grid + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + T tnw_val = input[inp_offset_NC + iz_tnw * inp_sD + iy_tnw * inp_sH + + ix_tnw * inp_sW]; + gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut; + giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut; + giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + T tne_val = input[inp_offset_NC + iz_tne * inp_sD + iy_tne * inp_sH + + ix_tne * inp_sW]; + gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut; + giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut; + giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + T tsw_val = input[inp_offset_NC + iz_tsw * inp_sD + iy_tsw * inp_sH + + ix_tsw * inp_sW]; + gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut; + giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut; + giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + T tse_val = input[inp_offset_NC + iz_tse * inp_sD + iy_tse * inp_sH + + ix_tse * inp_sW]; + gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut; + giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut; + giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut; + } + if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + T bnw_val = input[inp_offset_NC + iz_bnw * inp_sD + iy_bnw * inp_sH + + ix_bnw * inp_sW]; + gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut; + giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut; + giz += bnw_val * (ix_tse - ix) * (iy_tse - iy) * gOut; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + T bne_val 
= input[inp_offset_NC + iz_bne * inp_sD + iy_bne * inp_sH + + ix_bne * inp_sW]; + gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut; + giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut; + giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + T bsw_val = input[inp_offset_NC + iz_bsw * inp_sD + iy_bsw * inp_sH + + ix_bsw * inp_sW]; + gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut; + giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut; + giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + T bse_val = input[inp_offset_NC + iz_bse * inp_sD + iy_bse * inp_sH + + ix_bse * inp_sW]; + gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut; + giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut; + giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut; + } + } + if (grad_grid != nullptr) { + T *gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = gix_mult * gix; + gGrid_ptr_NDHW[1] = giy_mult * giy; + gGrid_ptr_NDHW[2] = giz_mult * giz; + } + } else if (mode == Mode::nearest) { + auto ix_nearest = static_cast(std::round(ix)); + auto iy_nearest = static_cast(std::round(iy)); + auto iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value to output pixel + index_t gOut_offset = + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + T *gInp_ptr_NC = grad_input + n * inp_sN; + for (index_t c = 0; c < out_c; + ++c, gOut_offset += gOut_sC, gInp_ptr_NC += inp_sC) { + AtomicAdd3D(gInp_ptr_NC, iz_nearest, iy_nearest, ix_nearest, inp_sD, + inp_sH, inp_sW, in_d, in_h, in_w, grad_output[gOut_offset]); + } + if (grad_grid != nullptr) { + T *gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = static_cast(0); + gGrid_ptr_NDHW[1] = static_cast(0); + gGrid_ptr_NDHW[2] = static_cast(0); + } + } + } +} + +std::vector +GridSample3DCUDABackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, + const std::string &mode, + const std::string &padding_mode, bool align_corners) { + PaddingMode enum_padding_mode; + Mode enum_mode; + if (padding_mode == "border") { + enum_padding_mode = PaddingMode::border; + } else if (padding_mode == "reflection") { + enum_padding_mode = PaddingMode::reflect; + } else { + enum_padding_mode = PaddingMode::zeros; + } + + if (mode == "nearest") { + enum_mode = Mode::nearest; + } else { + enum_mode = Mode::bilinear; + } + + const int out_d = grid.shape()[1]; + const int out_h = grid.shape()[2]; + const int out_w = grid.shape()[3]; + const int n = x.shape()[0]; + const int c = x.shape()[1]; + const int in_d = x.shape()[2]; + const int in_h = x.shape()[3]; + const int in_w = x.shape()[4]; + + auto grid_grad_output = + paddle::empty({n, out_d, out_h, out_w, 3}, paddle::DataType::FLOAT32, + paddle::GPUPlace()); + auto x_grad_output = + paddle::full({n, c, in_d, in_h, in_w}, 0, paddle::DataType::FLOAT32, + paddle::GPUPlace()); + + const int count = static_cast(n * out_d * out_h * out_w); + + int max_threads_per_block = 512; + int block_num = (count - 1) / max_threads_per_block + 1; + + GridSample3DCudaBackwardKernel + <<>>( + count, grad_out.data(), x.data(), grid.data(), c, + out_d, out_h, out_w, in_d, in_h, in_w, x_grad_output.data(), + grid_grad_output.data(), enum_mode, enum_padding_mode, + align_corners); + + return {x_grad_output}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h new file mode 100755 index 0000000000..9374cb75e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#pragma once + +#include +#include +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define HOST_DEVICE __host__ __device__ +#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ + +enum class Mode { bilinear, nearest }; + +enum class PaddingMode { zeros, border, reflect }; + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc new file mode 100755 index 0000000000..4404e5f345 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +3D Rotated IoU Calculation (CPU) +Written by Shaoshuai Shi +All Rights Reserved 2020. +*/ + +#include "iou3d_cpu.h" +#include +#include +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +static inline float min(float a, float b) { return a > b ? b : a; } + +static inline float max(float a, float b) { return a > b ? 
a : b; } + +#if defined(_WIN32) +#if defined(EPS) +#undef EPS +#endif +#define EPS 1e-8 +#else +static const float EPS = 1e-8; +#endif + +struct Point { + float x, y; + Point() {} + Point(double _x, double _y) { x = _x, y = _y; } + + void set(float _x, float _y) { + x = _x; + y = _y; + } + + Point operator+(const Point &b) const { return Point(x + b.x, y + b.y); } + + Point operator-(const Point &b) const { return Point(x - b.x, y - b.y); } +}; + +static inline float cross(const Point &a, const Point &b) { + return a.x * b.y - a.y * b.x; +} + +static inline float cross(const Point &p1, const Point &p2, const Point &p0) { + return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); +} + +static inline int check_rect_cross(const Point &p1, const Point &p2, + const Point &q1, const Point &q2) { + int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && + min(q1.x, q2.x) <= max(p1.x, p2.x) && + min(p1.y, p2.y) <= max(q1.y, q2.y) && + min(q1.y, q2.y) <= max(p1.y, p2.y); + return ret; +} + +static inline int check_in_box2d(const float *box, const Point &p) { + // params: (7) [x, y, z, dx, dy, dz, heading] + const float MARGIN = 1e-2; + + float center_x = box[0], center_y = box[1]; + float angle_cos = cos(-box[6]), + angle_sin = + sin(-box[6]); // rotate the point in the opposite direction of box + float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); + float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; + + return (fabs(rot_x) < box[3] / 2 + MARGIN && + fabs(rot_y) < box[4] / 2 + MARGIN); +} + +static inline int intersection(const Point &p1, const Point &p0, + const Point &q1, const Point &q0, Point &ans) { + // fast exclusion + if (check_rect_cross(p0, p1, q0, q1) == 0) + return 0; + + // check cross standing + float s1 = cross(q0, p1, p0); + float s2 = cross(p1, q1, p0); + float s3 = cross(p0, q1, q0); + float s4 = cross(q1, p1, q0); + + if (!(s1 * s2 > 0 && s3 * s4 > 0)) + return 0; + + // calculate intersection of two lines + float s5 = cross(q1, p1, p0); + if (fabs(s5 - s1) > EPS) { + ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); + ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); + + } else { + float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; + float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; + float D = a0 * b1 - a1 * b0; + + ans.x = (b0 * c1 - b1 * c0) / D; + ans.y = (a1 * c0 - a0 * c1) / D; + } + + return 1; +} + +static inline void rotate_around_center(const Point ¢er, + const float angle_cos, + const float angle_sin, Point &p) { + float new_x = + (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x; + float new_y = + (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; + p.set(new_x, new_y); +} + +static inline int point_cmp(const Point &a, const Point &b, + const Point ¢er) { + return atan2(a.y - center.y, a.x - center.x) > + atan2(b.y - center.y, b.x - center.x); +} + +static inline float box_overlap(const float *box_a, const float *box_b) { + // params: box_a (7) [x, y, z, dx, dy, dz, heading] + // params: box_b (7) [x, y, z, dx, dy, dz, heading] + + // float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = + // box_a[3], a_angle = box_a[4]; + // float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = + // box_b[3], b_angle = box_b[4]; + float a_angle = box_a[6], b_angle = box_b[6]; + float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, + a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; + float a_x1 = box_a[0] - a_dx_half, 
a_y1 = box_a[1] - a_dy_half; + float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; + float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; + float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; + + Point center_a(box_a[0], box_a[1]); + Point center_b(box_b[0], box_b[1]); + + Point box_a_corners[5]; + box_a_corners[0].set(a_x1, a_y1); + box_a_corners[1].set(a_x2, a_y1); + box_a_corners[2].set(a_x2, a_y2); + box_a_corners[3].set(a_x1, a_y2); + + Point box_b_corners[5]; + box_b_corners[0].set(b_x1, b_y1); + box_b_corners[1].set(b_x2, b_y1); + box_b_corners[2].set(b_x2, b_y2); + box_b_corners[3].set(b_x1, b_y2); + + // get oriented corners + float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); + float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); + + for (int k = 0; k < 4; k++) { + rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); + rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); + } + + box_a_corners[4] = box_a_corners[0]; + box_b_corners[4] = box_b_corners[0]; + + // get intersection of lines + Point cross_points[16]; + Point poly_center; + int cnt = 0, flag = 0; + + poly_center.set(0, 0); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + flag = intersection(box_a_corners[i + 1], box_a_corners[i], + box_b_corners[j + 1], box_b_corners[j], + cross_points[cnt]); + if (flag) { + poly_center = poly_center + cross_points[cnt]; + cnt++; + } + } + } + + // check corners + for (int k = 0; k < 4; k++) { + if (check_in_box2d(box_a, box_b_corners[k])) { + poly_center = poly_center + box_b_corners[k]; + cross_points[cnt] = box_b_corners[k]; + cnt++; + } + if (check_in_box2d(box_b, box_a_corners[k])) { + poly_center = poly_center + box_a_corners[k]; + cross_points[cnt] = box_a_corners[k]; + cnt++; + } + } + + poly_center.x /= cnt; + poly_center.y /= cnt; + + // sort the points of polygon + Point temp; + for (int j = 0; j < cnt - 1; j++) { + for (int i = 0; i < cnt - j - 1; i++) { + if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) { + temp = cross_points[i]; + cross_points[i] = cross_points[i + 1]; + cross_points[i + 1] = temp; + } + } + } + + // get the overlap areas + float area = 0; + for (int k = 0; k < cnt - 1; k++) { + area += cross(cross_points[k] - cross_points[0], + cross_points[k + 1] - cross_points[0]); + } + + return fabs(area) / 2.0; +} + +static inline float iou_bev(const float *box_a, const float *box_b) { + // params: box_a (7) [x, y, z, dx, dy, dz, heading] + // params: box_b (7) [x, y, z, dx, dy, dz, heading] + float sa = box_a[3] * box_a[4]; + float sb = box_b[3] * box_b[4]; + float s_overlap = box_overlap(box_a, box_b); + return s_overlap / fmaxf(sa + sb - s_overlap, EPS); +} + +int boxes_iou_bev_cpu(paddle::Tensor boxes_a_tensor, + paddle::Tensor boxes_b_tensor, + paddle::Tensor ans_iou_tensor) { + // params boxes_a_tensor: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b_tensor: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_iou_tensor: (N, M) + + // CHECK_CONTIGUOUS(boxes_a_tensor); + // CHECK_CONTIGUOUS(boxes_b_tensor); + + int num_boxes_a = boxes_a_tensor.shape()[0]; + int num_boxes_b = boxes_b_tensor.shape()[0]; + const float *boxes_a = boxes_a_tensor.data(); + const float *boxes_b = boxes_b_tensor.data(); + float *ans_iou = ans_iou_tensor.data(); + + for (int i = 0; i < num_boxes_a; i++) { + for (int j = 0; j < num_boxes_b; j++) { + ans_iou[i * num_boxes_b + j] = iou_bev(boxes_a + i * 7, boxes_b + j * 7); + } + } + return 1; 
+} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h new file mode 100755 index 0000000000..09fe5b9ccc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h @@ -0,0 +1,35 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +ULTRAINFER_DECL int boxes_iou_bev_cpu(paddle::Tensor boxes_a_tensor, + paddle::Tensor boxes_b_tensor, + paddle::Tensor ans_iou_tensor); + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc new file mode 100755 index 0000000000..627e603cff --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc @@ -0,0 +1,241 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. +*/ + +#if defined(WITH_GPU) + +#include +#include + +#include "iou3d_nms.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +#define CHECK_ERROR(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, + bool abort = true) { + if (code != cudaSuccess) { + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); + } +} + +#define D(x) \ + PD_THROW('\n', x, \ + "\n--------------------------------- where is the error ? 
" \ + "---------------------------------------\n"); + +static const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; + +void boxesoverlapLauncher(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap); +void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, + const float *boxes_b, float *ans_iou); +void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, + float nms_overlap_thresh); +void nmsNormalLauncher(const float *boxes, unsigned long long *mask, + int boxes_num, float nms_overlap_thresh); + +int boxes_overlap_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b, + paddle::Tensor ans_overlap) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_overlap: (N, M) + + CHECK_INPUT(boxes_a); + CHECK_INPUT(boxes_b); + CHECK_INPUT(ans_overlap); + + int num_a = boxes_a.shape()[0]; + int num_b = boxes_b.shape()[0]; + + const float *boxes_a_data = boxes_a.data(); + const float *boxes_b_data = boxes_b.data(); + float *ans_overlap_data = ans_overlap.data(); + + boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, + ans_overlap_data); + + return 1; +} + +int boxes_iou_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b, + paddle::Tensor ans_iou) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_overlap: (N, M) + CHECK_INPUT(boxes_a); + CHECK_INPUT(boxes_b); + CHECK_INPUT(ans_iou); + + int num_a = boxes_a.shape()[0]; + int num_b = boxes_b.shape()[0]; + + const float *boxes_a_data = boxes_a.data(); + const float *boxes_b_data = boxes_b.data(); + float *ans_iou_data = ans_iou.data(); + + boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data); + + return 1; +} + +std::vector nms_gpu(const paddle::Tensor &boxes, + float nms_overlap_thresh) { + // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] + // params keep: (N) + CHECK_INPUT(boxes); + // CHECK_CONTIGUOUS(keep); + auto keep = paddle::empty({boxes.shape()[0]}, paddle::DataType::INT32, + paddle::CPUPlace()); + auto num_to_keep_tensor = + paddle::empty({1}, paddle::DataType::INT32, paddle::CPUPlace()); + int *num_to_keep_data = num_to_keep_tensor.data(); + + int boxes_num = boxes.shape()[0]; + const float *boxes_data = boxes.data(); + int *keep_data = keep.data(); + + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + + unsigned long long *mask_data = NULL; + CHECK_ERROR(cudaMalloc((void **)&mask_data, + boxes_num * col_blocks * sizeof(unsigned long long))); + nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); + + // unsigned long long mask_cpu[boxes_num * col_blocks]; + // unsigned long long *mask_cpu = new unsigned long long [boxes_num * + // col_blocks]; + std::vector mask_cpu(boxes_num * col_blocks); + + // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); + CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, + boxes_num * col_blocks * sizeof(unsigned long long), + cudaMemcpyDeviceToHost)); + + cudaFree(mask_data); + + // WARN(qiuyanjun): codes below will throw a compile error on windows with + // msvc. Thus, we choosed to use std::vectored to store the result instead. 
+ // unsigned long long remv_cpu[col_blocks]; + // memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); + std::vector remv_cpu(col_blocks, 0); + + int num_to_keep = 0; + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + unsigned long long *p = &mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu[j] |= p[j]; + } + } + } + + num_to_keep_data[0] = num_to_keep; + + if (cudaSuccess != cudaGetLastError()) + printf("Error!\n"); + + return {keep, num_to_keep_tensor}; +} + +int nms_normal_gpu(paddle::Tensor boxes, paddle::Tensor keep, + float nms_overlap_thresh) { + // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] + // params keep: (N) + + CHECK_INPUT(boxes); + // CHECK_CONTIGUOUS(keep); + + int boxes_num = boxes.shape()[0]; + const float *boxes_data = boxes.data(); + // WARN(qiuyanjun): long type for Tensor::data() API is not exported by + // paddle, it will raise some link error on windows with msvc. Please check: + // https://github.com/PaddlePaddle/Paddle/blob/release/2.5/paddle/phi/api/lib/tensor.cc +#if defined(_WIN32) + int *keep_data = keep.data(); +#else + long *keep_data = keep.data(); +#endif + + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + + unsigned long long *mask_data = NULL; + CHECK_ERROR(cudaMalloc((void **)&mask_data, + boxes_num * col_blocks * sizeof(unsigned long long))); + nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); + + // unsigned long long mask_cpu[boxes_num * col_blocks]; + // unsigned long long *mask_cpu = new unsigned long long [boxes_num * + // col_blocks]; + std::vector mask_cpu(boxes_num * col_blocks); + + // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); + CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, + boxes_num * col_blocks * sizeof(unsigned long long), + cudaMemcpyDeviceToHost)); + + cudaFree(mask_data); + + // WARN(qiuyanjun): codes below will throw a compile error on windows with + // msvc. Thus, we choosed to use std::vectored to store the result instead. + // unsigned long long remv_cpu[col_blocks]; + // memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); + std::vector remv_cpu(col_blocks, 0); + + int num_to_keep = 0; + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + unsigned long long *p = &mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu[j] |= p[j]; + } + } + } + if (cudaSuccess != cudaGetLastError()) + printf("Error!\n"); + + return num_to_keep; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h new file mode 100755 index 0000000000..d9c9485366 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
+#include "paddle/include/experimental/ext_all.h"
+#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
+#include "paddle/include/paddle/extension.h"
+#else
+#include "paddle/extension.h"
+#endif
+
+#include "ultrainfer/utils/utils.h"
+
+#if defined(WITH_GPU)
+namespace ultrainfer {
+namespace paddle_custom_ops {
+
+ULTRAINFER_DECL int boxes_overlap_bev_gpu(paddle::Tensor boxes_a,
+                                          paddle::Tensor boxes_b,
+                                          paddle::Tensor ans_overlap);
+ULTRAINFER_DECL int boxes_iou_bev_gpu(paddle::Tensor boxes_a,
+                                      paddle::Tensor boxes_b,
+                                      paddle::Tensor ans_iou);
+ULTRAINFER_DECL std::vector<paddle::Tensor> nms_gpu(const paddle::Tensor &boxes,
+                                                    float nms_overlap_thresh);
+ULTRAINFER_DECL int nms_normal_gpu(paddle::Tensor boxes, paddle::Tensor keep,
+                                   float nms_overlap_thresh);
+
+} // namespace paddle_custom_ops
+} // namespace ultrainfer
+
+#endif
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc
new file mode 100755
index 0000000000..ac0df58a00
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
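+//
+// Registration glue for the 3D NMS custom op: NMSInferShape / NMSInferDtype
+// describe the two outputs ("keep" indices and the number of boxes kept), and
+// PD_BUILD_OP(nms_gpu) exposes the CUDA kernel to Paddle Inference when the
+// library is built with WITH_GPU.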
+ +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +#include + +#include "iou3d_cpu.h" +#include "iou3d_nms.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector> +NMSInferShape(std::vector boxes_shape) { + int64_t keep_num = 1; + return {{boxes_shape[0]}, {keep_num}}; +} + +std::vector NMSInferDtype(paddle::DataType boxes_dtype) { + return {paddle::DataType::INT64, paddle::DataType::INT64}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +#if defined(WITH_GPU) + +PD_BUILD_OP(nms_gpu) + .Inputs({"boxes"}) + .Outputs({"keep", "num_to_keep"}) + .Attrs({"nms_overlap_thresh: float"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::nms_gpu)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::NMSInferDtype)) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::NMSInferShape)); + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu new file mode 100755 index 0000000000..fee08b8dfb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu @@ -0,0 +1,588 @@ +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. +*/ +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +__host__ __device__ static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +static const int THREADS_PER_BLOCK = 16; +static const int THREADS_PER_BLOCK_NMS = sizeof(int64_t) * 8; +#if defined(_WIN32) +#if defined(EPS) +#undef EPS +#endif +#define EPS 1e-8 +#else +static const float EPS = 1e-8; +#endif + +struct Point { + float x, y; + __device__ Point() {} + __device__ Point(double _x, double _y) { x = _x, y = _y; } + + __device__ void set(float _x, float _y) { + x = _x; + y = _y; + } + + __device__ Point operator+(const Point &b) const { + return Point(x + b.x, y + b.y); + } + + __device__ Point operator-(const Point &b) const { + return Point(x - b.x, y - b.y); + } +}; + +__device__ inline float cross(const Point &a, const Point &b) { + return a.x * b.y - a.y * b.x; +} + +__device__ inline float cross(const Point &p1, const Point &p2, + const Point &p0) { + return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); +} + +__device__ int check_rect_cross(const Point &p1, const Point &p2, + const Point &q1, const Point &q2) { + int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && + min(q1.x, q2.x) <= max(p1.x, p2.x) && + min(p1.y, p2.y) <= max(q1.y, q2.y) && + min(q1.y, q2.y) <= max(p1.y, p2.y); + return ret; +} + +__device__ inline int check_in_box2d(const float *box, const Point &p) { + // params: (7) [x, y, z, dx, dy, dz, heading] + const float MARGIN = 1e-2; + // Align with the setting of mmdet3d + // const float MARGIN = 1e-5; + + float center_x = box[0], center_y = box[1]; + float angle_cos = cos(-box[6]), + angle_sin = + sin(-box[6]); // rotate the point in the opposite direction of box + float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); + float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; + + return (fabs(rot_x) < box[3] / 2 + MARGIN && + fabs(rot_y) < 
box[4] / 2 + MARGIN); + // Align with the implement of mmdet3d + // float rot_x = + // (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x; + // float rot_y = + // -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + + // center_y; + // float x1 = center_x - box[3] / 2; + // float x2 = center_x + box[3] / 2; + // float y1 = center_y - box[4] / 2; + // float y2 = center_y + box[4] / 2; + // return (rot_x > x1 - MARGIN && rot_x < x2 + MARGIN && rot_y > y1 - MARGIN + // && + // rot_y < y2 + MARGIN); +} + +__device__ inline int intersection(const Point &p1, const Point &p0, + const Point &q1, const Point &q0, + Point &ans) { + // fast exclusion + if (check_rect_cross(p0, p1, q0, q1) == 0) + return 0; + + // check cross standing + float s1 = cross(q0, p1, p0); + float s2 = cross(p1, q1, p0); + float s3 = cross(p0, q1, q0); + float s4 = cross(q1, p1, q0); + + if (!(s1 * s2 > 0 && s3 * s4 > 0)) + return 0; + + // calculate intersection of two lines + float s5 = cross(q1, p1, p0); + if (fabs(s5 - s1) > EPS) { + ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); + ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); + + } else { + float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; + float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; + float D = a0 * b1 - a1 * b0; + + ans.x = (b0 * c1 - b1 * c0) / D; + ans.y = (a1 * c0 - a0 * c1) / D; + } + + return 1; +} + +__device__ inline void rotate_around_center(const Point ¢er, + const float angle_cos, + const float angle_sin, Point &p) { + // float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * + // (-angle_sin) + center.x; + // float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + + // center.y; + // p.set(new_x, new_y); + // Aligh with the implement of mmdet3d + float new_x = + (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x; + float new_y = + -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; + p.set(new_x, new_y); +} + +__device__ inline int point_cmp(const Point &a, const Point &b, + const Point ¢er) { + return atan2(a.y - center.y, a.x - center.x) > + atan2(b.y - center.y, b.x - center.x); +} + +__device__ inline float box_overlap(const float *box_a, const float *box_b) { + // params box_a: [x, y, z, dx, dy, dz, heading] + // params box_b: [x, y, z, dx, dy, dz, heading] + + float a_angle = box_a[6], b_angle = box_b[6]; + float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, + a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; + float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half; + float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; + float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; + float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; + + Point center_a(box_a[0], box_a[1]); + Point center_b(box_b[0], box_b[1]); + +#ifdef DEBUG + printf( + "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", + a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle); + printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, + center_b.x, center_b.y); +#endif + + Point box_a_corners[5]; + box_a_corners[0].set(a_x1, a_y1); + box_a_corners[1].set(a_x2, a_y1); + box_a_corners[2].set(a_x2, a_y2); + box_a_corners[3].set(a_x1, a_y2); + + Point box_b_corners[5]; + box_b_corners[0].set(b_x1, b_y1); + box_b_corners[1].set(b_x2, b_y1); + box_b_corners[2].set(b_x2, b_y2); + box_b_corners[3].set(b_x1, b_y2); + + // get oriented corners + 
float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); + float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); + + for (int k = 0; k < 4; k++) { +#ifdef DEBUG + printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, + box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, + box_b_corners[k].y); +#endif + rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); + rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); +#ifdef DEBUG + printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, + box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y); +#endif + } + + box_a_corners[4] = box_a_corners[0]; + box_b_corners[4] = box_b_corners[0]; + + // get intersection of lines + Point cross_points[16]; + Point poly_center; + int cnt = 0, flag = 0; + + poly_center.set(0, 0); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + flag = intersection(box_a_corners[i + 1], box_a_corners[i], + box_b_corners[j + 1], box_b_corners[j], + cross_points[cnt]); + if (flag) { + poly_center = poly_center + cross_points[cnt]; + cnt++; +#ifdef DEBUG + printf( + "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, " + "%.3f)->(%.3f, %.3f) \n", + cross_points[cnt - 1].x, cross_points[cnt - 1].y, + box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x, + box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y, + box_b_corners[i + 1].x, box_b_corners[i + 1].y); +#endif + } + } + } + + // check corners + for (int k = 0; k < 4; k++) { + if (check_in_box2d(box_a, box_b_corners[k])) { + poly_center = poly_center + box_b_corners[k]; + cross_points[cnt] = box_b_corners[k]; + cnt++; +#ifdef DEBUG + printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x, + cross_points[cnt - 1].y); +#endif + } + if (check_in_box2d(box_b, box_a_corners[k])) { + poly_center = poly_center + box_a_corners[k]; + cross_points[cnt] = box_a_corners[k]; + cnt++; +#ifdef DEBUG + printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x, + cross_points[cnt - 1].y); +#endif + } + } + + poly_center.x /= cnt; + poly_center.y /= cnt; + + // sort the points of polygon + Point temp; + for (int j = 0; j < cnt - 1; j++) { + for (int i = 0; i < cnt - j - 1; i++) { + if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) { + temp = cross_points[i]; + cross_points[i] = cross_points[i + 1]; + cross_points[i + 1] = temp; + } + } + } + +#ifdef DEBUG + printf("cnt=%d\n", cnt); + for (int i = 0; i < cnt; i++) { + printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, + cross_points[i].y); + } +#endif + + // get the overlap areas + float area = 0; + for (int k = 0; k < cnt - 1; k++) { + area += cross(cross_points[k] - cross_points[0], + cross_points[k + 1] - cross_points[0]); + } + + return fabs(area) / 2.0; +} + +__device__ inline float iou_bev(const float *box_a, const float *box_b) { + // params box_a: [x, y, z, dx, dy, dz, heading] + // params box_b: [x, y, z, dx, dy, dz, heading] + float sa = box_a[3] * box_a[4]; + float sb = box_b[3] * box_b[4]; + float s_overlap = box_overlap(box_a, box_b); + return s_overlap / fmaxf(sa + sb - s_overlap, EPS); +} + +__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int 
b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b) { + return; + } + const float *cur_box_a = boxes_a + a_idx * 7; + const float *cur_box_b = boxes_b + b_idx * 7; + float s_overlap = box_overlap(cur_box_a, cur_box_b); + ans_overlap[a_idx * num_b + b_idx] = s_overlap; +} + +__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_iou) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b) { + return; + } + + const float *cur_box_a = boxes_a + a_idx * 7; + const float *cur_box_b = boxes_b + b_idx * 7; + float cur_iou_bev = iou_bev(cur_box_a, cur_box_b); + ans_iou[a_idx * num_b + b_idx] = cur_iou_bev; +} + +__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh, + const float *boxes, unsigned long long *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 7 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; + block_boxes[threadIdx.x * 7 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; + block_boxes[threadIdx.x * 7 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; + block_boxes[threadIdx.x * 7 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; + block_boxes[threadIdx.x * 7 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; + block_boxes[threadIdx.x * 7 + 5] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5]; + block_boxes[threadIdx.x * 7 + 6] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 7; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +__device__ inline float iou_normal(float const *const a, float const *const b) { + // params: a: [x, y, z, dx, dy, dz, heading] + // params: b: [x, y, z, dx, dy, dz, heading] + + float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2), + right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2); + float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2), + bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2); + float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f); + float interS = width * height; + float Sa = a[3] * a[4]; + float Sb = b[3] * b[4]; + return interS / fmaxf(Sa + Sb - interS, EPS); +} + +__global__ void 
nms_normal_kernel(const int boxes_num, + const float nms_overlap_thresh, + const float *boxes, + unsigned long long *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 7 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; + block_boxes[threadIdx.x * 7 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; + block_boxes[threadIdx.x * 7 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; + block_boxes[threadIdx.x * 7 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; + block_boxes[threadIdx.x * 7 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; + block_boxes[threadIdx.x * 7 + 5] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5]; + block_boxes[threadIdx.x * 7 + 6] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 7; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void boxesoverlapLauncher(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap) { + dim3 blocks( + DIVUP(num_b, THREADS_PER_BLOCK), + DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + + boxes_overlap_kernel<<>>(num_a, boxes_a, num_b, boxes_b, + ans_overlap); +#ifdef DEBUG + cudaDeviceSynchronize(); // for using printf in kernel function +#endif +} + +void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, + const float *boxes_b, float *ans_iou) { + dim3 blocks( + DIVUP(num_b, THREADS_PER_BLOCK), + DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + + boxes_iou_bev_kernel<<>>(num_a, boxes_a, num_b, boxes_b, + ans_iou); +#ifdef DEBUG + cudaDeviceSynchronize(); // for using printf in kernel function +#endif +} + +void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, + float nms_overlap_thresh) { + dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), + DIVUP(boxes_num, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_kernel<<>>(boxes_num, nms_overlap_thresh, boxes, mask); +} + +void nmsNormalLauncher(const float *boxes, unsigned long long *mask, + int boxes_num, float nms_overlap_thresh) { + dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), + DIVUP(boxes_num, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_normal_kernel<<>>(boxes_num, nms_overlap_thresh, boxes, + mask); +} + +__global__ void 
nms_kernel_centerpoint(const int num_bboxes, + const int num_bboxes_for_nms, + const float nms_overlap_thresh, + const int decode_bboxes_dims, + const float *bboxes, const int *index, + const int64_t *sorted_index, + int64_t *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + fminf(num_bboxes_for_nms - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = + fminf(num_bboxes_for_nms - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + int box_idx = + index[sorted_index[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x]]; + block_boxes[threadIdx.x * 7 + 0] = bboxes[box_idx * decode_bboxes_dims]; + block_boxes[threadIdx.x * 7 + 1] = bboxes[box_idx * decode_bboxes_dims + 1]; + block_boxes[threadIdx.x * 7 + 2] = bboxes[box_idx * decode_bboxes_dims + 2]; + block_boxes[threadIdx.x * 7 + 3] = bboxes[box_idx * decode_bboxes_dims + 4]; + block_boxes[threadIdx.x * 7 + 4] = bboxes[box_idx * decode_bboxes_dims + 3]; + block_boxes[threadIdx.x * 7 + 5] = bboxes[box_idx * decode_bboxes_dims + 5]; + block_boxes[threadIdx.x * 7 + 6] = + -bboxes[box_idx * decode_bboxes_dims + decode_bboxes_dims - 1] - + 3.141592653589793 / 2; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const int act_box_idx = index[sorted_index[cur_box_idx]]; + float cur_box[7]; + cur_box[0] = bboxes[act_box_idx * decode_bboxes_dims]; + cur_box[1] = bboxes[act_box_idx * decode_bboxes_dims + 1]; + cur_box[2] = bboxes[act_box_idx * decode_bboxes_dims + 2]; + cur_box[3] = bboxes[act_box_idx * decode_bboxes_dims + 4]; + cur_box[4] = bboxes[act_box_idx * decode_bboxes_dims + 3]; + cur_box[5] = bboxes[act_box_idx * decode_bboxes_dims + 5]; + cur_box[6] = + -bboxes[act_box_idx * decode_bboxes_dims + decode_bboxes_dims - 1] - + 3.141592653589793 / 2; + + int i = 0; + int64_t t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void NmsLauncher(const cudaStream_t &stream, const float *bboxes, + const int *index, const int64_t *sorted_index, + const int num_bboxes, const int num_bboxes_for_nms, + const float nms_overlap_thresh, const int decode_bboxes_dims, + int64_t *mask) { + dim3 blocks(DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS), + DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_kernel_centerpoint<<>>( + num_bboxes, num_bboxes_for_nms, nms_overlap_thresh, decode_bboxes_dims, + bboxes, index, sorted_index, mask); +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc new file mode 100755 index 0000000000..af8f23d694 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
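The NMS kernels above only fill a per-box suppression bitmask; a host-side pass still has to reduce that mask into the list of kept indices. A minimal sketch of that reduction, assuming boxes are pre-sorted by score and using the same DIVUP/THREADS_PER_BLOCK_NMS layout as the kernels (the function name SelectKeptBoxes is illustrative and not part of this patch):

#include <cstdint>
#include <vector>

// Sketch only, not part of this patch. mask has boxes_num * col_blocks entries;
// bit j of mask[i * col_blocks + b] means box (b * threads_per_block_nms + j)
// overlaps box i above the threshold.
std::vector<int> SelectKeptBoxes(const std::vector<unsigned long long> &mask,
                                 int boxes_num, int threads_per_block_nms) {
  const int col_blocks =
      (boxes_num + threads_per_block_nms - 1) / threads_per_block_nms;  // DIVUP
  std::vector<unsigned long long> remv(col_blocks, 0);  // accumulated suppressions
  std::vector<int> keep;
  for (int i = 0; i < boxes_num; ++i) {
    const int block = i / threads_per_block_nms;
    const int bit = i % threads_per_block_nms;
    if (remv[block] & (1ULL << bit)) {
      continue;  // already suppressed by a higher-scoring box
    }
    keep.push_back(i);
    // Box i survives, so every box it overlaps too strongly is suppressed.
    for (int b = block; b < col_blocks; ++b) {
      remv[b] |= mask[i * col_blocks + b];
    }
  }
  return keep;
}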
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +template +bool hard_voxelize_cpu_kernel( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, const int max_voxels, T *voxels, + T_int *coords, T_int *num_points_per_voxel, T_int *grid_idx_to_voxel_idx, + T_int *num_voxels) { + std::fill(voxels, + voxels + max_voxels * max_num_points_in_voxel * num_point_dim, + static_cast(0)); + + num_voxels[0] = 0; + int voxel_idx, grid_idx, curr_num_point; + int coord_x, coord_y, coord_z; + for (int point_idx = 0; point_idx < num_points; ++point_idx) { + coord_x = floor( + (points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + coord_y = floor( + (points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + coord_z = floor( + (points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + continue; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + continue; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + continue; + } + + grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx == -1) { + voxel_idx = num_voxels[0]; + if (num_voxels[0] == max_voxels || num_voxels[0] > max_voxels) { + continue; + } + num_voxels[0]++; + grid_idx_to_voxel_idx[grid_idx] = voxel_idx; + coords[voxel_idx * 3 + 0] = coord_z; + coords[voxel_idx * 3 + 1] = coord_y; + coords[voxel_idx * 3 + 2] = coord_x; + } + curr_num_point = num_points_per_voxel[voxel_idx]; + if (curr_num_point < max_num_points_in_voxel) { + for (int j = 0; j < num_point_dim; ++j) { + voxels[voxel_idx * max_num_points_in_voxel * num_point_dim + + curr_num_point * num_point_dim + j] = + points[point_idx * num_point_dim + j]; + } + num_points_per_voxel[voxel_idx] = curr_num_point + 1; + } + } + return true; +} + +std::vector +hard_voxelize_cpu(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int max_num_points_in_voxel, const int max_voxels) { + auto num_points = points.shape()[0]; + auto num_point_dim = points.shape()[1]; + + const float voxel_size_x = voxel_size[0]; + const float voxel_size_y = voxel_size[1]; + const float voxel_size_z = voxel_size[2]; + const float 
point_cloud_range_x_min = point_cloud_range[0]; + const float point_cloud_range_y_min = point_cloud_range[1]; + const float point_cloud_range_z_min = point_cloud_range[2]; + int grid_size_x = static_cast( + round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size_x)); + int grid_size_y = static_cast( + round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size_y)); + int grid_size_z = static_cast( + round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size_z)); + + auto voxels = + paddle::empty({max_voxels, max_num_points_in_voxel, num_point_dim}, + paddle::DataType::FLOAT32, paddle::CPUPlace()); + + auto coords = paddle::full({max_voxels, 3}, 0, paddle::DataType::INT32, + paddle::CPUPlace()); + auto *coords_data = coords.data(); + + auto num_points_per_voxel = paddle::full( + {max_voxels}, 0, paddle::DataType::INT32, paddle::CPUPlace()); + auto *num_points_per_voxel_data = num_points_per_voxel.data(); + std::fill(num_points_per_voxel_data, + num_points_per_voxel_data + num_points_per_voxel.size(), + static_cast(0)); + + auto num_voxels = + paddle::full({1}, 0, paddle::DataType::INT32, paddle::CPUPlace()); + auto *num_voxels_data = num_voxels.data(); + + auto grid_idx_to_voxel_idx = + paddle::full({grid_size_z, grid_size_y, grid_size_x}, -1, + paddle::DataType::INT32, paddle::CPUPlace()); + auto *grid_idx_to_voxel_idx_data = grid_idx_to_voxel_idx.data(); + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "hard_voxelize_cpu_kernel", ([&] { + hard_voxelize_cpu_kernel( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, grid_size_z, + num_points, num_point_dim, max_num_points_in_voxel, max_voxels, + voxels.data(), coords_data, num_points_per_voxel_data, + grid_idx_to_voxel_idx_data, num_voxels_data); + })); + + return {voxels, coords, num_points_per_voxel, num_voxels}; +} + +#if defined(PADDLE_WITH_CUDA) && defined(WITH_GPU) +std::vector +hard_voxelize_cuda(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + int max_num_points_in_voxel, int max_voxels); +#endif + +std::vector +hard_voxelize(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int max_num_points_in_voxel, const int max_voxels) { + if (points.is_cpu()) { + return hard_voxelize_cpu(points, voxel_size, point_cloud_range, + max_num_points_in_voxel, max_voxels); +#if defined(PADDLE_WITH_CUDA) && defined(WITH_GPU) + } else if (points.is_gpu() || points.is_gpu_pinned()) { + return hard_voxelize_cuda(points, voxel_size, point_cloud_range, + max_num_points_in_voxel, max_voxels); +#endif + } else { + PD_THROW("Unsupported device type for hard_voxelize " + "operator."); + } +} + +std::vector> +HardInferShape(std::vector points_shape, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int &max_num_points_in_voxel, const int &max_voxels) { + return {{max_voxels, max_num_points_in_voxel, points_shape[1]}, + {max_voxels, 3}, + {max_voxels}, + {1}}; +} + +std::vector HardInferDtype(paddle::DataType points_dtype) { + return {points_dtype, paddle::DataType::INT32, paddle::DataType::INT32, + paddle::DataType::INT32}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(hard_voxelize) + .Inputs({"POINTS"}) + .Outputs({"VOXELS", "COORS", "NUM_POINTS_PER_VOXEL", "num_voxels"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::hard_voxelize)) + 
.Attrs({"voxel_size: std::vector", + "point_cloud_range: std::vector", + "max_num_points_in_voxel: int", "max_voxels: int"}) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::HardInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::HardInferDtype)); diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu new file mode 100755 index 0000000000..4ab363da0b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu @@ -0,0 +1,357 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_CUDA(x) \ + PD_CHECK(x.is_gpu() || x.is_gpu_pinned(), #x " must be a GPU Tensor.") + +#define CUDA_KERNEL_LOOP(i, n) \ + for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + +template +__global__ void init_num_point_grid( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + T_int *num_points_in_grid, int *points_valid) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + int coord_x = + floor((points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + int coord_y = + floor((points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + int coord_z = + floor((points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + return; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + return; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + return; + } + + int grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + num_points_in_grid[grid_idx] = 0; + points_valid[grid_idx] = num_points; +} + +template +__global__ void map_point_to_grid_kernel( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, T_int 
*points_to_grid_idx, + T_int *points_to_num_idx, T_int *num_points_in_grid, int *points_valid) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + int coord_x = + floor((points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + int coord_y = + floor((points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + int coord_z = + floor((points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + return; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + return; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + return; + } + + int grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + T_int num = atomicAdd(num_points_in_grid + grid_idx, 1); + if (num < max_num_points_in_voxel) { + points_to_num_idx[point_idx] = num; + points_to_grid_idx[point_idx] = grid_idx; + atomicMin(points_valid + grid_idx, static_cast(point_idx)); + } +} + +template +__global__ void update_points_flag(const int *points_valid, + const T_int *points_to_grid_idx, + const int num_points, int *points_flag) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < num_points; i += gridDim.x * blockDim.x) { + T_int grid_idx = points_to_grid_idx[i]; + if (grid_idx >= 0) { + int id = points_valid[grid_idx]; + if (id != num_points && id == i) { + points_flag[i] = 1; + } + } + } +} + +template +__global__ void +get_voxel_idx_kernel(const int *points_flag, const T_int *points_to_grid_idx, + const int *points_flag_prefix_sum, const int num_points, + const int max_voxels, T_int *num_voxels, + T_int *grid_idx_to_voxel_idx) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < num_points; i += gridDim.x * blockDim.x) { + if (points_flag[i] == 1) { + T_int grid_idx = points_to_grid_idx[i]; + int num = points_flag_prefix_sum[i]; + if (num < max_voxels) { + grid_idx_to_voxel_idx[grid_idx] = num; + } + } + if (i == num_points - 1) { + int num = points_flag_prefix_sum[i] + points_flag[i]; + if (num < max_voxels) { + num_voxels[0] = num; + } else { + num_voxels[0] = max_voxels; + } + } + } +} + +template +__global__ void init_voxels_kernel(const int64_t num, T *voxels) { + int64_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx > num || idx == num) { + return; + } + voxels[idx] = static_cast(0); +} + +template +__global__ void +assign_voxels_kernel(const T *points, const T_int *points_to_grid_idx, + const T_int *points_to_num_idx, + const T_int *grid_idx_to_voxel_idx, + const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, T *voxels) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + T_int grid_idx = points_to_grid_idx[point_idx]; + T_int num_idx = points_to_num_idx[point_idx]; + if (grid_idx > -1 && num_idx > -1) { + T_int voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx > -1) { + for (int64_t i = 0; i < num_point_dim; ++i) { + voxels[voxel_idx * max_num_points_in_voxel * num_point_dim + + num_idx * num_point_dim + i] = + points[point_idx * num_point_dim + i]; + } + } + } +} + +template +__global__ void +assign_coords_kernel(const T_int *grid_idx_to_voxel_idx, + const T_int *num_points_in_grid, const int num_grids, + const int grid_size_x, const 
int grid_size_y, + const int grid_size_z, const int max_num_points_in_voxel, + T *coords, T *num_points_per_voxel) { + int64_t grid_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (grid_idx > num_grids || grid_idx == num_grids) { + return; + } + T_int voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx > -1) { + T_int coord_z = grid_idx / grid_size_x / grid_size_y; + T_int coord_y = + (grid_idx - coord_z * grid_size_x * grid_size_y) / grid_size_x; + T_int coord_x = + grid_idx - coord_z * grid_size_x * grid_size_y - coord_y * grid_size_x; + coords[voxel_idx * 3 + 0] = coord_z; + coords[voxel_idx * 3 + 1] = coord_y; + coords[voxel_idx * 3 + 2] = coord_x; + num_points_per_voxel[voxel_idx] = + min(num_points_in_grid[grid_idx], max_num_points_in_voxel); + } +} + +std::vector +hard_voxelize_cuda(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + int max_num_points_in_voxel, int max_voxels) { + // check device + CHECK_INPUT_CUDA(points); + + int64_t num_points = points.shape()[0]; + int64_t num_point_dim = points.shape()[1]; + + const float voxel_size_x = voxel_size[0]; + const float voxel_size_y = voxel_size[1]; + const float voxel_size_z = voxel_size[2]; + const float point_cloud_range_x_min = point_cloud_range[0]; + const float point_cloud_range_y_min = point_cloud_range[1]; + const float point_cloud_range_z_min = point_cloud_range[2]; + int grid_size_x = static_cast( + round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size_x)); + int grid_size_y = static_cast( + round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size_y)); + int grid_size_z = static_cast( + round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size_z)); + int num_grids = grid_size_x * grid_size_y * grid_size_z; + + auto voxels = + paddle::empty({max_voxels, max_num_points_in_voxel, num_point_dim}, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + + auto coords = paddle::full({max_voxels, 3}, 0, paddle::DataType::INT32, + paddle::GPUPlace()); + auto *coords_data = coords.data(); + + auto num_points_per_voxel = paddle::full( + {max_voxels}, 0, paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_points_per_voxel_data = num_points_per_voxel.data(); + + auto points_to_grid_idx = paddle::full( + {num_points}, -1, paddle::DataType::INT32, paddle::GPUPlace()); + auto *points_to_grid_idx_data = points_to_grid_idx.data(); + + auto points_to_num_idx = paddle::full( + {num_points}, -1, paddle::DataType::INT32, paddle::GPUPlace()); + auto *points_to_num_idx_data = points_to_num_idx.data(); + + auto num_points_in_grid = + paddle::empty({grid_size_z, grid_size_y, grid_size_x}, + paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_points_in_grid_data = num_points_in_grid.data(); + + auto grid_idx_to_voxel_idx = + paddle::full({grid_size_z, grid_size_y, grid_size_x}, -1, + paddle::DataType::INT32, paddle::GPUPlace()); + auto *grid_idx_to_voxel_idx_data = grid_idx_to_voxel_idx.data(); + + auto num_voxels = + paddle::full({1}, 0, paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_voxels_data = num_voxels.data(); + + auto points_valid = + paddle::empty({grid_size_z, grid_size_y, grid_size_x}, + paddle::DataType::INT32, paddle::GPUPlace()); + int *points_valid_data = points_valid.data(); + auto points_flag = paddle::full({num_points}, 0, paddle::DataType::INT32, + paddle::GPUPlace()); + + // 1. 
Find the grid index for each point, compute the + // number of points in each grid + int64_t threads = 512; + int64_t blocks = (num_points + threads - 1) / threads; + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "init_num_point_grid", ([&] { + init_num_point_grid + <<>>( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, + grid_size_z, num_points, num_point_dim, num_points_in_grid_data, + points_valid_data); + })); + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "map_point_to_grid_kernel", ([&] { + map_point_to_grid_kernel + <<>>( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, + grid_size_z, num_points, num_point_dim, max_num_points_in_voxel, + points_to_grid_idx_data, points_to_num_idx_data, + num_points_in_grid_data, points_valid_data); + })); + + // 2. Find the number of non-zero voxels + int *points_flag_data = points_flag.data(); + + threads = 512; + blocks = (num_points + threads - 1) / threads; + update_points_flag<<>>( + points_valid_data, points_to_grid_idx_data, num_points, points_flag_data); + + auto points_flag_prefix_sum = + paddle::experimental::cumsum(points_flag, 0, false, true, false); + int *points_flag_prefix_sum_data = points_flag_prefix_sum.data(); + get_voxel_idx_kernel<<>>( + points_flag_data, points_to_grid_idx_data, points_flag_prefix_sum_data, + num_points, max_voxels, num_voxels_data, grid_idx_to_voxel_idx_data); + + // 3. Store points to voxels coords and num_points_per_voxel + int64_t num = max_voxels * max_num_points_in_voxel * num_point_dim; + threads = 512; + blocks = (num + threads - 1) / threads; + PD_DISPATCH_FLOATING_TYPES(points.type(), "init_voxels_kernel", ([&] { + init_voxels_kernel + <<>>( + num, voxels.data()); + })); + + threads = 512; + blocks = (num_points + threads - 1) / threads; + PD_DISPATCH_FLOATING_TYPES( + points.type(), "assign_voxels_kernel", ([&] { + assign_voxels_kernel + <<>>( + points.data(), points_to_grid_idx_data, + points_to_num_idx_data, grid_idx_to_voxel_idx_data, num_points, + num_point_dim, max_num_points_in_voxel, voxels.data()); + })); + + // 4. Store coords, num_points_per_voxel + blocks = (num_grids + threads - 1) / threads; + assign_coords_kernel<<>>( + grid_idx_to_voxel_idx_data, num_points_in_grid_data, num_grids, + grid_size_x, grid_size_y, grid_size_z, max_num_points_in_voxel, + coords_data, num_points_per_voxel_data); + + return {voxels, coords, num_points_per_voxel, num_voxels}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h new file mode 100755 index 0000000000..d57a888782 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
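Both the CPU and CUDA voxelizers above derive the grid dimensions as round((range_max - range_min) / voxel_size) per axis. A small self-contained sketch with illustrative PointPillars-style numbers (none of these values come from this patch) shows the sizes involved:

#include <cmath>
#include <cstdio>

int main() {
  // Illustrative values only: [x_min, y_min, z_min, x_max, y_max, z_max].
  const float point_cloud_range[6] = {0.f, -39.68f, -3.f, 69.12f, 39.68f, 1.f};
  const float voxel_size[3] = {0.16f, 0.16f, 4.f};
  const int grid_x = static_cast<int>(
      std::round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0]));  // 432
  const int grid_y = static_cast<int>(
      std::round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1]));  // 496
  const int grid_z = static_cast<int>(
      std::round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size[2]));  // 1
  // grid_idx_to_voxel_idx is allocated over the full grid, so a coarse z axis
  // keeps the (1 x 496 x 432) INT32 lookup table small.
  std::printf("grid = %d x %d x %d (%d cells)\n", grid_x, grid_y, grid_z,
              grid_x * grid_y * grid_z);
  return 0;
}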
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/backends/tensorrt/option.h" +#include +#include +#include +#include + +namespace ultrainfer { + +/*! @brief Option object to configure GraphCore IPU + */ +struct IpuOption { + /// IPU device id + int ipu_device_num; + /// the batch size in the graph, only work when graph has no batch shape info + int ipu_micro_batch_size; + /// enable pipelining + bool ipu_enable_pipelining; + /// the number of batches per run in pipelining + int ipu_batches_per_step; + /// enable fp16 + bool ipu_enable_fp16; + /// the number of graph replication + int ipu_replica_num; + /// the available memory proportion for matmul/conv + float ipu_available_memory_proportion; + /// enable fp16 partial for matmul, only work with fp16 + bool ipu_enable_half_partial; +}; + +/*! @brief Option object to configure KUNLUNXIN XPU + */ +struct XpuOption { + /// kunlunxin device id + int kunlunxin_device_id = 0; + /// EnableXpu + /// kunlunxin_l3_workspace_size + int kunlunxin_l3_workspace_size = 0xfffc00; + /// kunlunxin_locked + bool kunlunxin_locked = false; + /// kunlunxin_autotune + bool kunlunxin_autotune = true; + /// kunlunxin_autotune_file + std::string kunlunxin_autotune_file = ""; + /// kunlunxin_precision + std::string kunlunxin_precision = "int16"; + /// kunlunxin_adaptive_seqlen + bool kunlunxin_adaptive_seqlen = false; + /// kunlunxin_enable_multi_stream + bool kunlunxin_enable_multi_stream = false; + /// SetXpuConfig + /// quant post dynamic weight bits + int kunlunxin_quant_post_dynamic_weight_bits = -1; + /// quant post dynamic op types + std::vector kunlunxin_quant_post_dynamic_op_types = {}; +}; + +/*! 
@brief Option object to configure Paddle Inference backend + */ +struct PaddleBackendOption { + /// Print log information while initialize Paddle Inference backend + bool enable_log_info = false; + /// Enable MKLDNN while inference on CPU + bool enable_mkldnn = true; + /// Use Paddle Inference + TensorRT to inference model on GPU + bool enable_trt = false; + /// Whether enable memory optimize, default true + bool enable_memory_optimize = true; + /// Whether enable ir debug, default false + bool switch_ir_debug = false; + /// Whether enable ir optimize, default true + bool switch_ir_optimize = true; + /// Whether the load model is quantized model + bool is_quantize_model = false; + std::string inference_precision = "float32"; + bool enable_inference_cutlass = false; + + /* + * @brief IPU option, this will configure the IPU hardware, if inference model + * in IPU + */ + IpuOption ipu_option; + /* + * @brief XPU option, this will configure the KUNLUNXIN XPU hardware, if + * inference model in XPU + */ + XpuOption xpu_option; + + /// Collect shape for model while enable_trt is true + bool collect_trt_shape = false; + /// Collect shape for model by device (for some custom ops) + bool collect_trt_shape_by_device = false; + /// Cache input shape for mkldnn while the input data will change dynamiclly + int mkldnn_cache_size = -1; + /// initialize memory size(MB) for GPU + int gpu_mem_init_size = 100; + /// The option to enable fixed size optimization for transformer model + bool enable_fixed_size_opt = false; + /// min_subgraph_size for paddle-trt + int trt_min_subgraph_size = 3; + +#if PADDLEINFERENCE_VERSION_MAJOR == 2 + bool enable_new_ir = false; +#else + bool enable_new_ir = true; +#endif + + /// Disable type of operators run on TensorRT + void DisableTrtOps(const std::vector &ops) { + trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end()); + } + + /// Delete pass by name + void DeletePass(const std::string &pass_name) { + delete_pass_names.push_back(pass_name); + } + + void SetIpuConfig(bool enable_fp16, int replica_num, + float available_memory_proportion, + bool enable_half_partial) { + ipu_option.ipu_enable_fp16 = enable_fp16; + ipu_option.ipu_replica_num = replica_num; + ipu_option.ipu_available_memory_proportion = available_memory_proportion; + ipu_option.ipu_enable_half_partial = enable_half_partial; + } + + void SetXpuConfig( + int quant_post_dynamic_weight_bits = -1, + const std::vector &quant_post_dynamic_op_types = {}) { + xpu_option.kunlunxin_quant_post_dynamic_weight_bits = + quant_post_dynamic_weight_bits; + xpu_option.kunlunxin_quant_post_dynamic_op_types = + quant_post_dynamic_op_types; + } + + // The belowing parameters may be removed, please do not + // read or write them directly + TrtBackendOption trt_option; + bool enable_pinned_memory = false; + void *external_stream_ = nullptr; + Device device = Device::CPU; + /// device id for CPU/GPU + int device_id = 0; + std::vector trt_disabled_ops_{}; + int cpu_thread_num = 8; + std::vector delete_pass_names = {}; + std::string model_file = ""; // Path of model file + std::string params_file = ""; // Path of parameters file, can be empty + + // load model and paramters from memory + bool model_from_memory_ = false; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc new file mode 100755 index 0000000000..0bd104470a --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/paddle/option.h" + +namespace ultrainfer { + +void BindIpuOption(pybind11::module &m) { + pybind11::class_(m, "IpuOption") + .def(pybind11::init()) + .def_readwrite("ipu_device_num", &IpuOption::ipu_device_num) + .def_readwrite("ipu_micro_batch_size", &IpuOption::ipu_micro_batch_size) + .def_readwrite("ipu_enable_pipelining", &IpuOption::ipu_enable_pipelining) + .def_readwrite("ipu_batches_per_step", &IpuOption::ipu_batches_per_step) + .def_readwrite("ipu_enable_fp16", &IpuOption::ipu_enable_fp16) + .def_readwrite("ipu_replica_num", &IpuOption::ipu_replica_num) + .def_readwrite("ipu_available_memory_proportion", + &IpuOption::ipu_available_memory_proportion) + .def_readwrite("ipu_enable_half_partial", + &IpuOption::ipu_enable_half_partial); +} + +void BindPaddleOption(pybind11::module &m) { + BindIpuOption(m); + pybind11::class_(m, "PaddleBackendOption") + .def(pybind11::init()) + .def_readwrite("enable_fixed_size_opt", + &PaddleBackendOption::enable_fixed_size_opt) + .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info) + .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn) + .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt) + .def_readwrite("enable_memory_optimize", + &PaddleBackendOption::enable_memory_optimize) + .def_readwrite("switch_ir_debug", &PaddleBackendOption::switch_ir_debug) + .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option) + .def_readwrite("xpu_option", &PaddleBackendOption::xpu_option) + .def_readwrite("trt_option", &PaddleBackendOption::trt_option) + .def_readwrite("collect_trt_shape", + &PaddleBackendOption::collect_trt_shape) + .def_readwrite("collect_trt_shape_by_device", + &PaddleBackendOption::collect_trt_shape_by_device) + .def_readwrite("mkldnn_cache_size", + &PaddleBackendOption::mkldnn_cache_size) + .def_readwrite("gpu_mem_init_size", + &PaddleBackendOption::gpu_mem_init_size) + .def_readwrite("is_quantize_model", + &PaddleBackendOption::is_quantize_model) + .def_readwrite("inference_precision", + &PaddleBackendOption::inference_precision) + .def_readwrite("enable_inference_cutlass", + &PaddleBackendOption::enable_inference_cutlass) + .def_readwrite("trt_min_subgraph_size", + &PaddleBackendOption::trt_min_subgraph_size) + .def_readwrite("enable_new_ir", &PaddleBackendOption::enable_new_ir) + .def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps) + .def("delete_pass", &PaddleBackendOption::DeletePass) + .def("set_ipu_config", &PaddleBackendOption::SetIpuConfig); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc new file mode 100755 index 0000000000..3ee46c43f5 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc @@ -0,0 +1,650 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" + +#include + +#include "ultrainfer/utils/path.h" + +namespace ultrainfer { + +void PaddleBackend::BuildOption(const PaddleBackendOption &option) { + option_ = option; + if (option.device == Device::GPU) { + auto inference_precision = paddle_infer::PrecisionType::kFloat32; + if (option_.inference_precision == "float32") { + FDINFO << "Will inference_precision float32" << std::endl; + inference_precision = paddle_infer::PrecisionType::kFloat32; + } else if (option_.inference_precision == "float16") { + FDINFO << "Will inference_precision float16" << std::endl; + inference_precision = paddle_infer::PrecisionType::kHalf; + } else if (option_.inference_precision == "bfloat16") { + FDINFO << "Will inference_precision bfloat16" << std::endl; + inference_precision = paddle_infer::PrecisionType::kBf16; + } else if (option_.inference_precision == "int8") { + FDINFO << "Will inference_precision int8" << std::endl; + inference_precision = paddle_infer::PrecisionType::kInt8; + } else { + FDERROR << "paddle inference only support precision in float32," + << " float16, bfloat16 and int8" << std::endl; + } + config_.Exp_DisableMixedPrecisionOps({"feed", "fetch"}); + config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id, + inference_precision); + // config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id); + if (option_.switch_ir_debug) { + FDINFO << "Will Enable ir_debug for Paddle Backend." << std::endl; + config_.SwitchIrDebug(); + } + if (option_.enable_inference_cutlass) { +#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x + FDWARNING + << "Your are using Paddle infernence 2.4.x, cutlass is not supported!" + << std::endl; +#else + FDINFO << "Will enable_inference_cutlass" << std::endl; + config_.Exp_EnableUseCutlass(); +#endif + } + if (option_.external_stream_) { + FDINFO << "Will use external stream for Paddle Backend." << std::endl; + config_.SetExecStream(option_.external_stream_); + } + if (option.enable_trt) { + if (!option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt inference with Paddle Backend." + << std::endl; + } + config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_); + auto precision = paddle_infer::PrecisionType::kFloat32; + if (option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend." + << std::endl; + precision = paddle_infer::PrecisionType::kHalf; + } + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING + << "Detect that tensorrt cache file has been set to " + << option.trt_option.serialize_file + << ", but while enable paddle2trt, please notice that the cache " + "file will save to the directory where paddle model saved." 
+ << std::endl; + use_static = true; + std::string opt_cache_dir = + GetDirFromPath(option.trt_option.serialize_file); + + config_.SetOptimCacheDir(opt_cache_dir); + } + config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, + option.trt_min_subgraph_size, precision, + use_static); + + SetTRTDynamicShapeToConfig(option); + if (option_.enable_fixed_size_opt) { + paddle_infer::experimental::InternalUtils::SetTransformerMaskid( + &config_, "opt"); + } + } + } else if (option.device == Device::IPU) { +#ifdef WITH_IPU + config_.EnableIpu(option.ipu_option.ipu_device_num, + option.ipu_option.ipu_micro_batch_size, + option.ipu_option.ipu_enable_pipelining, + option.ipu_option.ipu_batches_per_step); + config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16, + option.ipu_option.ipu_replica_num, + option.ipu_option.ipu_available_memory_proportion, + option.ipu_option.ipu_enable_half_partial); +#else + FDWARNING << "The UltraInfer is not compiled with IPU device, so will " + "fallback to CPU with Paddle Inference Backend." + << std::endl; +#endif + } else if (option.device == Device::KUNLUNXIN) { +#ifdef WITH_KUNLUNXIN + // Note(qiuyanjun): For Paddle XPU L3 Cache, please set + // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200) + // export FLAGS_fuse_multi_transformer_quant_type="float" + config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size, + option.xpu_option.kunlunxin_locked, + option.xpu_option.kunlunxin_autotune, + option.xpu_option.kunlunxin_autotune_file, + option.xpu_option.kunlunxin_precision, + option.xpu_option.kunlunxin_adaptive_seqlen, + option.xpu_option.kunlunxin_enable_multi_stream); + config_.SetXpuConfig( + option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits, + option.xpu_option.kunlunxin_quant_post_dynamic_op_types); + config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id); +#else + FDWARNING + << "The UltraInfer is not compiled with KUNLUNXIN device, so will " + "fallback to CPU with Paddle Inference Backend." + << std::endl; +#endif + } else { + config_.DisableGpu(); + if (option.enable_mkldnn) { + config_.EnableMKLDNN(); + config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size); + } else { +#if defined(PADDLEINFERENCE_API_COMPAT_2_6_x) || \ + (PADDLEINFERENCE_VERSION_MAJOR != 2) + config_.DisableMKLDNN(); +#endif + } + } + + if (!option.enable_log_info) { + config_.DisableGlogInfo(); + } + if (option.cpu_thread_num <= 0) { + config_.SetCpuMathLibraryNumThreads(8); + } else { + config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num); + } + // Note: SwitchIrOptim is enabled by default for paddle inference + // backend. So, we don't need to set it manually. + // config_.SwitchIrOptim(option.switch_ir_optimize); + + if (option.enable_new_ir) { +#if PADDLEINFERENCE_VERSION_MAJOR == 2 + FDWARNING << "UltraInfer was compiled with Paddle Inference v2.0+ " + "which does not support the new IR." + << std::endl; +#else + if (option.device == Device::GPU && option.enable_trt) { + FDWARNING << "Currently, Paddle-TensorRT does not support the new IR, " + "and the old IR will be used." 
+ << std::endl; + } else { + config_.EnableNewIR(); + config_.EnableNewExecutor(); + if (option.device == Device::CPU || option.device == Device::GPU) { + config_.SetOptimizationLevel(3); + } + } +#endif + } +} + +bool PaddleBackend::Init(const RuntimeOption &runtime_option) { + if (!(Supported(runtime_option.model_format, Backend::PDINFER) && + Supported(runtime_option.device, Backend::PDINFER))) { + return false; + } + + auto option = runtime_option; + // Collect basic paddle inference option and trt option. + option.paddle_infer_option.model_file = runtime_option.model_file; + option.paddle_infer_option.params_file = runtime_option.params_file; + option.paddle_infer_option.model_from_memory_ = + runtime_option.model_from_memory_; + option.paddle_infer_option.device = runtime_option.device; + option.paddle_infer_option.device_id = runtime_option.device_id; + option.paddle_infer_option.enable_pinned_memory = + runtime_option.enable_pinned_memory; + option.paddle_infer_option.external_stream_ = runtime_option.external_stream_; + option.paddle_infer_option.trt_option = runtime_option.trt_option; + option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id; + // Note(qiuyanjun): For Ipu option and XPU option, please check the + // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin(). + // Futhermore, please check paddle_infer_option.SetIpuConfig() and + // paddle_infer_option.SetXpuConfig() for more details of extra configs. + return InitFromPaddle(option.model_file, option.params_file, + option.model_from_memory_, option.paddle_infer_option); +} + +bool PaddleBackend::InitFromPaddle(const std::string &model, + const std::string ¶ms, + bool model_from_memory, + const PaddleBackendOption &option) { + if (initialized_) { + FDERROR << "PaddleBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + if (model_from_memory) { + config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(), + params.size()); + } else { + config_.SetModel(model, params); + } + if (option.enable_memory_optimize) { + config_.EnableMemoryOptim(); + } + BuildOption(option); + // The input/output information get from predictor is not right, use + // PaddleReader instead now + std::string model_content = model; + if (!model_from_memory) { + FDASSERT(ReadBinaryFromFile(model, &model_content), + "Failed to read file %s.", model.c_str()); + } + + if (option.is_quantize_model) { + if (option.device == Device::GPU) { + FDWARNING << "The loaded model is a quantized model, while inference on " + "GPU, please use TensorRT backend to get better performance." + << std::endl; + if (option.enable_trt) { + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING + << "Detect that tensorrt cache file has been set to " + << option.trt_option.serialize_file + << ", but while enable paddle2trt, please notice that the cache " + "file will save to the directory where paddle model saved." 
+ << std::endl; + use_static = true; + } +#if PADDLEINFERENCE_VERSION_MAJOR != 2 + config_.EnableTensorRtEngine( + option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, option.trt_min_subgraph_size, + paddle_infer::PrecisionType::kInt8, use_static, false, true); +#else + config_.EnableTensorRtEngine( + option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, option.trt_min_subgraph_size, + paddle_infer::PrecisionType::kInt8, use_static, false); +#endif + SetTRTDynamicShapeToConfig(option); + } + } + if (option.enable_mkldnn) { + config_.EnableMkldnnInt8(); + } else { + FDWARNING << "The loaded model is a quantized model, while inference on " + "CPU, please enable MKLDNN to get better performance." + << std::endl; + } + } + if (option.collect_trt_shape) { + // Set the shape info file. + std::string curr_model_dir = "./"; + if (!option.model_from_memory_) { + curr_model_dir = GetDirFromPath(option.model_file); + } + std::string shape_range_info = + PathJoin(curr_model_dir, "shape_range_info.pbtxt"); + if (!CheckFileExists(shape_range_info)) { + FDINFO << "Start generating shape range info file." << std::endl; + paddle_infer::Config analysis_config; + if (model_from_memory) { + analysis_config.SetModelBuffer(model.c_str(), model.size(), + params.c_str(), params.size()); + } else { + analysis_config.SetModel(model, params); + } + if (option.collect_trt_shape_by_device) { + if (option.device == Device::GPU) { + analysis_config.EnableUseGpu(option.gpu_mem_init_size, + option.device_id, + paddle_infer::PrecisionType::kFloat32); + } + } + analysis_config.CollectShapeRangeInfo(shape_range_info); + auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config); + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + std::map> max_input_data; + std::map> min_input_data; + std::map> opt_input_data; + if (!option.trt_option.min_input_data.empty()) { + GetInputDataFromOption(option, &max_input_data, &min_input_data, + &opt_input_data); + } + // Need to run once to get the shape range info file. + CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data); + CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data); + CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data); + CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data); + FDINFO << "Finish generating shape range info file." << std::endl; + } + FDINFO << "Start loading shape range info file " << shape_range_info + << " to set TensorRT dynamic shape." << std::endl; + config_.EnableTunedTensorRtDynamicShape(shape_range_info, true); + } + // Note(zhoushunjie): The pass deletion should be executed just before + // creating predictor. 
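  // As a usage sketch (not part of this patch): a caller typically reaches this
  // branch by registering pass names on the option object before Init(), e.g.
  //
  //   RuntimeOption runtime_option;
  //   runtime_option.paddle_infer_option.DeletePass("fc_fuse_pass");
  //
  // The pass name above is illustrative only; DeletePass() simply appends to
  // delete_pass_names (see option.h), and the loop below removes each entry
  // from the pass builder right before paddle_infer::CreatePredictor(config_).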
+ if (!option.delete_pass_names.empty()) { + auto pass_builder = config_.pass_builder(); + for (int i = 0; i < option.delete_pass_names.size(); i++) { + FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl; + pass_builder->DeletePass(option.delete_pass_names[i]); + } + } + if (option.enable_log_info) { + FDINFO << "Finish paddle inference config with summary as: " << std::endl + << config_.Summary() << std::endl; + } + predictor_ = paddle_infer::CreatePredictor(config_); + auto input_names = predictor_->GetInputNames(); + auto output_names = predictor_->GetOutputNames(); + auto input_dtypes = predictor_->GetInputTypes(); + +#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x + // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes + // are not supported when Paddle Inference API version is 2.4.x. + std::map> input_shapes; + std::map> output_shapes; + std::map output_dtypes; + // Get the all the input shape info. + for (size_t i = 0; i < input_names.size(); ++i) { + std::vector shape; + auto handle = predictor_->GetInputHandle(input_names[i]); + for (int j = 0; j < handle->shape().size(); ++j) { + shape.push_back( + static_cast(handle->shape()[j])); // int32 -> int64 + } + input_shapes[input_names[i]] = shape; + } + // Get the all the output shape and dtype info. + for (size_t i = 0; i < output_names.size(); ++i) { + std::vector shape; + auto handle = predictor_->GetOutputHandle(output_names[i]); + for (int j = 0; j < handle->shape().size(); ++j) { + shape.push_back( + static_cast(handle->shape()[j])); // int32 -> int64 + } + output_shapes[output_names[i]] = shape; + output_dtypes[output_names[i]] = handle->type(); + } +#else + auto input_shapes = predictor_->GetInputTensorShape(); + auto output_shapes = predictor_->GetOutputTensorShape(); + auto output_dtypes = predictor_->GetOutputTypes(); +#endif + + inputs_desc_.resize(input_names.size()); + for (int i = 0; i < input_names.size(); ++i) { + inputs_desc_[i].name = input_names[i]; + auto iter = input_shapes.find(inputs_desc_[i].name); + FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.", + inputs_desc_[i].name.c_str()); + inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end()); + auto iter1 = input_dtypes.find(inputs_desc_[i].name); + FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.", + inputs_desc_[i].name.c_str()); + inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second); + } + outputs_desc_.resize(output_names.size()); + for (int i = 0; i < output_names.size(); ++i) { + outputs_desc_[i].name = output_names[i]; + auto iter = output_shapes.find(outputs_desc_[i].name); + FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.", + outputs_desc_[i].name.c_str()); + outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end()); + auto iter1 = output_dtypes.find(outputs_desc_[i].name); + FDASSERT(iter1 != output_dtypes.end(), + "Cannot find data type for output %s.", + outputs_desc_[i].name.c_str()); + outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second); + } + + initialized_ = true; + return true; +} + +TensorInfo PaddleBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector PaddleBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo PaddleBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + 
NumOutputs()); + return outputs_desc_[index]; +} + +std::vector PaddleBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool PaddleBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + // output share backend memory only support CPU or GPU + if (option_.device == Device::IPU) { + copy_to_fd = true; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto handle = predictor_->GetInputHandle(inputs[i].name); + ShareTensorFromFDTensor(handle.get(), inputs[i]); + } + // prebinded output only support for GPU + // if (!copy_to_fd) { + // for (size_t i = 0; i < (*outputs).size(); ++i) { + // auto output_name = (*outputs)[i].name; + // // if a output is not prebinded, + // // the name of output is expected to be empty. + // // We skip here + // if (output_name.empty()) { + // continue; + // } + // // Record the prebinded output_name. + // // Those outputs do not need PaddleTensorToFDTensor + // // after predictor_.Run() + // auto handle = predictor_->GetOutputHandle(output_name); + // ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]); + // } + // } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + predictor_->Run(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name); + if (copy_to_fd) { + (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory; + } + PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +std::unique_ptr PaddleBackend::Clone(RuntimeOption &runtime_option, + void *stream, int device_id) { + std::unique_ptr new_backend = + utils::make_unique(); + auto casted_backend = dynamic_cast(new_backend.get()); + if (device_id > 0 && (option_.device == Device::GPU) && + device_id != option_.device_id) { + auto clone_option = option_; + clone_option.device_id = device_id; + clone_option.external_stream_ = stream; + FDASSERT(casted_backend->InitFromPaddle( + runtime_option.model_file, runtime_option.params_file, + runtime_option.model_from_memory_, clone_option), + "Clone model from Paddle failed while initialize PaddleBackend."); + FDWARNING << "The target device id:" << device_id + << " is different from current device id:" << option_.device_id + << ", cannot share memory with current engine." << std::endl; + return new_backend; + } + casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end()); + casted_backend->outputs_desc_.assign(outputs_desc_.begin(), + outputs_desc_.end()); + casted_backend->predictor_ = std::move(predictor_->Clone(stream)); + return new_backend; +} + +void PaddleBackend::SetTRTDynamicShapeToConfig( + const PaddleBackendOption &option) { + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + if (min_shape.size() > 0) { + FDINFO << "Start setting trt dynamic shape." << std::endl; + config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); + FDINFO << "Finish setting trt dynamic shape." 
<< std::endl; + } +} + +void PaddleBackend::GetDynamicShapeFromOption( + const PaddleBackendOption &option, + std::map> *max_shape, + std::map> *min_shape, + std::map> *opt_shape) const { + auto print_shape = [](const std::vector &shape) -> std::string { + std::ostringstream oss; + oss << "["; + for (int i = 0; i < shape.size(); ++i) { + oss << shape[i]; + if (i < shape.size() - 1) { + oss << ", "; + } + } + oss << "]"; + return oss.str(); + }; + for (const auto &item : option.trt_option.min_shape) { + auto max_iter = option.trt_option.max_shape.find(item.first); + auto opt_iter = option.trt_option.opt_shape.find(item.first); + FDASSERT(max_iter != option.trt_option.max_shape.end(), + "Cannot find %s in TrtBackendOption::min_shape.", + item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_shape.end(), + "Cannot find %s in TrtBackendOption::opt_shape.", + item.first.c_str()); + (*max_shape)[item.first].assign(max_iter->second.begin(), + max_iter->second.end()); + (*opt_shape)[item.first].assign(opt_iter->second.begin(), + opt_iter->second.end()); + (*min_shape)[item.first].assign(item.second.begin(), item.second.end()); + FDINFO << item.first + << ": the max shape = " << print_shape(max_iter->second) + << ", the min shape = " << print_shape(item.second) + << ", the opt shape = " << print_shape(opt_iter->second) + << std::endl; + } +} + +void PaddleBackend::GetInputDataFromOption( + const PaddleBackendOption &option, + std::map> *max_input_data, + std::map> *min_input_data, + std::map> *opt_input_data) const { + for (const auto &item : option.trt_option.min_input_data) { + auto max_iter = option.trt_option.max_input_data.find(item.first); + auto opt_iter = option.trt_option.opt_input_data.find(item.first); + FDASSERT(max_iter != option.trt_option.max_input_data.end(), + "Cannot find %s in TrtBackendOption::min_input_data.", + item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_input_data.end(), + "Cannot find %s in TrtBackendOption::opt_input_data.", + item.first.c_str()); + (*max_input_data)[item.first].assign(max_iter->second.begin(), + max_iter->second.end()); + (*opt_input_data)[item.first].assign(opt_iter->second.begin(), + opt_iter->second.end()); + (*min_input_data)[item.first].assign(item.second.begin(), + item.second.end()); + } +} + +void PaddleBackend::CollectShapeRun( + paddle_infer::Predictor *predictor, + const std::map> &shape, + const std::map> &data) const { + auto input_names = predictor->GetInputNames(); + auto input_type = predictor->GetInputTypes(); + for (const auto &name : input_names) { + FDASSERT(shape.find(name) != shape.end() && + input_type.find(name) != input_type.end(), + "When collect_trt_shape is true, please define max/opt/min shape " + "for model's input:[\"%s\"] by " + "(C++)RuntimeOption.trt_option.SetShape/" + "(Python)RuntimeOption.trt_option.set_shape.", + name.c_str()); + auto tensor = predictor->GetInputHandle(name); + auto shape_value = shape.at(name); + int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1, + std::multiplies()); + tensor->Reshape(shape_value); + + if (data.find(name) != data.end()) { + FDASSERT(data.at(name).size() == shape_num, + "The data num and accumulate of shape must be equal for input: " + "[\"%s\"], " + " When Use the (C++)RuntimeOption.trt_option.SetInputData/ " + " (Python)RuntimeOption.trt_option.set_input_data/", + name.c_str()); + } + + auto dtype = input_type[name]; + switch (dtype) { + case paddle_infer::DataType::FLOAT32: { + if (data.find(name) != data.end()) { + 
tensor->CopyFromCpu(data.at(name).data()); + } else { + std::vector input_data(shape_num, 1.0); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + case paddle_infer::DataType::INT32: { + if (data.find(name) != data.end()) { + std::vector input_data(data.at(name).begin(), data.at(name).end()); + tensor->CopyFromCpu(input_data.data()); + } else { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + case paddle_infer::DataType::INT64: { + if (data.find(name) != data.end()) { + std::vector input_data(data.at(name).begin(), + data.at(name).end()); + tensor->CopyFromCpu(input_data.data()); + } else { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + default: { + FDASSERT(false, "Input data Paddle backend only supports " + "FP32/INT32/INT64 currently."); + break; + } + } + } + predictor->Run(); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h new file mode 100755 index 0000000000..874b8b7f2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/paddle/option.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif +#include "paddle/include/paddle_inference_api.h" // NOLINT +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { + +// convert FD device to paddle place type +paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device); + +// Share memory buffer with paddle_infer::Tensor from ultrainfer::FDTensor +void ShareTensorFromFDTensor(paddle_infer::Tensor *tensor, FDTensor &fd_tensor); + +void ShareOutTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor); + +// convert paddle_infer::Tensor to ultrainfer::FDTensor +// if copy_to_fd is true, copy memory data to FDTensor +/// else share memory to FDTensor +void PaddleTensorToFDTensor(std::unique_ptr &tensor, + FDTensor *fd_tensor, bool copy_to_fd); + +// Convert data type from paddle inference to ultrainfer +FDDataType PaddleDataTypeToFD(const paddle_infer::DataType &dtype); + +// Convert data type from paddle2onnx::PaddleReader to ultrainfer +FDDataType ReaderDataTypeToFD(int32_t dtype); + +class PaddleBackend : public BaseBackend { +public: + PaddleBackend() {} + virtual ~PaddleBackend() = default; + bool Init(const RuntimeOption &option); + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) override; + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + void BuildOption(const PaddleBackendOption &option); + + bool + InitFromPaddle(const std::string &model, const std::string ¶ms, + bool model_from_memory, + const PaddleBackendOption &option = PaddleBackendOption()); + + void + CollectShapeRun(paddle_infer::Predictor *predictor, + const std::map> &shape, + const std::map> &data) const; + void GetDynamicShapeFromOption( + const PaddleBackendOption &option, + std::map> *max_shape, + std::map> *min_shape, + std::map> *opt_shape) const; + void GetInputDataFromOption( + const PaddleBackendOption &option, + std::map> *max_input_data, + std::map> *min_input_data, + std::map> *opt_input_data) const; + void SetTRTDynamicShapeToConfig(const PaddleBackendOption &option); + PaddleBackendOption option_; + paddle_infer::Config config_; + std::shared_ptr predictor_; + std::vector inputs_desc_; + std::vector outputs_desc_; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc new file mode 100755 index 0000000000..f5e1e01741 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc @@ -0,0 +1,236 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" + +namespace ultrainfer { +paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) { + if (device == Device::GPU) { + return paddle_infer::PlaceType::kGPU; + } else if (device == Device::KUNLUNXIN) { + return paddle_infer::PlaceType::kXPU; + } + return paddle_infer::PlaceType::kCPU; +} + +void ShareTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor) { + std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + tensor->Reshape(shape); + auto place = ConvertFDDeviceToPlace(fd_tensor.device); + if (fd_tensor.dtype == FDDataType::FP32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT64) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::UINT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor.dtype).c_str()); +} + +void ShareOutTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor) { + std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + auto place = ConvertFDDeviceToPlace(fd_tensor.device); + if (fd_tensor.dtype == FDDataType::FP32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT64) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT8) { + if (place == 
paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::UINT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor.dtype).c_str()); +} + +void PaddleTensorToFDTensor(std::unique_ptr &tensor, + FDTensor *fd_tensor, bool copy_to_fd) { + auto fd_dtype = PaddleDataTypeToFD(tensor->type()); + std::vector shape; + auto tmp_shape = tensor->shape(); + shape.assign(tmp_shape.begin(), tmp_shape.end()); + if (copy_to_fd) { + fd_tensor->Resize(shape, fd_dtype, tensor->name()); + if (fd_tensor->dtype == FDDataType::FP32) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT32) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT64) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT8) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::UINT8) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor->dtype).c_str()); + } else { + paddle_infer::PlaceType place; + int size = 0; + // TODO(liqi): The tensor->data interface of paddle don't return device id + // and don't support return void*. 
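// Note on the zero-copy branch below (copy_to_fd == false): the FDTensor only
// wraps the buffer owned by the paddle_infer::Tensor via SetExternalData, so
// the pointer is generally valid only until the next predictor Run(); callers
// should consume or copy the data before running inference again.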
+ void *out_data = nullptr; + if (fd_dtype == FDDataType::FP32) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT32) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT64) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT8) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::UINT8) { + out_data = tensor->data(&place, &size); + } else { + FDASSERT( + false, + "Unexpected data type(%s) while infer shared with PaddleBackend.", + Str(fd_dtype).c_str()); + } + Device device = Device::CPU; + if (place == paddle_infer::PlaceType::kGPU) { + device = Device::GPU; + } else if (place == paddle_infer::PlaceType::kXPU) { + device = Device::KUNLUNXIN; + FDASSERT(false, "Currently, copy_to_fd=false, FDTensor SetExternalData " + "is not support for Device::KUNLUNXIN now!") + } + fd_tensor->name = tensor->name(); + fd_tensor->SetExternalData(shape, fd_dtype, out_data, device); + } +} + +FDDataType PaddleDataTypeToFD(const paddle_infer::DataType &dtype) { + auto fd_dtype = FDDataType::FP32; + if (dtype == paddle_infer::FLOAT32) { + fd_dtype = FDDataType::FP32; + } else if (dtype == paddle_infer::INT64) { + fd_dtype = FDDataType::INT64; + } else if (dtype == paddle_infer::INT32) { + fd_dtype = FDDataType::INT32; + } else if (dtype == paddle_infer::UINT8) { + fd_dtype = FDDataType::UINT8; + } else if (dtype == paddle_infer::INT8) { + fd_dtype = FDDataType::INT8; + } else if (dtype == paddle_infer::FLOAT16) { + fd_dtype = FDDataType::FP16; + } else { + FDASSERT( + false, + "Unexpected data type: %d while call CopyTensorToCpu in PaddleBackend.", + int(dtype)); + } + return fd_dtype; +} + +FDDataType ReaderDataTypeToFD(int32_t dtype) { + auto fd_dtype = FDDataType::FP32; + if (dtype == 0) { + fd_dtype = FDDataType::FP32; + } else if (dtype == 1) { + fd_dtype = FDDataType::FP64; + } else if (dtype == 2) { + fd_dtype = FDDataType::UINT8; + } else if (dtype == 3) { + fd_dtype = FDDataType::INT8; + } else if (dtype == 4) { + fd_dtype = FDDataType::INT32; + } else if (dtype == 5) { + fd_dtype = FDDataType::INT64; + } else if (dtype == 6) { + fd_dtype = FDDataType::FP16; + } else { + FDASSERT(false, + "Unexpected data type: %d while call ReaderDataTypeToFD in " + "PaddleBackend.", + dtype); + } + return fd_dtype; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h new file mode 100755 index 0000000000..03d86c3cdf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h @@ -0,0 +1,170 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "iengine.h" // NOLINT +#include "poros_module.h" // NOLINT +#include "torch/script.h" // NOLINT + +namespace baidu { +namespace mirana { +namespace poros { + +/** + * @brief compile graph + * + * @param [in] module : original module + * @param [in] input_ivalues : prewarm datas + * @param [in] options : Inference options + * @return porosmodule + * @retval !nullptr => succeed nullptr => failed + **/ +std::unique_ptr +Compile(const torch::jit::Module &module, + const std::vector> &prewarm_datas, + const PorosOptions &options); + +class Compiler { +public: + typedef std::unordered_map engine_map_t; + typedef std::vector> ivalue_vec_t; + + Compiler() : _origin_module(NULL) {} + ~Compiler(); + + /** + * @brief initial Compiler + * + * @param [in] options : poros options + * @return int + * @retval 0 => succeed <0 => failed + **/ + int init(const PorosOptions &options); + + /** + * @brief compile whole graph + * + * @param [in] origin_module + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [out] optimized_module : optimized graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int compile(const torch::jit::Module &origin_module, + const ivalue_vec_t &prewarm_datas, + torch::jit::Module *optimized_module); + +private: + /** + * @brief preprocess this calculation graph + * + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [out] graph : preprcessed graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int preprocess_graph(const ivalue_vec_t &prewarm_datas, + std::shared_ptr &graph); + + /** + * @brief segement this calculation graph + * + * @param [in/out] graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int segment_graph(std::shared_ptr &graph); + + // Split subgraph(block) + // The divided subgraph, as a subgraph, is associated with the block + int segment_block(torch::jit::Block &block, IEngine *engine, + int current_depth); + + // Subgraph optimization + /** + * @brief Subgraph optimization + * + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [in] opt_graph : ivalue_vec_t, vector of IValue + * @param [out] optimized_module : optimized graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int optimize_subgraph(const ivalue_vec_t &prewarm_datas, + const std::shared_ptr &opt_graph, + torch::jit::Module *optimized_module); + + // Subgraph optimization(block) + int optimize_subblock(torch::jit::Block *block, + torch::jit::Module *optimized_module); + + /** + * @brief Compile the subgraph into a new graph based on the engine + * + * @param [in] engine : The engine used by the subgraph + * @param [in] subgraph_node : Subgraph node + * @return [out] module : Transformed model + * @retval 0 => succeed <0 => failed + **/ + int transform(IEngine *engine, torch::jit::Node &subgraph_node, + torch::jit::Module &module); + + /** + * @brief Select engine based on subgraph and options + * + * @param [in] node : Jit Node + * @return int + * @retval 0 => succeed <0 => failed + **/ + IEngine *select_engine(const torch::jit::Node *n); + + /** + * @brief destory + * + * @return void + **/ + void close(); + +private: + int _max_segment_depth{5}; // Maximum subgraph segmentation depth + ivalue_vec_t _prewarm_datas; // Prewarm datas + PorosOptions _options; + engine_map_t _engine_map; // The engine used to record the subgraph + const torch::jit::Module *_origin_module; // Origin_module + std::atomic _engine_index = 
{0}; // Record engine index
+};
+
+/**
+ * @brief Compile the graph (internal use)
+ *
+ * @param [in] module : Origin module
+ * @param [in] input_ivalues : Prewarm data
+ * @param [in] options : Inference options
+ * @return optimized_module
+ * @retval !nullptr => succeed  nullptr => failed
+ **/
+std::unique_ptr<PorosModule>
+CompileGraph(const torch::jit::Module &module,
+             const std::vector<std::vector<c10::IValue>> &prewarm_datas,
+             const PorosOptions &options);
+
+} // namespace poros
+} // namespace mirana
+} // namespace baidu
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h
new file mode 100755
index 0000000000..908ac4253b
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+// from pytorch
+#include "ATen/core/interned_strings.h" // NOLINT
+#include "torch/csrc/jit/ir/ir.h"       // NOLINT
+#include "torch/script.h"               // NOLINT
+
+#include "plugin_create.h" // NOLINT
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+struct PorosGraph {
+  torch::jit::Graph *graph = NULL;
+  torch::jit::Node *node = NULL;
+};
+
+typedef uint64_t EngineID;
+
+class IEngine : public IPlugin, public torch::CustomClassHolder {
+public:
+  virtual ~IEngine() {}
+
+  /**
+   * @brief Initialize the engine; the engine is usable only if init succeeds.
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int init() = 0;
+
+  /**
+   * @brief During compilation, the subgraph is converted into the graph
+   * structure of the corresponding engine and stored inside the engine, so
+   * that excute_engine() can be called at runtime.
+   * @param [in] sub_graph : subgraph
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int transform(const PorosGraph &sub_graph) = 0;
+
+  /**
+   * @brief Logic that runs the subgraph at execution time.
+   * @param [in] inputs : input tensors
+   * @return output tensors
+   **/
+  virtual std::vector<at::Tensor>
+  excute_engine(const std::vector<at::Tensor> &inputs) = 0;
+
+  virtual void register_module_attribute(const std::string &name,
+                                         torch::jit::Module &module) = 0;
+
+  // Name that identifies this engine
+  virtual const std::string who_am_i() = 0;
+
+  // Whether the node is supported by the current engine
+  bool is_node_supported(const torch::jit::Node *node);
+
+public:
+  std::pair<uint64_t, uint64_t> _num_io; // Number of input/output parameters
+  EngineID _id;
+};
+
+} // namespace poros
+} // namespace mirana
+} // namespace baidu
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h
new file mode 100755
index 0000000000..1b4cb3b62f
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace baidu { +namespace mirana { +namespace poros { + +class IPlugin { +public: + virtual ~IPlugin() {} + virtual const std::string who_am_i() = 0; +}; + +typedef IPlugin *(*plugin_creator_t)(); +typedef std::unordered_map plugin_creator_map_t; + +IPlugin *create_plugin(const std::string &plugin_name); +IPlugin *create_plugin(const std::string &plugin_name, + const plugin_creator_map_t &plugin_creator_map); + +void create_all_plugins(const plugin_creator_map_t &plugin_creator_map, + std::unordered_map &plugin_m); +// void create_all_plugins(std::unordered_map& plugin_m); + +template IPlugin *default_plugin_creator() { + return new (std::nothrow) PluginType; +} + +void register_plugin_creator(const std::string &plugin_name, + plugin_creator_t creator); +void register_plugin_creator(const std::string &plugin_name, + plugin_creator_t creator, + plugin_creator_map_t &plugin_creator_map); + +template +void register_plugin_class(const std::string &plugin_name) { + return register_plugin_creator(plugin_name, + default_plugin_creator); +} + +// This version is recommended +template +void register_plugin_class(const std::string &plugin_name, + plugin_creator_map_t &plugin_creator_map) { + return register_plugin_creator( + plugin_name, default_plugin_creator, plugin_creator_map); +} + +} // namespace poros +} // namespace mirana +} // namespace baidu + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h new file mode 100755 index 0000000000..0d20ab26ac --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
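// A minimal sketch (not part of this patch) of how the plugin_create.h helpers
// above are meant to be used: register a creator under a name, then build the
// plugin by that name. "DemoPlugin" and demo_plugin_registration() are
// hypothetical names used only for illustration.

#include <iostream>
#include <string>

#include "plugin_create.h" // NOLINT

namespace {
class DemoPlugin : public baidu::mirana::poros::IPlugin {
public:
  const std::string who_am_i() override { return "DemoPlugin"; }
};
} // namespace

int demo_plugin_registration() {
  baidu::mirana::poros::plugin_creator_map_t creators;
  // Map the name "DemoPlugin" to default_plugin_creator<DemoPlugin>.
  baidu::mirana::poros::register_plugin_class<DemoPlugin>("DemoPlugin",
                                                          creators);
  // Look the creator up by name and instantiate the plugin.
  baidu::mirana::poros::IPlugin *plugin =
      baidu::mirana::poros::create_plugin("DemoPlugin", creators);
  std::cout << plugin->who_am_i() << std::endl; // prints "DemoPlugin"
  delete plugin;
  return 0;
}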
+ +#pragma once + +#include "torch/csrc/jit/jit_log.h" // NOLINT +#include "torch/script.h" // NOLINT +#include +// #include "ATen/Context.h" + +namespace baidu { +namespace mirana { +namespace poros { + +enum Device : int8_t { GPU = 0, CPU, XPU, UNKNOW }; + +struct PorosOptions { + Device device = GPU; + bool debug = false; + bool use_fp16 = false; + bool is_dynamic = false; + bool long_to_int = true; + uint64_t max_workspace_size = 1ULL << 30; + int32_t device_id = -1; + int32_t unconst_ops_thres = -1; + bool use_nvidia_tf32 = false; +}; + +class PorosModule : public torch::jit::Module { +public: + PorosModule(torch::jit::Module module) + : torch::jit::Module(module) {} // NOLINT + ~PorosModule() = default; + + void to_device(Device device) { _options.device = device; } + + // c10::IValue forward(std::vector inputs); + // void save(const std::string& filename); +public: + PorosOptions _options; +}; + +// via porosmodule.save +std::unique_ptr Load(const std::string &filename, + const PorosOptions &options); + +} // namespace poros +} // namespace mirana +} // namespace baidu diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h new file mode 100755 index 0000000000..9e65db1089 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include + +namespace ultrainfer { + +/*! @brief Option object to configure Poros backend + */ +struct PorosBackendOption { + Device device = Device::CPU; + int device_id = 0; + bool long_to_int = true; + // There is calculation precision in tf32 mode on A10, it can bring some + // performance improvement, but there may be diff + bool use_nvidia_tf32 = false; + // Threshold for the number of non-const ops + int32_t unconst_ops_thres = -1; + std::string poros_file = ""; + std::vector prewarm_datatypes = {FDDataType::FP32}; + // TRT options + bool enable_fp16 = false; + bool enable_int8 = false; + bool is_dynamic = false; + size_t max_batch_size = 32; + size_t max_workspace_size = 1 << 30; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc new file mode 100755 index 0000000000..86b257c3ba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+#include "ultrainfer/runtime/backends/poros/option.h"
+
+namespace ultrainfer {
+
+void BindPorosOption(pybind11::module &m) {
+  pybind11::class_<PorosBackendOption>(m, "PorosBackendOption")
+      .def(pybind11::init())
+      .def_readwrite("long_to_int", &PorosBackendOption::long_to_int)
+      .def_readwrite("use_nvidia_tf32", &PorosBackendOption::use_nvidia_tf32)
+      .def_readwrite("unconst_ops_thres",
+                     &PorosBackendOption::unconst_ops_thres)
+      .def_readwrite("prewarm_datatypes",
+                     &PorosBackendOption::prewarm_datatypes)
+      .def_readwrite("enable_fp16", &PorosBackendOption::enable_fp16)
+      .def_readwrite("enable_int8", &PorosBackendOption::enable_int8)
+      .def_readwrite("is_dynamic", &PorosBackendOption::is_dynamic)
+      .def_readwrite("max_batch_size", &PorosBackendOption::max_batch_size)
+      .def_readwrite("max_workspace_size",
+                     &PorosBackendOption::max_workspace_size);
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc
new file mode 100755
index 0000000000..d03b2b85be
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc
@@ -0,0 +1,175 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/runtime/backends/poros/poros_backend.h"
+
+#include
+
+namespace ultrainfer {
+
+TensorInfo PorosBackend::GetInputInfo(int index) {
+  // eager mode can't obtain input information before inference
+  TensorInfo info_input;
+  return info_input;
+}
+
+TensorInfo PorosBackend::GetOutputInfo(int index) {
+  // eager mode can't obtain output information before inference
+  TensorInfo info_output;
+  return info_output;
+}
+
+std::vector<TensorInfo> PorosBackend::GetInputInfos() {
+  // eager mode can't obtain input information before inference
+  std::vector<TensorInfo> info_inputs;
+  return info_inputs;
+}
+
+std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
+  // eager mode can't obtain output information before inference
+  std::vector<TensorInfo> info_outputs;
+  return info_outputs;
+}
+
+void PorosBackend::BuildOption(const PorosBackendOption &option) {
+  _options.device = (option.device == Device::GPU)
+                        ?
baidu::mirana::poros::Device::GPU + : baidu::mirana::poros::Device::CPU; + _options.long_to_int = option.long_to_int; + _options.use_nvidia_tf32 = option.use_nvidia_tf32; + _options.device_id = option.device_id; + _options.unconst_ops_thres = option.unconst_ops_thres; + _options.is_dynamic = option.is_dynamic; + _options.max_workspace_size = option.max_workspace_size; + _options.use_fp16 = option.enable_fp16; + return; +} + +bool PorosBackend::Compile(const std::string &model_file, + std::vector> &prewarm_tensors, + const PorosBackendOption &option) { + if (initialized_) { + FDERROR << "PorosBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + BuildOption(option); + torch::jit::Module mod; + mod = torch::jit::load(model_file); + mod.eval(); + if (option.device == Device::GPU) { + mod.to(at::kCUDA); + } else { + mod.to(at::kCPU); + } + // get inputs_nums and outputs_nums + auto graph = mod.get_method("forward").graph(); + auto inputs = graph->inputs(); + // remove self node + _numinputs = inputs.size() - 1; + // FDTensor to at::Tensor + std::vector> prewarm_datas; + bool is_backend_cuda = (option.device == Device::GPU); + for (size_t i = 0; i < prewarm_tensors.size(); ++i) { + std::vector prewarm_data; + for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) { + auto tensor = CreatePorosValue(prewarm_tensors[i][j], is_backend_cuda); + prewarm_data.push_back(tensor); + } + prewarm_datas.push_back(prewarm_data); + } + // get outputs nums + auto temp_result = mod.forward(prewarm_datas[0]); + size_t outputs_nums = 0; + if (temp_result.isTensor()) { + outputs_nums += 1; + } else if (temp_result.isTuple()) { + auto temp_result_tuple = temp_result.toTuple(); + for (size_t i = 0; i < temp_result_tuple->elements().size(); ++i) { + auto poros_tensor = temp_result_tuple->elements()[i]; + if (poros_tensor.isTensor()) { + outputs_nums += 1; + } else if (poros_tensor.isList()) { + auto poros_tensor_list = poros_tensor.toList(); + outputs_nums += poros_tensor_list.size(); + } else if (poros_tensor.isTuple()) { + auto poros_tensor_tuple = poros_tensor.toTuple(); + outputs_nums += poros_tensor_tuple->elements().size(); + } else { + continue; + } + } + } + _numoutputs = outputs_nums; + _poros_module = baidu::mirana::poros::Compile(mod, prewarm_datas, _options); + if (_poros_module == nullptr) { + FDERROR << "PorosBackend initlize Failed, try initialize again." + << std::endl; + return false; + } + initialized_ = true; + return true; +} + +bool PorosBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + // Convert FD Tensor to PyTorch Tensor + std::vector poros_inputs; + bool is_backend_cuda = + _options.device == baidu::mirana::poros::Device::GPU ? 
true : false; + for (size_t i = 0; i < inputs.size(); ++i) { + poros_inputs.push_back(CreatePorosValue(inputs[i], is_backend_cuda)); + } + // Infer + auto poros_outputs = _poros_module->forward(poros_inputs); + // Convert PyTorch Tensor to FD Tensor + if (poros_outputs.isTensor()) { + CopyTensorToCpu(poros_outputs.toTensor(), &((*outputs)[0]), + is_backend_cuda); + } else if (poros_outputs.isTuple()) { + // deal with multi outputs + auto poros_outputs_tuple = poros_outputs.toTuple(); + size_t index = 0; + for (size_t i = 0; i < poros_outputs_tuple->elements().size(); ++i) { + auto poros_tensor = poros_outputs_tuple->elements()[i]; + if (poros_tensor.isTensor()) { + CopyTensorToCpu(poros_tensor.toTensor(), &((*outputs)[index]), + is_backend_cuda); + index += 1; + } else if (poros_tensor.isList()) { + auto poros_tensor_list = poros_tensor.toList(); + for (const auto list_idx : c10::irange(0, poros_tensor_list.size())) { + const auto &elt = poros_tensor_list.get(list_idx); + CopyTensorToCpu(elt.toTensor(), &((*outputs)[index]), + is_backend_cuda); + index += 1; + } + } else if (poros_tensor.isTuple()) { + auto poros_tensor_tuple = poros_tensor.toTuple(); + for (size_t j = 0; j < poros_tensor_tuple->elements().size(); ++j) { + CopyTensorToCpu(poros_tensor_tuple->elements()[j].toTensor(), + &((*outputs)[index]), is_backend_cuda); + index += 1; + } + } else { + continue; + } + } + } else { + FDERROR << "Convert to FDTensor Failed!!!!!" << std::endl; + } + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h new file mode 100755 index 0000000000..89dd88e889 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
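// A hedged usage sketch for the backend declared below (not part of this
// patch). Assumptions: a TorchScript model saved at "model.pt" and a single
// FP32 input of shape [1, 3, 224, 224]; real prewarm shapes and dtypes depend
// on the model being deployed.

#include <string>
#include <vector>

#include "ultrainfer/runtime/backends/poros/poros_backend.h"

bool demo_poros_backend() {
  ultrainfer::PorosBackendOption option;
  option.device = ultrainfer::Device::GPU;

  // Poros compiles the TorchScript graph against one or more groups of
  // prewarm tensors that carry the expected input shapes/dtypes.
  ultrainfer::FDTensor prewarm;
  prewarm.Resize({1, 3, 224, 224}, ultrainfer::FDDataType::FP32);
  std::vector<std::vector<ultrainfer::FDTensor>> prewarm_groups{{prewarm}};

  ultrainfer::PorosBackend backend;
  if (!backend.Compile("model.pt", prewarm_groups, option)) {
    return false;
  }

  // Infer() writes into a pre-sized output vector (it does not resize it).
  std::vector<ultrainfer::FDTensor> inputs{prewarm};
  std::vector<ultrainfer::FDTensor> outputs(backend.NumOutputs());
  return backend.Infer(inputs, &outputs);
}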
+ +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/poros/common/compile.h" +#include "ultrainfer/runtime/backends/poros/common/poros_module.h" +#include "ultrainfer/runtime/backends/poros/option.h" + +namespace ultrainfer { + +// Convert data type from ultrainfer to poros +at::ScalarType GetPorosDtype(const FDDataType &fd_dtype); + +// Convert data type from poros to ultrainfer +FDDataType GetFdDtype(const at::ScalarType &dtype); + +// at::ScalarType to std::string for FDERROR +std::string AtType2String(const at::ScalarType &dtype); + +// Create at::Tensor +// is_backend_cuda specify if Poros use GPU Device +// While is_backend_cuda = true, and tensor.device = Device::GPU +at::Tensor CreatePorosValue(FDTensor &tensor, bool is_backend_cuda = false); + +// Copy memory data from at::Tensor to ultrainfer::FDTensor +void CopyTensorToCpu(const at::Tensor &tensor, FDTensor *fd_tensor, + bool is_backend_cuda = false); + +class PorosBackend : public BaseBackend { +public: + PorosBackend() {} + virtual ~PorosBackend() = default; + + void BuildOption(const PorosBackendOption &option); + + bool Init(const RuntimeOption &option) { + if (!(Supported(option.model_format, Backend::POROS) && + Supported(option.device, Backend::POROS))) { + return false; + } + if (option.model_from_memory_) { + FDERROR << "Poros backend doesn't support load model " + << "from memory, please load model from disk." << std::endl; + return false; + } + return true; + } + + bool Compile(const std::string &model_file, + std::vector> &prewarm_tensors, + const PorosBackendOption &option = PorosBackendOption()); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const { return _numinputs; } + + int NumOutputs() const { return _numoutputs; } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + baidu::mirana::poros::PorosOptions _options; + std::unique_ptr _poros_module; + std::vector> _prewarm_datas; + int _numinputs = 1; + int _numoutputs = 1; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc new file mode 100755 index 0000000000..e3b11b743b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc @@ -0,0 +1,185 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/poros/poros_backend.h" + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { + +std::string AtType2String(const at::ScalarType &dtype) { + std::string out; + switch (dtype) { + case at::kByte: + out = "at::kByte"; + break; + case at::kChar: + out = "at::kChar"; + break; + case at::kShort: + out = "at::kShort"; + break; + case at::kInt: + out = "at::kInt"; + break; + case at::kLong: + out = "at::kLong"; + break; + case at::kHalf: + out = "at::kHalf"; + break; + case at::kFloat: + out = "at::kFloat"; + break; + case at::kDouble: + out = "at::kDouble"; + break; + default: + out = "at::UNKNOWN"; + } + return out; +} + +at::ScalarType GetPorosDtype(const FDDataType &fd_dtype) { + if (fd_dtype == FDDataType::FP32) { + return at::kFloat; + } else if (fd_dtype == FDDataType::FP64) { + return at::kDouble; + } else if (fd_dtype == FDDataType::INT32) { + return at::kInt; + } else if (fd_dtype == FDDataType::INT64) { + return at::kLong; + } + FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." + << std::endl; + return at::kFloat; +} + +FDDataType GetFdDtype(const at::ScalarType &poros_dtype) { + if (poros_dtype == at::kFloat) { + return FDDataType::FP32; + } else if (poros_dtype == at::kDouble) { + return FDDataType::FP64; + } else if (poros_dtype == at::kInt) { + return FDDataType::INT32; + } else if (poros_dtype == at::kLong) { + return FDDataType::INT64; + } + FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype) + << "." << std::endl; + return FDDataType::FP32; +} + +at::Tensor CreatePorosValue(FDTensor &tensor, bool is_backend_cuda) { + FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, + "Only support tensor which device is CPU or GPU for PorosBackend."); + auto data_type = GetPorosDtype(tensor.dtype); + size_t numel = tensor.Numel(); + at::Tensor poros_value; + if (is_backend_cuda) { + poros_value = std::move( + at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous()); + } else { + poros_value = std::move( + at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous()); + } + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float)); + } + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t)); + } + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t)); + } + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double)); + } + } else { + FDASSERT(false, "Unrecognized data type while calling " + "PorosBackend::CreatePorosValue()."); + } + return poros_value; +} + +void CopyTensorToCpu(const at::Tensor &tensor, FDTensor *fd_tensor, + bool is_backend_cuda) { + const auto data_type = tensor.scalar_type(); + std::vector shape; 
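// The at::Tensor shape is copied into an FDTensor of matching dtype below;
// only FP32/FP64/INT32/INT64 payloads are handled by this helper.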
+ auto sizes = tensor.sizes(); + for (size_t i = 0; i < sizes.size(); i++) { + shape.push_back(sizes[i]); + } + auto fd_dtype = GetFdDtype(data_type); + fd_tensor->Resize(shape, fd_dtype); + size_t numel = tensor.numel(); + // at::Tensor -> FDTensor + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float)); + } + return; + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t)); + } + return; + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t)); + } + return; + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double)); + } + return; + } +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h new file mode 100755 index 0000000000..dfe40f3254 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { +namespace rknpu2 { +typedef enum _rknpu2_cpu_name { + RK356X = 0, /* run on RK356X. */ + RK3588 = 1, /* default,run on RK3588. */ + UNDEFINED, +} CpuName; + +/* The specification of NPU core setting.It has the following choices : + * RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will + * select the idle core inside the NPU. + * RKNN_NPU_CORE_0 : Running on the NPU0 core. + * RKNN_NPU_CORE_1: Runing on the NPU1 core. + * RKNN_NPU_CORE_2: Runing on the NPU2 core. + * RKNN_NPU_CORE_0_1: Running on both NPU0 and NPU1 core simultaneously. + * RKNN_NPU_CORE_0_1_2: Running on both NPU0, NPU1 and NPU2 simultaneously. 
+ */ +typedef enum _rknpu2_core_mask { + RKNN_NPU_CORE_AUTO = 0, + RKNN_NPU_CORE_0 = 1, + RKNN_NPU_CORE_1 = 2, + RKNN_NPU_CORE_2 = 4, + RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, + RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, + RKNN_NPU_CORE_UNDEFINED, +} CoreMask; +} // namespace rknpu2 + +struct RKNPU2BackendOption { + rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588; + rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc new file mode 100755 index 0000000000..f0a50e3596 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc @@ -0,0 +1,593 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h" +namespace ultrainfer { +RKNPU2Backend::~RKNPU2Backend() { + if (tensor_attrs_init_) { + if (input_attrs_ != nullptr) { + free(input_attrs_); + } + + if (output_attrs_ != nullptr) { + free(output_attrs_); + } + } + + if (tensor_memory_init_) { + for (uint32_t i = 0; i < io_num_.n_input; i++) { + rknn_destroy_mem(ctx_, input_mems_[i]); + } + + for (uint32_t i = 0; i < io_num_.n_output; i++) { + rknn_destroy_mem(ctx_, output_mems_[i]); + } + } +} + +/* + * @name RuntimeOptionIsApplicable + * @brief This function is used to determine whether the RuntimeOption + * meets the operating conditions of RKNPU2. + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::RuntimeOptionIsApplicable( + const RuntimeOption &runtime_option) { + if (!Supported(runtime_option.model_format, Backend::RKNPU2)) { + FDERROR << "The model format is not supported for RKNPU2." << std::endl; + return false; + } + + if (!Supported(runtime_option.device, Backend::RKNPU2)) { + FDERROR << "The device is not supported for RKNPU2." << std::endl; + return false; + } + + if (runtime_option.model_from_memory_) { + FDERROR << "RKNPU2 backend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + return true; +} + +/* + * @name GetSDKAndDeviceVersion + * @brief Get RKNPU2 sdk and device version. + * @param None + * @return bool + * @note The private variable ctx_ must be initialized. + */ +bool RKNPU2Backend::GetSDKAndDeviceVersion() { + int ret; + ret = rknn_query(ctx_, RKNN_QUERY_SDK_VERSION, &sdk_ver_, sizeof(sdk_ver_)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + FDINFO << "rknpu2 runtime version: " << sdk_ver_.api_version << std::endl; + FDINFO << "rknpu2 driver version: " << sdk_ver_.drv_version << std::endl; + return true; +} + +/* + * @name BuildOption + * @brief Save option and set core mask. 
+ * @param RKNPU2BackendOption + * @note None + */ +void RKNPU2Backend::BuildOption(const RKNPU2BackendOption &option) { + option_ = option; + + // save cpu_name + option_.cpu_name = option.cpu_name; + + // save context + option_.core_mask = option.core_mask; + + // set core mask + if (option_.cpu_name == rknpu2::CpuName::RK3588) { + if (!SetCoreMask(option_.core_mask)) { + FDERROR << "set core mask failed" << std::endl; + } + } +} + +/*************************************************************** + * @name Init + * @brief Initialize RKNN model + * @param model_file: Binary data for the RKNN model or the path of RKNN + * @return bool + * @note None + ***************************************************************/ +bool RKNPU2Backend::Init(const RuntimeOption &runtime_option) { + if (!RuntimeOptionIsApplicable(runtime_option)) { + FDERROR << "Runtime option is not applicable." << std::endl; + return false; + } + + if (!LoadModel((char *)runtime_option.model_file.data())) { + FDERROR << "Load model failed" << std::endl; + return false; + } + + if (!InitInputAndOutputNumber()) { + FDERROR << "Init input and output number failed" << std::endl; + return false; + } + + if (!GetSDKAndDeviceVersion()) { + FDERROR << "Get SDK and device version failed" << std::endl; + return false; + } + + BuildOption(runtime_option.rknpu2_option); + + if (!InitInputAndOutputInformation()) { + FDERROR << "Get model input output information failed" << std::endl; + return false; + } + + return true; +} + +/* + * @name SetCoreMask + * @brief Set NPU core for model + * @param core_mask: The specification of NPU core setting. + * @return bool + * @note Only support RK3588 + */ +bool RKNPU2Backend::SetCoreMask(const rknpu2::CoreMask &core_mask) const { + if (option_.cpu_name != rknpu2::CpuName::RK3588) { + FDINFO << "SetCoreMask only support when soc is RK3588." << std::endl; + return false; + } + + int ret = rknn_set_core_mask(ctx_, static_cast(core_mask)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_core_mask) failed! ret=" << ret + << std::endl; + return false; + } + return true; +} + +/* + * @name LoadModel + * @brief Read the model and initialize rknn context. + * @param model: Binary data for the RKNN model or the path of RKNN model. + * @return bool + * @note None + */ +bool RKNPU2Backend::LoadModel(void *model) { + int ret = RKNN_SUCC; + ret = rknn_init(&ctx_, model, 0, 0, nullptr); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_init) failed! ret=" << ret << std::endl; + return false; + } + return true; +} + +/* + * @name InitInputAndOutputNumber + * @brief Initialize io_num_. + * @param + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ +bool RKNPU2Backend::InitInputAndOutputNumber() { + if (io_num_init_) { + FDERROR << "The private variable io_num_ has been initialized." + << std::endl; + return false; + } + int ret = RKNN_SUCC; + ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num_, sizeof(io_num_)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + io_num_init_ = true; + return true; +} + +/* + * @name InitRKNNTensorAddress + * @brief Allocate memory for input_attrs_ and output_attrs_. + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::InitRKNNTensorAddress() { + if (tensor_attrs_init_) { + FDERROR << "Private variable input_attrs_ and output_attrs_ memory has " + "been allocated. 
Please do not allocate memory repeatedly or " + "memory leak may occur." + << std::endl; + return false; + } + + if (!io_num_init_) { + InitInputAndOutputNumber(); + } + + if (io_num_.n_input == 0) { + FDERROR << "The number of input tensors is 0." << std::endl; + return false; + } + + if (io_num_.n_output == 0) { + FDERROR << "The number of output tensors is 0." << std::endl; + return false; + } + + // Allocate memory for private variable input_attrs_. + input_attrs_ = + (rknn_tensor_attr *)malloc(sizeof(rknn_tensor_attr) * io_num_.n_input); + memset(input_attrs_, 0, io_num_.n_input * sizeof(rknn_tensor_attr)); + for (uint32_t i = 0; i < io_num_.n_input; i++) { + int ret = RKNN_SUCC; + input_attrs_[i].index = i; + ret = rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attrs_[i]), + sizeof(rknn_tensor_attr)); + + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + + if ((input_attrs_[i].fmt != RKNN_TENSOR_NHWC) && + (input_attrs_[i].fmt != RKNN_TENSOR_UNDEFINED)) { + FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" + << std::endl; + return false; + } + + DumpTensorAttr(input_attrs_[i]); + } + + // Allocate memory for private variable output_attrs_. + output_attrs_ = + (rknn_tensor_attr *)malloc(sizeof(rknn_tensor_attr) * io_num_.n_output); + memset(output_attrs_, 0, io_num_.n_output * sizeof(rknn_tensor_attr)); + for (uint32_t i = 0; i < io_num_.n_output; i++) { + int ret = RKNN_SUCC; + output_attrs_[i].index = i; + ret = rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), + sizeof(rknn_tensor_attr)); + + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + + // UltraInfer Only support postprocess when output type is fp32, + // so output_attrs_.type needs to be fixed as RKNN_TENSOR_FLOAT32. + output_attrs_[i].type = RKNN_TENSOR_FLOAT32; + DumpTensorAttr(output_attrs_[i]); + } + tensor_attrs_init_ = true; + return true; +} + +/* + * @name InitInputAndOutputInformation + * @brief Get the detailed input and output information of Model + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::InitInputAndOutputInformation() { + if (!io_num_init_) { + InitInputAndOutputNumber(); + } + + if (!tensor_attrs_init_) { + InitRKNNTensorAddress(); + } + + if (io_num_.n_input == 0) { + FDERROR << "The number of input tensors is 0." << std::endl; + return false; + } + + if (io_num_.n_output == 0) { + FDERROR << "The number of output tensors is 0." << std::endl; + return false; + } + + inputs_desc_.resize(io_num_.n_input); + outputs_desc_.resize(io_num_.n_output); + + // Get input info and copy to input tensor info + for (uint32_t i = 0; i < io_num_.n_input; i++) { + // Copy input_attrs_ to input tensor info + std::string temp_name = input_attrs_[i].name; + std::vector temp_shape{}; + temp_shape.resize(input_attrs_[i].n_dims); + for (int j = 0; j < input_attrs_[i].n_dims; j++) { + temp_shape[j] = (int)input_attrs_[i].dims[j]; + } + FDDataType temp_dtype = + ultrainfer::RKNPU2Backend::RknnTensorTypeToFDDataType( + input_attrs_[i].type); + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[i] = temp_input_info; + } + + for (uint32_t i = 0; i < io_num_.n_output; i++) { + // If the output dimension is 3, the runtime will automatically change it + // to 4. Obviously, this is wrong, and manual correction is required here. 
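// For example, a 3-D output exported as [1, 8400, 4] may be reported by the
// runtime as [1, 8400, 4, 1]; the trailing 1 is dropped below.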
+ int n_dims = static_cast(output_attrs_[i].n_dims); + if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) { + n_dims--; + } + + // Copy output_attrs_ to output tensor + std::string temp_name = output_attrs_[i].name; + std::vector temp_shape{}; + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)output_attrs_[i].dims[j]; + } + + // The data type of output data is changed to FP32 + FDDataType temp_dtype = FDDataType::FP32; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_[i] = temp_input_info; + } + return true; +} + +/* + * @name DumpTensorAttr + * @brief Get the model's detailed inputs and outputs + * @param rknn_tensor_attr + * @return None + * @note None + */ +void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr &attr) { + printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], " + "n_elems=%d, size=%d, fmt=%s, type=%s, " + "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n", + attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1], + attr.dims[2], attr.dims[3], attr.n_elems, attr.size, + get_format_string(attr.fmt), get_type_string(attr.type), + get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale, + attr.pass_through); +} + +TensorInfo RKNPU2Backend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()) + return inputs_desc_[index]; +} + +std::vector RKNPU2Backend::GetInputInfos() { return inputs_desc_; } + +TensorInfo RKNPU2Backend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()) + return outputs_desc_[index]; +} + +std::vector RKNPU2Backend::GetOutputInfos() { + return outputs_desc_; +} + +/* + * @name InitRKNNTensorMemory + * @brief Allocate memory for input and output tensors. + * @param std::vector& inputs + * @return None + * @note None + */ +bool RKNPU2Backend::InitRKNNTensorMemory(std::vector &inputs) { + if (tensor_memory_init_) { + FDERROR << "Private variable input_mems_ and output_mems_ memory has " + "been allocated. Please do not allocate memory repeatedly or " + "memory leak may occur." + << std::endl; + return false; + } + int ret = RKNN_SUCC; + input_mems_.resize(io_num_.n_input); + output_mems_.resize(io_num_.n_output); + for (uint32_t i = 0; i < io_num_.n_input; i++) { + // Judge whether the input and output types are the same + rknn_tensor_type input_type = + ultrainfer::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[i].dtype); + if (input_type != input_attrs_[i].type) { + FDWARNING << "The input tensor type != model's inputs type." + << "The input_type need " + << get_type_string(input_attrs_[i].type) << ",but inputs[" << i + << "].type is " << get_type_string(input_type) << std::endl; + } + + // Create input tensor memory + input_attrs_[i].type = input_type; + input_attrs_[i].size = inputs[i].Nbytes(); + input_attrs_[i].size_with_stride = inputs[i].Nbytes(); + + input_mems_[i] = rknn_create_mem(ctx_, inputs[i].Nbytes()); + if (input_mems_[i] == nullptr) { + FDERROR << "The function(rknn_create_mem) failed! ret=" << ret + << std::endl; + return false; + } + + // Set input tensor memory + ret = rknn_set_io_mem(ctx_, input_mems_[i], &input_attrs_[i]); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_io_mem) failed! ret=" << ret + << std::endl; + return false; + } + } + + for (uint32_t i = 0; i < io_num_.n_output; ++i) { + // Most post-processing does not support the fp16 format. 
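// Output types were forced to RKNN_TENSOR_FLOAT32 in InitRKNNTensorAddress(),
// so each output buffer is sized as n_elems * sizeof(float).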
+ uint32_t output_size = output_attrs_[i].n_elems * sizeof(float); + output_mems_[i] = rknn_create_mem(ctx_, output_size); + if (output_mems_[i] == nullptr) { + FDERROR << "The function(rknn_create_mem) failed! ret=" << ret + << std::endl; + return false; + } + + // Set output tensor memory + ret = rknn_set_io_mem(ctx_, output_mems_[i], &output_attrs_[i]); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_io_mem) failed! ret=" << ret + << std::endl; + return false; + } + } + + tensor_memory_init_ = true; + return true; +} + +bool RKNPU2Backend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (!tensor_memory_init_) { + if (!InitRKNNTensorMemory(inputs)) { + FDERROR << "Init tensor memory failed." << std::endl; + } + } + + int ret = RKNN_SUCC; + // Judge whether the input and output size are the same + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + // Copy input data to input tensor memory + for (uint32_t i = 0; i < io_num_.n_input; i++) { + uint32_t width = input_attrs_[i].dims[2]; + uint32_t stride = input_attrs_[i].w_stride; + if (width == stride) { + if (inputs[i].Data() == nullptr) { + FDERROR << "inputs[0].Data is NULL." << std::endl; + return false; + } + memcpy(input_mems_[i]->virt_addr, inputs[i].Data(), inputs[i].Nbytes()); + } else { + FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl; + return false; + } + } + + // run rknn + ret = rknn_run(ctx_, nullptr); + if (ret != RKNN_SUCC) { + FDERROR << "rknn run error! ret=" << ret << std::endl; + return false; + } + + // get result + outputs->resize(outputs_desc_.size()); + std::vector temp_shape(4); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), (float *)output_mems_[i]->virt_addr, + (*outputs)[i].Nbytes()); + } + + return true; +} + +/* + * @name RknnTensorTypeToFDDataType + * @brief Change RknnTensorType To FDDataType + * @param rknn_tensor_type + * @return None + * @note Most post-processing does not support the fp16 format. + * Therefore, if the input is FP16, the output will be FP32. 
+ */ +FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) { + if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) { + return FDDataType::FP32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) { + return FDDataType::FP32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT8) { + return FDDataType::INT8; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT16) { + return FDDataType::INT16; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT32) { + return FDDataType::INT32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) { + return FDDataType::UINT8; + } + if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) { + return FDDataType::BOOL; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +/* + * @name FDDataTypeToRknnTensorType + * @brief Change FDDataType To RknnTensorType + * @param FDDataType + * @return None + * @note None + */ +rknn_tensor_type +RKNPU2Backend::FDDataTypeToRknnTensorType(ultrainfer::FDDataType type) { + if (type == FDDataType::FP16) { + return rknn_tensor_type::RKNN_TENSOR_FLOAT16; + } + if (type == FDDataType::FP32) { + return rknn_tensor_type::RKNN_TENSOR_FLOAT32; + } + if (type == FDDataType::INT8) { + return rknn_tensor_type::RKNN_TENSOR_INT8; + } + if (type == FDDataType::INT16) { + return rknn_tensor_type::RKNN_TENSOR_INT16; + } + if (type == FDDataType::INT32) { + return rknn_tensor_type::RKNN_TENSOR_INT32; + } + if (type == FDDataType::UINT8) { + return rknn_tensor_type::RKNN_TENSOR_UINT8; + } + if (type == FDDataType::BOOL) { + return rknn_tensor_type::RKNN_TENSOR_BOOL; + } + FDERROR << "rknn_tensor_type don't support this type" << std::endl; + return RKNN_TENSOR_TYPE_MAX; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h new file mode 100755 index 0000000000..d408a9951b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
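The two conversion helpers above, together with the FP32 copy performed in Infer, mean that every tensor an RKNPU2Backend returns holds float data, even when the compiled RKNN graph runs in fp16. The following is a small illustrative sketch of what downstream code may therefore assume; the helper is hypothetical and only relies on the FDTensor::Data()/Nbytes() accessors already used in this backend.

#include "ultrainfer/core/fd_tensor.h"

// Illustrative helper (not from this patch): RKNPU2 outputs are always FP32,
// so the raw buffer can be read as float regardless of the on-device precision.
static float MaxScore(ultrainfer::FDTensor &output) {
  const float *data = static_cast<const float *>(output.Data());
  const size_t n = output.Nbytes() / sizeof(float);
  if (n == 0) {
    return 0.0f;
  }
  float best = data[0];
  for (size_t i = 1; i < n; ++i) {
    if (data[i] > best) {
      best = data[i];
    }
  }
  return best;
}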
+#pragma once + +#include "rknn_api.h" // NOLINT +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { +class RKNPU2Backend : public BaseBackend { +public: + /***************************** BaseBackend API *****************************/ + RKNPU2Backend() = default; + virtual ~RKNPU2Backend(); + bool Init(const RuntimeOption &runtime_option); + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + /***************************** BaseBackend API *****************************/ + +private: + /* + * @name RuntimeOptionIsApplicable + * @brief This function is used to determine whether the RuntimeOption + * meets the operating conditions of RKNPU2. + * @param None + * @return bool + * @note None + */ + bool RuntimeOptionIsApplicable(const RuntimeOption &runtime_option); + + /* + * @name LoadModel + * @brief Read the model and initialize rknn context. + * @param model: Binary data for the RKNN model or the path of RKNN + * model. + * @return bool + * @note None + */ + bool LoadModel(void *model); + + /* + * @name GetSDKAndDeviceVersion + * @brief Get RKNPU2 sdk and device version. + * @param None + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ + bool GetSDKAndDeviceVersion(); + + /* + * @name BuildOption + * @brief Save option and set core mask. + * @param RKNPU2BackendOption + * @note None + */ + void BuildOption(const RKNPU2BackendOption &option); + + /* + * @name SetCoreMask + * @brief Set NPU core for model + * @param core_mask: The specification of NPU core setting. + * @return bool + * @note Only support RK3588 + */ + bool SetCoreMask(const rknpu2::CoreMask &core_mask) const; + + /* + * @name InitInputAndOutputNumber + * @brief Initialize io_num_. + * @param + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ + bool InitInputAndOutputNumber(); + + /* + * @name InitRKNNTensorAddress + * @brief Allocate memory for input_attrs_ and output_attrs_. + * @param None + * @return bool + * @note None + */ + bool InitRKNNTensorAddress(); + + /* + * @name InitInputAndOutputInformation + * @brief Initialize inputs_desc_ and outputs_desc_. + * @param None + * @return bool + * @note None + */ + bool InitInputAndOutputInformation(); + + /* + * @name InitRKNNTensorMemory + * @brief Allocate memory for input and output tensors. 
+ * @param std::vector<FDTensor> &inputs
+ * @return bool
+ * @note None
+ */
+  bool InitRKNNTensorMemory(std::vector<FDTensor> &inputs);
+
+  rknn_context ctx_{};
+  rknn_sdk_version sdk_ver_{};
+
+  rknn_input_output_num io_num_{0, 0};
+
+  std::vector<TensorInfo> inputs_desc_;
+  std::vector<TensorInfo> outputs_desc_;
+
+  rknn_tensor_attr *input_attrs_ = nullptr;
+  rknn_tensor_attr *output_attrs_ = nullptr;
+
+  std::vector<rknn_tensor_mem *> input_mems_;
+  std::vector<rknn_tensor_mem *> output_mems_;
+
+  bool io_num_init_ = false;
+  bool tensor_attrs_init_ = false;
+  bool tensor_memory_init_ = false;
+
+  RKNPU2BackendOption option_;
+
+  /*
+   * @name DumpTensorAttr
+   * @brief Print the detailed attributes of a model input or output tensor.
+   * @param rknn_tensor_attr
+   * @return None
+   * @note None
+   */
+  void DumpTensorAttr(rknn_tensor_attr &attr);
+
+  /*
+   * @name RknnTensorTypeToFDDataType
+   * @brief Convert rknn_tensor_type to FDDataType.
+   * @param rknn_tensor_type
+   * @return FDDataType
+   * @note Most post-processing does not support the fp16 format.
+   *       Therefore, if the input is FP16, the output will be FP32.
+   */
+  FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
+
+  /*
+   * @name FDDataTypeToRknnTensorType
+   * @brief Convert FDDataType to rknn_tensor_type.
+   * @param FDDataType
+   * @return rknn_tensor_type
+   * @note None
+   */
+  rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
+};
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc
new file mode 100755
index 0000000000..2e0a49eded
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
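The binding code below only exposes the rknpu2::CpuName and rknpu2::CoreMask enums to Python; on the C++ side the same values are consumed through RuntimeOption. The following is a minimal configuration sketch rather than a snippet from this patch: the model path is a placeholder, and the exact UseRKNPU2 and SetModelPath parameter lists are assumed from the use_rknpu2 and set_model_path bindings registered later in option_pybind.cc.

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  // "picodet.rknn" is a placeholder; RKNN models carry no separate params file.
  option.SetModelPath("picodet.rknn", "", ultrainfer::ModelFormat::RKNN);
  // Assumed overload: select the target SoC and pin the model to NPU core 0.
  option.UseRKNPU2(ultrainfer::rknpu2::CpuName::RK3588,
                   ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0);

  ultrainfer::Runtime runtime;
  if (!runtime.Init(option)) {
    return 1;
  }
  return 0;
}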
+#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +namespace ultrainfer { +void BindRKNPU2Option(pybind11::module &m) { + pybind11::enum_( + m, "CpuName", pybind11::arithmetic(), "CpuName for inference.") + .value("RK356X", ultrainfer::rknpu2::CpuName::RK356X) + .value("RK3588", ultrainfer::rknpu2::CpuName::RK3588) + .value("UNDEFINED", ultrainfer::rknpu2::CpuName::UNDEFINED); + pybind11::enum_( + m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.") + .value("RKNN_NPU_CORE_AUTO", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) + .value("RKNN_NPU_CORE_0", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0) + .value("RKNN_NPU_CORE_1", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_1) + .value("RKNN_NPU_CORE_2", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_2) + .value("RKNN_NPU_CORE_0_1", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) + .value("RKNN_NPU_CORE_0_1_2", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) + .value("RKNN_NPU_CORE_UNDEFINED", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h new file mode 100755 index 0000000000..9c9d079237 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { +struct SophgoBackendOption {}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc new file mode 100755 index 0000000000..170755d068 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc @@ -0,0 +1,304 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "ultrainfer/runtime/backends/sophgo/sophgo_backend.h" + +#include + +namespace ultrainfer { +SophgoBackend::~SophgoBackend() { bm_dev_free(handle_); } +/*************************************************************** + * @name GetSDKAndDeviceVersion + * @brief get Sophgo sdk and device version + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetSDKAndDeviceVersion() { return true; } + +/*************************************************************** + * @name Init + * @brief Initialize Sophgo model + * @param model_file: Binary data for the Sophgo model. + * params_file: None + * option: config + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::Init(const RuntimeOption &option) { + if (option.model_from_memory_) { + FDERROR << "SophgoBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + if (option.model_format != ModelFormat::SOPHGO) { + FDERROR << "SophgoBackend only supports model format SOPHGO, but now it's " + << option.model_format << "." << std::endl; + return false; + } + if (option.device != Device::SOPHGOTPUD) { + FDERROR << "SophgoBackend only supports device::SOPHGOTPUD, but now it's " + << option.device << "." << std::endl; + return false; + } + + std::string model_file = option.model_file; + + // LoadModel + if (!this->LoadModel((char *)model_file.data())) { + FDERROR << "load model failed" << std::endl; + return false; + } + + // GetSDKAndDeviceVersion + if (!this->GetSDKAndDeviceVersion()) { + FDERROR << "get SDK and device version failed" << std::endl; + return false; + } + + // GetModelInputOutputInfos + if (!this->GetModelInputOutputInfos()) { + FDERROR << "get model input output infos failed" << std::endl; + return false; + } + + return true; +} + +/*************************************************************** + * @name LoadModel + * @brief read Sophgo bmodel + * @param model: Binary data for the Sophgo model. 
+ * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::LoadModel(void *model) { + unsigned int card_num = 0; + bm_status_t status = bm_get_card_num(&card_num); + status = bm_dev_request(&handle_, 0); + p_bmrt_ = bmrt_create(handle_); + assert(NULL != p_bmrt_); + + bool load_status = bmrt_load_bmodel(p_bmrt_, (char *)model); + assert(load_status); + + int network_num = bmrt_get_network_number(p_bmrt_); + + const char **net_names = NULL; + bmrt_get_network_names(p_bmrt_, &net_names); + net_name_ = net_names[0]; + free(net_names); + + net_info_ = bmrt_get_network_info(p_bmrt_, net_name_.c_str()); + assert(NULL != net_info_); + + return true; +} + +/*************************************************************** + * @name GetModelInputOutputInfos + * @brief Get the detailed input and output infos of Model + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetModelInputOutputInfos() { + inputs_desc_.resize(net_info_->input_num); + bm_shape_t *input_shapes = net_info_->stages->input_shapes; + for (int idx = 0; idx < net_info_->input_num; idx++) { + std::string temp_name = (net_info_->input_names)[idx]; + std::vector temp_shape{}; + temp_shape.resize(input_shapes[idx].num_dims); + for (int i = 0; i < input_shapes[idx].num_dims; i++) { + temp_shape[i] = input_shapes[idx].dims[i]; + } + bm_data_type_t *input_dtypes = net_info_->input_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype = SophgoTensorTypeToFDDataType(*input_dtypes); + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[idx] = temp_input_info; + } + + outputs_desc_.resize(net_info_->output_num); + bm_shape_t *output_shapes = net_info_->stages->output_shapes; + for (int idx = 0; idx < net_info_->output_num; idx++) { + std::string temp_name1 = (net_info_->output_names)[idx]; + std::vector temp_shape1{}; + temp_shape1.resize(output_shapes[idx].num_dims); + for (int i = 0; i < output_shapes[idx].num_dims; i++) { + temp_shape1[i] = output_shapes[idx].dims[i]; + } + bm_data_type_t *output_dtypes = net_info_->output_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype1 = SophgoTensorTypeToFDDataType(*output_dtypes); + TensorInfo temp_output_info = {temp_name1, temp_shape1, temp_dtype1}; + outputs_desc_[idx] = temp_output_info; + } + return true; +} + +TensorInfo SophgoBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()) + return inputs_desc_[index]; +} + +std::vector SophgoBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo SophgoBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()) + return outputs_desc_[index]; +} + +std::vector SophgoBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool SophgoBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + int input_size = inputs.size(); + assert(input_size != 0); + assert(input_size == NumInputs()); + bm_tensor_t input_tensors[input_size]; + bm_status_t status = BM_SUCCESS; + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + bm_data_type_t *input_dtypes = net_info_->input_dtypes; + for (int i = 0; i < input_size; i++) { + status = bm_malloc_device_byte(handle_, &input_tensors[i].device_mem, + net_info_->max_input_bytes[i]); + assert(BM_SUCCESS == status); + 
input_tensors[i].dtype = input_dtypes[i]; + input_tensors[i].st_mode = BM_STORE_1N; + input_tensors[i].shape = net_info_->stages[0].input_shapes[i]; + unsigned int input_byte = bmrt_tensor_bytesize(&input_tensors[i]); + bm_memcpy_s2d_partial(handle_, input_tensors[i].device_mem, + (void *)inputs[i].Data(), + bmrt_tensor_bytesize(&input_tensors[i])); + } + + int output_size = NumOutputs(); + bm_tensor_t output_tensors[output_size]; + for (int i = 0; i < output_size; i++) { + status = bm_malloc_device_byte(handle_, &output_tensors[i].device_mem, + net_info_->max_output_bytes[i]); + assert(BM_SUCCESS == status); + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + bool launch_status = bmrt_launch_tensor_ex( + p_bmrt_, net_name_.c_str(), input_tensors, net_info_->input_num, + output_tensors, net_info_->output_num, true, false); + assert(launch_status); + status = bm_thread_sync(handle_); + assert(status == BM_SUCCESS); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + bm_data_type_t *output_dtypes = net_info_->output_dtypes; + for (int i = 0; i < output_size; i++) { + int temp_bytesize = bmrt_tensor_bytesize(&output_tensors[i]); // Byte + float *temp_out = (float *)malloc(temp_bytesize); + bm_memcpy_d2s_partial(handle_, temp_out, output_tensors[i].device_mem, + temp_bytesize); + + std::vector temp_shape; + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + + memcpy((*outputs)[i].MutableData(), temp_out, (*outputs)[i].Nbytes()); + free(temp_out); + } + + for (int i = 0; i < input_size; i++) { + bm_free_device(handle_, input_tensors[i].device_mem); + } + for (int i = 0; i < output_size; i++) { + bm_free_device(handle_, output_tensors[i].device_mem); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + + return true; +} + +/*************************************************************** + * @name SophgoTensorTypeToFDDataType + * @brief Change SophgoTensorType To FDDataType + * @param bm_data_type_t + * @return None + * @note None + ***************************************************************/ +FDDataType SophgoBackend::SophgoTensorTypeToFDDataType(bm_data_type_t type) { + if (type == BM_FLOAT16) { + return FDDataType::FP32; + } + if (type == BM_FLOAT32) { + return FDDataType::FP32; + } + if (type == BM_INT8) { + return FDDataType::INT8; + } + if (type == BM_INT16) { + return FDDataType::INT16; + } + if (type == BM_INT32) { + return FDDataType::INT32; + } + if (type == BM_UINT8) { + return FDDataType::UINT8; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +/*************************************************************** + * @name FDDataTypeToSophgoTensorType + * @brief Change FDDataType To SophgoTensorType + * @param FDDataType + * @return None + * @note None + ***************************************************************/ +// Sophgo_tensor_type +bm_data_type_t +SophgoBackend::FDDataTypeToSophgoTensorType(ultrainfer::FDDataType type) { + if (type == FDDataType::FP16) { + return BM_FLOAT16; + } + if (type == FDDataType::FP32) { + return BM_FLOAT32; + } + if (type == FDDataType::INT8) { + return BM_INT8; + } + if (type == FDDataType::INT16) { + return BM_INT16; + } + if (type == FDDataType::INT32) { + return BM_INT32; + } + if (type == FDDataType::UINT8) { + return BM_UINT8; + } + FDERROR << "Sophgo_tensor_type don't support this type" << std::endl; + 
return BM_FLOAT32; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h new file mode 100755 index 0000000000..fc06faeb63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "bmlib_runtime.h" // NOLINT +#include "bmruntime_interface.h" // NOLINT +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/sophgo/option.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { + +class SophgoBackend : public BaseBackend { +public: + SophgoBackend() = default; + virtual ~SophgoBackend(); + bool Init(const RuntimeOption &option); + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + bool LoadModel(void *model); + bool GetSDKAndDeviceVersion(); + bool GetModelInputOutputInfos(); + + std::vector inputs_desc_; + std::vector outputs_desc_; + std::string net_name_; + + bm_handle_t handle_; + void *p_bmrt_ = nullptr; + + bool infer_init = false; + + const bm_net_info_t *net_info_ = nullptr; + + // SophgoTPU2BackendOption option_; + + static FDDataType SophgoTensorTypeToFDDataType(bm_data_type_t type); + static bm_data_type_t FDDataTypeToSophgoTensorType(FDDataType type); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h new file mode 100755 index 0000000000..c0d6feb672 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
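TVMBackendOption below is deliberately empty: the TVM backend reads everything it needs from the generic RuntimeOption fields, namely a compiled graph-executor library as model_file, the exported parameter blob as params_file, and a CPU device. A hedged configuration sketch with placeholder artifact names follows; the SetModelPath parameter order is assumed from its Python binding.

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  // Placeholder artifacts: the shared library and parameter blob that
  // TVMBackend::BuildModel loads.
  option.SetModelPath("mobilenet_tvm.so", "mobilenet_tvm.params",
                      ultrainfer::ModelFormat::TVMFormat);
  option.UseCpu();         // BuildDLDevice only accepts Device::CPU
  option.UseTVMBackend();  // bound as use_tvm_backend in the Python API

  ultrainfer::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}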
+#pragma once + +namespace ultrainfer { +struct TVMBackendOption { + TVMBackendOption() {} +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc new file mode 100755 index 0000000000..a3fb414e4e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc @@ -0,0 +1,204 @@ +#include "ultrainfer/runtime/backends/tvm/tvm_backend.h" + +#include "yaml-cpp/yaml.h" +namespace ultrainfer { +bool TVMBackend::Init(const ultrainfer::RuntimeOption &runtime_option) { + if (!(Supported(runtime_option.model_format, Backend::TVM) && + Supported(runtime_option.device, Backend::TVM))) { + FDERROR << "TVMBackend only supports model " + "ModelFormat::TVMFormat/Backend::TVM, but now its " + << runtime_option.model_format << "/" << runtime_option.device + << std::endl; + return false; + } + + if (runtime_option.model_from_memory_) { + FDERROR << "TVMBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + + if (!BuildDLDevice(runtime_option.device)) { + FDERROR << "TVMBackend only don't support run in this device." << std::endl; + return false; + } + + if (!BuildModel(runtime_option)) { + FDERROR << "TVMBackend only don't support run with this model path." + << std::endl; + return false; + } + + if (!InitInputAndOutputTensor()) { + FDERROR << "InitInputAndOutputTensor failed." << std::endl; + return false; + } + return true; +} + +bool TVMBackend::InitInputAndOutputTensor() { + input_tensor_.resize(NumInputs()); + for (int i = 0; i < NumInputs(); ++i) { + TensorInfo tensor_info = GetInputInfo(i); + tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); + input_tensor_[i] = tvm::runtime::NDArray::Empty( + shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); + } + + output_tensor_.resize(NumOutputs()); + for (int i = 0; i < NumOutputs(); ++i) { + TensorInfo tensor_info = GetOutputInfo(i); + tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); + output_tensor_[i] = tvm::runtime::NDArray::Empty( + shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); + } + return true; +} + +bool TVMBackend::BuildModel(const RuntimeOption &runtime_option) { + // load in the library + tvm::runtime::Module mod_factory = + tvm::runtime::Module::LoadFromFile(runtime_option.model_file); + + // create the graph executor module + gmod_ = mod_factory.GetFunction("default")(dev_); + + // load params + std::ifstream params_in(runtime_option.params_file, std::ios::binary); + std::string params_data((std::istreambuf_iterator(params_in)), + std::istreambuf_iterator()); + params_in.close(); + TVMByteArray params_arr; + params_arr.data = params_data.c_str(); + params_arr.size = params_data.length(); + tvm::runtime::PackedFunc load_params = gmod_.GetFunction("load_params"); + load_params(params_arr); + + // read input and output info + tvm::runtime::PackedFunc get_input_info = gmod_.GetFunction("get_input_info"); + tvm::Map input_info = get_input_info(); + auto input_info_shape = tvm::Downcast>( + input_info["shape"]); + inputs_desc_.reserve(input_info_shape.size()); + for (auto map_node : input_info_shape) { + std::string temp_name = map_node.first; + + tvm::ShapeTuple tup = map_node.second; + std::vector temp_shape{}; + temp_shape.resize(tup.size()); + for (int j = 0; j < tup.size(); ++j) { + temp_shape[j] = static_cast(tup[j]); + } + + FDDataType temp_dtype = ultrainfer::UNKNOWN1; + TensorInfo 
temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_.emplace_back(temp_input_info); + } + + int input_dtype_index = 0; + auto input_info_dtype = + tvm::Downcast>(input_info["dtype"]); + for (auto map_node : input_info_dtype) { + tvm::String tup = map_node.second; + inputs_desc_[input_dtype_index].dtype = TVMTensorTypeToFDDataType(tup); + input_dtype_index++; + } + + tvm::runtime::PackedFunc get_output_info = + gmod_.GetFunction("get_output_info"); + tvm::Map output_info = get_output_info(); + auto output_info_shape = + tvm::Downcast>( + output_info["shape"]); + outputs_desc_.reserve(output_info_shape.size()); + for (auto map_node : output_info_shape) { + std::string temp_name = map_node.first; + + tvm::ShapeTuple tup = map_node.second; + std::vector temp_shape{}; + temp_shape.resize(tup.size()); + for (int j = 0; j < tup.size(); ++j) { + temp_shape[j] = static_cast(tup[j]); + } + + FDDataType temp_dtype = ultrainfer::FP32; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_.emplace_back(temp_input_info); + } + + int output_dtype_index = 0; + auto output_info_dtype = + tvm::Downcast>(output_info["dtype"]); + for (auto map_node : output_info_dtype) { + tvm::String tup = map_node.second; + outputs_desc_[output_dtype_index].dtype = TVMTensorTypeToFDDataType(tup); + output_dtype_index++; + } + return true; +} + +FDDataType TVMBackend::TVMTensorTypeToFDDataType(tvm::String type) { + if (type == "float32") { + return FDDataType::FP32; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +bool TVMBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + for (int i = 0; i < inputs.size(); ++i) { + memcpy(input_tensor_[i]->data, inputs[i].Data(), inputs[i].Nbytes()); + } + + // get the function from the module(set input data) + tvm::runtime::PackedFunc set_input = gmod_.GetFunction("set_input"); + for (int i = 0; i < NumInputs(); ++i) { + set_input(GetInputInfo(i).name, input_tensor_[i]); + } + + // get the function from the module(run it) + tvm::runtime::PackedFunc run = gmod_.GetFunction("run"); + run(); + + // get the function from the module(get output data) + tvm::runtime::PackedFunc get_output = gmod_.GetFunction("get_output"); + for (int i = 0; i < NumOutputs(); ++i) { + get_output(i, output_tensor_[i]); + } + + // get result + outputs->resize(NumOutputs()); + std::vector temp_shape{}; + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), + static_cast(output_tensor_[i]->data), + (*outputs)[i].Nbytes()); + } + return true; +} + +bool TVMBackend::BuildDLDevice(ultrainfer::Device device) { + if (device == Device::CPU) { + dev_ = DLDevice{kDLCPU, 0}; + } else { + FDERROR << "TVMBackend only support run in CPU." 
<< std::endl; + return false; + } + return true; +} + +DLDataType TVMBackend::FDDataTypeToDLDataType(ultrainfer::FDDataType dtype) { + if (dtype == FDDataType::FP32) { + return DLDataType{kDLFloat, 32, 1}; + } + return {}; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h new file mode 100755 index 0000000000..693c0067f2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace ultrainfer { +class TVMBackend : public BaseBackend { +public: + TVMBackend() = default; + virtual ~TVMBackend() = default; + bool Init(const RuntimeOption &runtime_option) override; + int NumInputs() const override { return inputs_desc_.size(); } + int NumOutputs() const override { return outputs_desc_.size(); } + TensorInfo GetInputInfo(int index) override { return inputs_desc_[index]; } + TensorInfo GetOutputInfo(int index) override { return outputs_desc_[index]; } + std::vector GetInputInfos() override { return inputs_desc_; } + std::vector GetOutputInfos() override { return outputs_desc_; } + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + DLDevice dev_{}; + tvm::runtime::Module gmod_; + std::vector inputs_desc_; + std::vector outputs_desc_; + + bool BuildDLDevice(Device device); + bool BuildModel(const RuntimeOption &runtime_option); + bool InitInputAndOutputTensor(); + + std::vector input_tensor_; + std::vector output_tensor_; + + FDDataType TVMTensorTypeToFDDataType(tvm::String type); + DLDataType FDDataTypeToDLDataType(FDDataType dtype); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc b/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc new file mode 100755 index 0000000000..2dfcabfd93 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
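The definitions that follow back the Backend/Device/ModelFormat enums with stream printers and the availability helpers declared in enum_variables.h. A short illustrative sketch of how calling code can probe the current build, using only functions defined in this file:

#include <iostream>

#include "ultrainfer/runtime/enum_variables.h"

int main() {
  // List every backend compiled into this UltraInfer build.
  for (const auto &backend : ultrainfer::GetAvailableBackends()) {
    std::cout << "available: " << backend << std::endl;
  }
  // Fall back gracefully when a preferred backend was not compiled in.
  if (!ultrainfer::IsBackendAvailable(ultrainfer::Backend::TRT)) {
    std::cout << "TensorRT backend not built; relying on default selection."
              << std::endl;
  }
  return 0;
}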
+ +#include "ultrainfer/runtime/enum_variables.h" + +namespace ultrainfer { +std::ostream &operator<<(std::ostream &out, const Backend &backend) { + if (backend == Backend::ORT) { + out << "Backend::ORT"; + } else if (backend == Backend::TRT) { + out << "Backend::TRT"; + } else if (backend == Backend::PDINFER) { + out << "Backend::PDINFER"; + } else if (backend == Backend::OPENVINO) { + out << "Backend::OPENVINO"; + } else if (backend == Backend::RKNPU2) { + out << "Backend::RKNPU2"; + } else if (backend == Backend::SOPHGOTPU) { + out << "Backend::SOPHGOTPU"; + } else if (backend == Backend::POROS) { + out << "Backend::POROS"; + } else if (backend == Backend::LITE) { + out << "Backend::PDLITE"; + } else if (backend == Backend::HORIZONNPU) { + out << "Backend::HORIZONNPU"; + } else if (backend == Backend::TVM) { + out << "Backend::TVM"; + } else { + out << "UNKNOWN-Backend"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const Device &d) { + switch (d) { + case Device::CPU: + out << "Device::CPU"; + break; + case Device::GPU: + out << "Device::GPU"; + break; + case Device::RKNPU: + out << "Device::RKNPU"; + break; + case Device::SUNRISENPU: + out << "Device::SUNRISENPU"; + break; + case Device::SOPHGOTPUD: + out << "Device::SOPHGOTPUD"; + break; + case Device::TIMVX: + out << "Device::TIMVX"; + break; + case Device::KUNLUNXIN: + out << "Device::KUNLUNXIN"; + break; + case Device::ASCEND: + out << "Device::ASCEND"; + break; + case Device::DIRECTML: + out << "Device::DIRECTML"; + break; + default: + out << "Device::UNKOWN"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const ModelFormat &format) { + if (format == ModelFormat::PADDLE) { + out << "ModelFormat::PADDLE"; + } else if (format == ModelFormat::ONNX) { + out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::RKNN) { + out << "ModelFormat::RKNN"; + } else if (format == ModelFormat::SOPHGO) { + out << "ModelFormat::SOPHGO"; + } else if (format == ModelFormat::TORCHSCRIPT) { + out << "ModelFormat::TORCHSCRIPT"; + } else if (format == ModelFormat::HORIZON) { + out << "ModelFormat::HORIZON"; + } else if (format == ModelFormat::TVMFormat) { + out << "ModelFormat::TVMFormat"; + } else { + out << "UNKNOWN-ModelFormat"; + } + return out; +} + +std::vector GetAvailableBackends() { + std::vector backends; +#ifdef ENABLE_ORT_BACKEND + backends.push_back(Backend::ORT); +#endif +#ifdef ENABLE_TRT_BACKEND + backends.push_back(Backend::TRT); +#endif +#ifdef ENABLE_PADDLE_BACKEND + backends.push_back(Backend::PDINFER); +#endif +#ifdef ENABLE_POROS_BACKEND + backends.push_back(Backend::POROS); +#endif +#ifdef ENABLE_OPENVINO_BACKEND + backends.push_back(Backend::OPENVINO); +#endif +#ifdef ENABLE_LITE_BACKEND + backends.push_back(Backend::LITE); +#endif +#ifdef ENABLE_RKNPU2_BACKEND + backends.push_back(Backend::RKNPU2); +#endif +#ifdef ENABLE_HORIZON_BACKEND + backends.push_back(Backend::HORIZONNPU); +#endif +#ifdef ENABLE_SOPHGO_BACKEND + backends.push_back(Backend::SOPHGOTPU); +#endif +#ifdef ENABLE_TVM_BACKEND + backends.push_back(Backend::TVM); +#endif + return backends; +} + +bool IsBackendAvailable(const Backend &backend) { + std::vector backends = GetAvailableBackends(); + for (size_t i = 0; i < backends.size(); ++i) { + if (backend == backends[i]) { + return true; + } + } + return false; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/enum_variables.h b/libs/ultrainfer/ultrainfer/runtime/enum_variables.h new file mode 100755 index 0000000000..3e2f234e30 --- 
/dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/enum_variables.h @@ -0,0 +1,148 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file enum_variables.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "ultrainfer/utils/utils.h" +#include +#include + +namespace ultrainfer { + +/*! Inference backend supported in UltraInfer */ +enum Backend { + UNKNOWN, ///< Unknown inference backend + ORT, //< ONNX Runtime, support Paddle/ONNX format model, + //< CPU/ Nvidia GPU DirectML + TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only + PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU + POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU + OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only + LITE, ///< Paddle Lite, support Paddle format model, ARM CPU / ARM GPU + RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only + SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only + HORIZONNPU, ///< HORIZONNPU, support Horizon format model, Horizon NPU + TVM, ///< TVMBackend, support TVM format model, CPU / Nvidia GPU +}; + +/** + * @brief Get all the available inference backend in UltraInfer + */ +ULTRAINFER_DECL std::vector GetAvailableBackends(); + +/** + * @brief Check if the inference backend available + */ +ULTRAINFER_DECL bool IsBackendAvailable(const Backend &backend); + +enum ULTRAINFER_DECL Device { + CPU, + GPU, + RKNPU, + IPU, + TIMVX, + KUNLUNXIN, + ASCEND, + SOPHGOTPUD, + DIRECTML, + SUNRISENPU, +}; + +/*! 
Deep learning model format */ +enum ModelFormat { + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format + HORIZON, ///< Model with HORIZON format + TVMFormat, ///< Model with TVM format +}; + +/// Describle all the supported backends for specified model format +static std::map> + s_default_backends_by_format = { + {ModelFormat::PADDLE, + {Backend::PDINFER, Backend::LITE, Backend::ORT, Backend::OPENVINO, + Backend::TRT}}, + {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::RKNN, {Backend::RKNPU2}}, + {ModelFormat::HORIZON, {Backend::HORIZONNPU}}, + {ModelFormat::TORCHSCRIPT, {Backend::POROS}}, + {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}, + {ModelFormat::TVMFormat, {Backend::TVM}}}; + +/// Describle all the supported backends for specified device +static std::map> s_default_backends_by_device = { + {Device::CPU, + {Backend::LITE, Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::POROS, Backend::TVM}}, + {Device::GPU, + {Backend::LITE, Backend::PDINFER, Backend::ORT, Backend::TRT, + Backend::POROS, Backend::TVM}}, + {Device::RKNPU, {Backend::RKNPU2}}, + {Device::SUNRISENPU, {Backend::HORIZONNPU}}, + {Device::IPU, {Backend::PDINFER}}, + {Device::TIMVX, {Backend::LITE}}, + {Device::KUNLUNXIN, {Backend::LITE, Backend::PDINFER}}, + {Device::ASCEND, {Backend::LITE}}, + {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}, + {Device::DIRECTML, {Backend::ORT}}}; + +inline bool Supported(ModelFormat format, Backend backend) { + auto iter = s_default_backends_by_format.find(format); + if (iter == s_default_backends_by_format.end()) { + FDERROR << "Didn't find format is registered in " + << "s_default_backends_by_format." << std::endl; + return false; + } + for (size_t i = 0; i < iter->second.size(); ++i) { + if (iter->second[i] == backend) { + return true; + } + } + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " << format + << "." << std::endl; + return false; +} + +inline bool Supported(Device device, Backend backend) { + auto iter = s_default_backends_by_device.find(device); + if (iter == s_default_backends_by_device.end()) { + FDERROR << "Didn't find device is registered in " + << "s_default_backends_by_device." << std::endl; + return false; + } + for (size_t i = 0; i < iter->second.size(); ++i) { + if (iter->second[i] == backend) { + return true; + } + } + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " << device + << "." << std::endl; + return false; +} + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const Backend &b); +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const Device &d); +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const ModelFormat &f); +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc new file mode 100755 index 0000000000..b2d30df6f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc @@ -0,0 +1,83 @@ +// Cropyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindLiteOption(pybind11::module &m); +void BindOpenVINOOption(pybind11::module &m); +void BindOrtOption(pybind11::module &m); +void BindTrtOption(pybind11::module &m); +void BindPaddleOption(pybind11::module &m); +void BindPorosOption(pybind11::module &m); +void BindRKNPU2Option(pybind11::module &m); +void BindOption(pybind11::module &m) { + BindLiteOption(m); + BindOpenVINOOption(m); + BindOrtOption(m); + BindTrtOption(m); + BindPaddleOption(m); + BindPorosOption(m); + BindRKNPU2Option(m); + + pybind11::class_(m, "RuntimeOption") + .def(pybind11::init()) + .def("set_model_path", &RuntimeOption::SetModelPath) + .def("set_model_buffer", &RuntimeOption::SetModelBuffer) + .def("use_gpu", &RuntimeOption::UseGpu) + .def("use_cpu", &RuntimeOption::UseCpu) + .def("use_rknpu2", &RuntimeOption::UseRKNPU2) + .def("use_sophgo", &RuntimeOption::UseSophgo) + .def("use_ascend", &RuntimeOption::UseAscend) + .def("use_kunlunxin", &RuntimeOption::UseKunlunXin) + .def("disable_valid_backend_check", + &RuntimeOption::DisableValidBackendCheck) + .def("enable_valid_backend_check", + &RuntimeOption::EnableValidBackendCheck) + .def_readwrite("paddle_lite_option", &RuntimeOption::paddle_lite_option) + .def_readwrite("openvino_option", &RuntimeOption::openvino_option) + .def_readwrite("ort_option", &RuntimeOption::ort_option) + .def_readwrite("trt_option", &RuntimeOption::trt_option) + .def_readwrite("poros_option", &RuntimeOption::poros_option) + .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option) + .def("set_external_stream", &RuntimeOption::SetExternalStream) + .def("set_external_raw_stream", + [](RuntimeOption &self, size_t external_stream) { + self.SetExternalStream(reinterpret_cast(external_stream)); + }) + .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) + .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) + .def("use_poros_backend", &RuntimeOption::UsePorosBackend) + .def("use_tvm_backend", &RuntimeOption::UseTVMBackend) + .def("use_ort_backend", &RuntimeOption::UseOrtBackend) + .def("use_trt_backend", &RuntimeOption::UseTrtBackend) + .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend) + .def("use_lite_backend", &RuntimeOption::UseLiteBackend) + .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory) + .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory) + .def("use_ipu", &RuntimeOption::UseIpu) + .def("enable_profiling", &RuntimeOption::EnableProfiling) + .def("disable_profiling", &RuntimeOption::DisableProfiling) + .def_readwrite("model_file", &RuntimeOption::model_file) + .def_readwrite("params_file", &RuntimeOption::params_file) + .def_readwrite("model_format", &RuntimeOption::model_format) + .def_readwrite("backend", &RuntimeOption::backend) + .def_readwrite("external_stream", &RuntimeOption::external_stream_) + .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_) + .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) + .def_readwrite("device_id", &RuntimeOption::device_id) + .def_readwrite("device", 
&RuntimeOption::device); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime.cc b/libs/ultrainfer/ultrainfer/runtime/runtime.cc new file mode 100755 index 0000000000..f6c7cf768e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime.cc @@ -0,0 +1,431 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/runtime.h" + +#include +#include +#include +#include + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/utils/utils.h" +#include "yaml-cpp/yaml.h" + +#ifdef ENABLE_ORT_BACKEND +#include "ultrainfer/runtime/backends/ort/ort_backend.h" +#endif + +#ifdef ENABLE_TRT_BACKEND +#include "ultrainfer/runtime/backends/tensorrt/trt_backend.h" +#endif + +#ifdef ENABLE_PADDLE_BACKEND +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" +#endif + +#ifdef ENABLE_POROS_BACKEND +#include "ultrainfer/runtime/backends/poros/poros_backend.h" +#endif + +#ifdef ENABLE_OPENVINO_BACKEND +#include "ultrainfer/runtime/backends/openvino/ov_backend.h" +#endif + +#ifdef ENABLE_LITE_BACKEND +#include "ultrainfer/runtime/backends/lite/lite_backend.h" +#endif + +#ifdef ENABLE_RKNPU2_BACKEND +#include "ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h" +#endif + +#ifdef ENABLE_SOPHGO_BACKEND +#include "ultrainfer/runtime/backends/sophgo/sophgo_backend.h" +#endif + +#ifdef ENABLE_HORIZON_BACKEND +#include "ultrainfer/runtime/backends/horizon/horizon_backend.h" +#endif + +#ifdef ENABLE_TVM_BACKEND +#include "ultrainfer/runtime/backends/tvm/tvm_backend.h" +#endif + +namespace ultrainfer { + +bool AutoSelectBackend(RuntimeOption &option) { + auto iter0 = s_default_backends_by_format.find(option.model_format); + if (iter0 == s_default_backends_by_format.end()) { + FDERROR << "Cannot found a default backend for model format: " + << option.model_format + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + + auto iter1 = s_default_backends_by_device.find(option.device); + if (iter1 == s_default_backends_by_device.end()) { + FDERROR << "Cannot found a default backend for device: " << option.device + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + + std::vector candidates; + for (const auto &b0 : iter0->second) { + for (const auto &b1 : iter1->second) { + if (b0 == b1) { + candidates.push_back(b0); + } + } + } + + if (candidates.size() == 0) { + FDERROR << "Cannot found availabel inference backends by model format: " + << option.model_format << " with device: " << option.device + << std::endl; + return false; + } + + for (const auto &b : candidates) { + if (IsBackendAvailable(b)) { + option.backend = b; + FDINFO << "UltraInfer will choose " << b << " to inference this model." 
+ << std::endl; + return true; + } + } + std::string debug_message = Str(candidates); + FDERROR << "The candiate backends for " << option.model_format << " & " + << option.device << " are " << debug_message + << ", but both of them have not been compiled with current " + "UltraInfer yet." + << std::endl; + return false; +} + +bool Runtime::Init(const RuntimeOption &_option) { + option = _option; + + // Choose default backend by model format and device if backend is not + // specified + if (option.backend == Backend::UNKNOWN) { + if (!AutoSelectBackend(option)) { + return false; + } + } + + if (option.backend == Backend::ORT) { + CreateOrtBackend(); + } else if (option.backend == Backend::TRT) { + CreateTrtBackend(); + } else if (option.backend == Backend::PDINFER) { + CreatePaddleBackend(); + } else if (option.backend == Backend::OPENVINO) { + CreateOpenVINOBackend(); + } else if (option.backend == Backend::LITE) { + CreateLiteBackend(); + } else if (option.backend == Backend::RKNPU2) { + CreateRKNPU2Backend(); + } else if (option.backend == Backend::SOPHGOTPU) { + CreateSophgoNPUBackend(); + } else if (option.backend == Backend::POROS) { + CreatePorosBackend(); + } else if (option.backend == Backend::HORIZONNPU) { + CreateHorizonBackend(); + } else if (option.backend == Backend::TVM) { + CreateTVMBackend(); + } else { + std::string msg = Str(GetAvailableBackends()); + FDERROR << "The compiled UltraInfer only supports " << msg << ", " + << option.backend << " is not supported now." << std::endl; + return false; + } + backend_->benchmark_option_ = option.benchmark_option; + return true; +} + +TensorInfo Runtime::GetInputInfo(int index) { + return backend_->GetInputInfo(index); +} + +TensorInfo Runtime::GetOutputInfo(int index) { + return backend_->GetOutputInfo(index); +} + +std::vector Runtime::GetInputInfos() { + return backend_->GetInputInfos(); +} + +std::vector Runtime::GetOutputInfos() { + return backend_->GetOutputInfos(); +} + +bool Runtime::Infer(std::vector &input_tensors, + std::vector *output_tensors) { + for (auto &tensor : input_tensors) { + FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, + "Device id of input tensor(%d) and runtime(%d) are not same.", + tensor.device_id, option.device_id); + } + return backend_->Infer(input_tensors, output_tensors); +} + +bool Runtime::Infer() { + bool result = false; + if (option.device == Device::KUNLUNXIN) { + // FDTensor SetExternalData is not support for Device::KUNLUNXIN + // now, so, we need to set copy_to_fd as 'true'. 
+ result = backend_->Infer(input_tensors_, &output_tensors_, true); + } else { + result = backend_->Infer(input_tensors_, &output_tensors_, false); + } + + for (auto &tensor : output_tensors_) { + tensor.device_id = option.device_id; + } + return result; +} + +void Runtime::BindInputTensor(const std::string &name, FDTensor &input) { + bool is_exist = false; + for (auto &t : input_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + input_tensors_.emplace_back(std::move(new_tensor)); + } +} + +void Runtime::BindOutputTensor(const std::string &name, FDTensor &output) { + bool is_exist = false; + for (auto &t : output_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + output_tensors_.emplace_back(std::move(new_tensor)); + } +} +FDTensor *Runtime::GetOutputTensor(const std::string &name) { + for (auto &t : output_tensors_) { + if (t.name == name) { + return &t; + } + } + FDWARNING << "The output name [" << name << "] don't exist." << std::endl; + return nullptr; +} + +void Runtime::ReleaseModelMemoryBuffer() { + if (option.model_from_memory_) { + option.model_file.clear(); + option.model_file.shrink_to_fit(); + option.params_file.clear(); + option.params_file.shrink_to_fit(); + } +} + +void Runtime::CreatePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), + "Failed to initialized Paddle Inference backend."); +#else + FDASSERT(false, "PaddleBackend is not available, please compiled with " + "ENABLE_PADDLE_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device + << "." << std::endl; + + const char *pirApiFlag = "FLAGS_enable_pir_api"; + const char *envValue = getenv(pirApiFlag); + if (envValue == nullptr || + (std::string(envValue) != "0" && std::string(envValue) != "False")) { + FDWARNING + << "To ensure the correct operation of the Paddle backend, please set" + "the environment variable 'FLAGS_enable_pir_api' to 'False'." + << std::endl; + } +} + +void Runtime::CreateOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend."); +#else + FDASSERT(false, "OpenVINOBackend is not available, please compiled with " + "ENABLE_OPENVINO_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device + << "." << std::endl; +} + +void Runtime::CreateTVMBackend() { +#ifdef ENABLE_TVM_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize TVM backend."); +#else + FDASSERT(false, "TVMBackend is not available, please compiled with " + "ENABLE_TVM_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::TVM in " << option.device << "." 
+ << std::endl; +} + +void Runtime::CreateOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + backend_ = utils::make_unique(); + + FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT."); +#else + FDASSERT(false, "OrtBackend is not available, please compiled with " + "ENABLE_ORT_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "." + << std::endl; +} + +void Runtime::CreateTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend."); +#else + FDASSERT(false, "TrtBackend is not available, please compiled with " + "ENABLE_TRT_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "." + << std::endl; +} + +void Runtime::CreateLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + backend_ = utils::make_unique(); + + FDASSERT(backend_->Init(option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, "LiteBackend is not available, please compiled with " + "ENABLE_LITE_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device + << "." << std::endl; +} + +void Runtime::CreateRKNPU2Backend() { +#ifdef ENABLE_RKNPU2_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend."); +#else + FDASSERT(false, "RKNPU2Backend is not available, please compiled with " + "ENABLE_RKNPU2_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device + << "." << std::endl; +} + +void Runtime::CreateHorizonBackend() { +#ifdef ENABLE_HORIZON_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend."); +#else + FDASSERT(false, "HorizonBackend is not available, please compiled with ", + " ENABLE_HORIZON_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device + << "." << std::endl; +} + +void Runtime::CreateSophgoNPUBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend."); +#else + FDASSERT(false, "SophgoBackend is not available, please compiled with " + "ENABLE_SOPHGO_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device + << "." << std::endl; +} + +Runtime *Runtime::Clone(void *stream, int device_id) { + Runtime *runtime = new Runtime(); + if (option.backend != Backend::OPENVINO && + option.backend != Backend::PDINFER) { + runtime->Init(option); + FDWARNING << "Only OpenVINO/Paddle Inference support \ + clone engine to reduce CPU/GPU memory usage now. For " + << option.backend + << ", UltraInfer will create a new engine which \ + will not share memory with the current runtime." + << std::endl; + return runtime; + } + FDINFO << "Runtime Clone with Backend:: " << option.backend << " in " + << option.device << "." << std::endl; + runtime->option = option; + runtime->backend_ = backend_->Clone(option, stream, device_id); + return runtime; +} + +void Runtime::CreatePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Poros backend."); +#else + FDASSERT(false, "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::POROS in " << option.device + << "." 
<< std::endl; +} + +// only for poros backend +bool Runtime::Compile(std::vector> &prewarm_tensors) { +#ifdef ENABLE_POROS_BACKEND + option.poros_option.device = option.device; + option.poros_option.device_id = option.device_id; + option.poros_option.enable_fp16 = option.trt_option.enable_fp16; + option.poros_option.max_batch_size = option.trt_option.max_batch_size; + option.poros_option.max_workspace_size = option.trt_option.max_workspace_size; + + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT( + casted_backend->Compile(option.model_file, prewarm_tensors, + option.poros_option), + "Load model from Torchscript failed while initliazing PorosBackend."); +#else + FDASSERT(false, "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime.h b/libs/ultrainfer/ultrainfer/runtime/runtime.h new file mode 100755 index 0000000000..da53567e16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/runtime_option.h" +#include "ultrainfer/utils/perf.h" +/** \brief All C++ UltraInfer APIs are defined inside this namespace + * + */ +namespace ultrainfer { + +/*! @brief Runtime object used to inference the loaded model on different + * devices + */ +struct ULTRAINFER_DECL Runtime { +public: + /// Intialize a Runtime object with RuntimeOption + bool Init(const RuntimeOption &_option); + + /** \brief Inference the model by the input data, and write to the output + * + * \param[in] input_tensors Notice the FDTensor::name should keep same with + * the model's input \param[in] output_tensors Inference results \return true + * if the inference successed, otherwise false + */ + bool Infer(std::vector &input_tensors, + std::vector *output_tensors); + + /** \brief No params inference the model. + * + * the input and output data need to pass through the BindInputTensor and + * GetOutputTensor interfaces. 
+ */ + bool Infer(); + + /** \brief Get number of inputs + */ + int NumInputs() { return backend_->NumInputs(); } + /** \brief Get number of outputs + */ + int NumOutputs() { return backend_->NumOutputs(); } + /** \brief Get input information by index + */ + TensorInfo GetInputInfo(int index); + /** \brief Get output information by index + */ + TensorInfo GetOutputInfo(int index); + /** \brief Get all the input information + */ + std::vector GetInputInfos(); + /** \brief Get all the output information + */ + std::vector GetOutputInfos(); + /** \brief Bind FDTensor by name, no copy and share input memory + */ + void BindInputTensor(const std::string &name, FDTensor &input); + + /** \brief Bind FDTensor by name, no copy and share output memory. + * Please make share the correctness of tensor shape of output. + */ + void BindOutputTensor(const std::string &name, FDTensor &output); + + /** \brief Get output FDTensor by name, no copy and share backend output + * memory + */ + FDTensor *GetOutputTensor(const std::string &name); + + /** \brief Clone new Runtime when multiple instances of the same model are + * created + * + * \param[in] stream CUDA Stream, defualt param is nullptr + * \return new Runtime* by this clone + */ + Runtime *Clone(void *stream = nullptr, int device_id = -1); + + void ReleaseModelMemoryBuffer(); + + RuntimeOption option; + + /** \brief Compile TorchScript Module, only for Poros backend + * + * \param[in] prewarm_tensors Prewarm datas for compile + * \return true if compile successed, otherwise false + */ + bool Compile(std::vector> &prewarm_tensors); + /** \brief Get profile time of Runtime after the profile process is done. + */ + double GetProfileTime() { + return backend_->benchmark_result_.time_of_runtime; + } + +private: + void CreateOrtBackend(); + void CreatePaddleBackend(); + void CreateTrtBackend(); + void CreateOpenVINOBackend(); + void CreateLiteBackend(); + void CreateRKNPU2Backend(); + void CreateHorizonBackend(); + void CreateSophgoNPUBackend(); + void CreatePorosBackend(); + void CreateTVMBackend(); + std::unique_ptr backend_; + std::vector input_tensors_; + std::vector output_tensors_; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc b/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc new file mode 100755 index 0000000000..114fcfe002 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc @@ -0,0 +1,524 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
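A minimal end-to-end sketch of the Runtime API defined above may help; it is illustrative only. The model paths, the 1x3x224x224 shape, and the placeholder input data are hypothetical; FDDataType::FP32 is assumed to be the float32 enum from ultrainfer/core/fd_tensor.h, and TensorInfo is assumed to expose the input name as a `name` member. Note also that CreatePaddleBackend() above warns that the environment variable FLAGS_enable_pir_api should be set to '0'/'False' when the Paddle Inference backend is selected.

#include <vector>

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  option.UseCpu();
  option.UseOrtBackend();  // optional; Init() auto-selects a backend otherwise

  ultrainfer::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }

  // Build one input tensor whose name matches the model's first input.
  ultrainfer::TensorInfo info = runtime.GetInputInfo(0);
  std::vector<float> data(1 * 3 * 224 * 224, 0.5f);  // placeholder input data
  ultrainfer::FDTensor x(info.name);
  x.SetExternalData({1, 3, 224, 224}, ultrainfer::FDDataType::FP32, data.data(),
                    ultrainfer::Device::CPU, 0);

  std::vector<ultrainfer::FDTensor> inputs, outputs;
  inputs.emplace_back(std::move(x));
  if (!runtime.Infer(inputs, &outputs)) {
    return -1;
  }
  return 0;
}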
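The no-argument Infer() overload above consumes tensors registered through BindInputTensor/BindOutputTensor and exposes results via GetOutputTensor, while Clone() shares the underlying engine only for OpenVINO and Paddle Inference, per the warning in Runtime::Clone. A sketch under the same assumptions as the previous example; the tensor names "x" and "y" are hypothetical.

#include "ultrainfer/runtime/runtime.h"

void InferByName(ultrainfer::Runtime &runtime, float *data) {
  ultrainfer::FDTensor x("x");  // "x" must match a real model input name
  x.SetExternalData({1, 3, 224, 224}, ultrainfer::FDDataType::FP32, data,
                    ultrainfer::Device::CPU, 0);
  runtime.BindInputTensor("x", x);  // zero-copy: shares the caller's buffer
  runtime.Infer();                  // runs on the bound tensors
  ultrainfer::FDTensor *y = runtime.GetOutputTensor("y");
  (void)y;  // read results from *y here

  // Clone() shares the engine for Backend::OPENVINO / Backend::PDINFER; other
  // backends get a freshly built engine, as Runtime::Clone warns.
  ultrainfer::Runtime *worker = runtime.Clone(nullptr, 0);
  delete worker;
}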
+ +#include "ultrainfer/runtime/runtime.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +void RuntimeOption::SetModelPath(const std::string &model_path, + const std::string ¶ms_path, + const ModelFormat &format) { + model_file = model_path; + params_file = params_path; + model_format = format; + model_from_memory_ = false; +} + +void RuntimeOption::SetModelBuffer(const std::string &model_buffer, + const std::string ¶ms_buffer, + const ModelFormat &format) { + model_file = model_buffer; + params_file = params_buffer; + model_format = format; + model_from_memory_ = true; +} + +void RuntimeOption::UseGpu(int gpu_id) { +#if defined(WITH_GPU) || defined(WITH_OPENCL) + device = Device::GPU; + device_id = gpu_id; + +#if defined(WITH_OPENCL) && defined(ENABLE_LITE_BACKEND) + paddle_lite_option.device = device; +#endif + +#else + FDWARNING << "The UltraInfer didn't compile with GPU, will force to use CPU." + << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseCpu() { device = Device::CPU; } + +void RuntimeOption::UseRKNPU2(ultrainfer::rknpu2::CpuName rknpu2_name, + ultrainfer::rknpu2::CoreMask rknpu2_core) { + rknpu2_option.cpu_name = rknpu2_name; + rknpu2_option.core_mask = rknpu2_core; + device = Device::RKNPU; +} + +void RuntimeOption::UseHorizon() { device = Device::SUNRISENPU; } + +void RuntimeOption::UseTimVX() { + device = Device::TIMVX; + paddle_lite_option.device = device; +} + +void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size, + bool locked, bool autotune, + const std::string &autotune_file, + const std::string &precision, + bool adaptive_seqlen, bool enable_multi_stream, + int64_t gm_default_size) { +#ifdef WITH_KUNLUNXIN + device = Device::KUNLUNXIN; + +#ifdef ENABLE_LITE_BACKEND + paddle_lite_option.device = device; + paddle_lite_option.device_id = kunlunxin_id; + paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size; + paddle_lite_option.kunlunxin_locked = locked; + paddle_lite_option.kunlunxin_autotune = autotune; + paddle_lite_option.kunlunxin_autotune_file = autotune_file; + paddle_lite_option.kunlunxin_precision = precision; + paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen; + paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream; + paddle_lite_option.kunlunxin_gm_default_size = gm_default_size; +#endif +#ifdef ENABLE_PADDLE_BACKEND + paddle_infer_option.device = device; + paddle_infer_option.xpu_option.kunlunxin_device_id = kunlunxin_id; + paddle_infer_option.xpu_option.kunlunxin_l3_workspace_size = + l3_workspace_size; + paddle_infer_option.xpu_option.kunlunxin_locked = locked; + paddle_infer_option.xpu_option.kunlunxin_autotune = autotune; + paddle_infer_option.xpu_option.kunlunxin_autotune_file = autotune_file; + paddle_infer_option.xpu_option.kunlunxin_precision = precision; + paddle_infer_option.xpu_option.kunlunxin_adaptive_seqlen = adaptive_seqlen; + paddle_infer_option.xpu_option.kunlunxin_enable_multi_stream = + enable_multi_stream; +// paddle_infer_option.xpu_option.kunlunxin_gm_default_size = gm_default_size; +// use paddle_infer_option.xpu_option.SetXpuConfig() for more options. +#endif + +#else + FDWARNING + << "The UltraInfer didn't compile with KUNLUNXIN, will force to use CPU." 
+ << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseIpu(int device_num, int micro_batch_size, + bool enable_pipelining, int batches_per_step) { +#ifdef WITH_IPU + device = Device::IPU; + paddle_infer_option.ipu_option.ipu_device_num = device_num; + paddle_infer_option.ipu_option.ipu_micro_batch_size = micro_batch_size; + paddle_infer_option.ipu_option.ipu_enable_pipelining = enable_pipelining; + paddle_infer_option.ipu_option.ipu_batches_per_step = batches_per_step; +// use paddle_infer_option.ipu_option.SetIpuConfig() for more options. +#else + FDWARNING << "The UltraInfer didn't compile with IPU, will force to use CPU." + << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseAscend() { + device = Device::ASCEND; + paddle_lite_option.device = device; +} + +void RuntimeOption::UseDirectML() { device = Device::DIRECTML; } + +void RuntimeOption::UseSophgo() { + device = Device::SOPHGOTPUD; + UseSophgoBackend(); +} + +void RuntimeOption::SetExternalStream(void *external_stream) { + external_stream_ = external_stream; +} + +void RuntimeOption::SetCpuThreadNum(int thread_num) { + FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); + cpu_thread_num = thread_num; + paddle_lite_option.cpu_threads = thread_num; + ort_option.intra_op_num_threads = thread_num; + openvino_option.cpu_thread_num = thread_num; + paddle_infer_option.cpu_thread_num = thread_num; +} + +void RuntimeOption::SetOrtGraphOptLevel(int level) { + FDWARNING << "`RuntimeOption::SetOrtGraphOptLevel` will be removed in " + "v1.2.0, please modify its member variables directly, e.g " + "`runtime_option.ort_option.graph_optimization_level = 99`." + << std::endl; + std::vector supported_level{-1, 0, 1, 2}; + auto valid_level = std::find(supported_level.begin(), supported_level.end(), + level) != supported_level.end(); + FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); + ort_option.graph_optimization_level = level; +} + +// use paddle inference backend +void RuntimeOption::UsePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + backend = Backend::PDINFER; +#else + FDASSERT(false, "The UltraInfer didn't compile with Paddle Inference."); +#endif +} + +// use onnxruntime backend +void RuntimeOption::UseOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + backend = Backend::ORT; +#else + FDASSERT(false, "The UltraInfer didn't compile with OrtBackend."); +#endif +} + +// use sophgoruntime backend +void RuntimeOption::UseSophgoBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + backend = Backend::SOPHGOTPU; +#else + FDASSERT(false, "The UltraInfer didn't compile with SophgoBackend."); +#endif +} + +// use poros backend +void RuntimeOption::UsePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend = Backend::POROS; +#else + FDASSERT(false, "The UltraInfer didn't compile with PorosBackend."); +#endif +} + +void RuntimeOption::UseTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + backend = Backend::TRT; +#else + FDASSERT(false, "The UltraInfer didn't compile with TrtBackend."); +#endif +} + +void RuntimeOption::UseOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + backend = Backend::OPENVINO; +#else + FDASSERT(false, "The UltraInfer didn't compile with OpenVINO."); +#endif +} + +void RuntimeOption::UseLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + backend = Backend::LITE; +#else + FDASSERT(false, "The UltraInfer didn't compile with Paddle Lite."); +#endif +} + +void RuntimeOption::UseHorizonNPUBackend() { +#ifdef ENABLE_HORIZON_BACKEND + backend = Backend::HORIZONNPU; +#else + FDASSERT(false, "The 
UltraInfer didn't compile with horizon"); +#endif +} + +void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) { + FDWARNING << "`RuntimeOption::SetPaddleMKLDNN` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_mkldnn = true`" + << std::endl; + paddle_infer_option.enable_mkldnn = pd_mkldnn; +} + +void RuntimeOption::DeletePaddleBackendPass(const std::string &pass_name) { + FDWARNING + << "`RuntimeOption::DeletePaddleBackendPass` will be removed in v1.2.0, " + "please use `option.paddle_infer_option.DeletePass` instead." + << std::endl; + paddle_infer_option.DeletePass(pass_name); +} +void RuntimeOption::EnablePaddleLogInfo() { + FDWARNING << "`RuntimeOption::EnablePaddleLogInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_log_info = true`" + << std::endl; + paddle_infer_option.enable_log_info = true; +} + +void RuntimeOption::DisablePaddleLogInfo() { + FDWARNING << "`RuntimeOption::DisablePaddleLogInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_log_info = false`" + << std::endl; + paddle_infer_option.enable_log_info = false; +} + +void RuntimeOption::EnablePaddleToTrt() { +#ifdef ENABLE_PADDLE_BACKEND + FDWARNING << "`RuntimeOption::EnablePaddleToTrt` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_trt = true`" + << std::endl; + FDINFO << "While using TrtBackend with EnablePaddleToTrt, UltraInfer will " + "change to use Paddle Inference Backend." + << std::endl; + backend = Backend::PDINFER; + paddle_infer_option.enable_trt = true; +#else + FDASSERT(false, "While using TrtBackend with EnablePaddleToTrt, require the " + "UltraInfer is compiled with Paddle Inference Backend, " + "please rebuild your UltraInfer."); +#endif +} + +void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) { + FDWARNING << "`RuntimeOption::SetPaddleMKLDNNCacheSize` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.mkldnn_cache_size = size`." + << std::endl; + paddle_infer_option.mkldnn_cache_size = size; +} + +void RuntimeOption::SetOpenVINODevice(const std::string &name) { + FDWARNING << "`RuntimeOption::SetOpenVINODevice` will be removed in v1.2.0, " + "please use `RuntimeOption.openvino_option.SetDeivce(const " + "std::string&)` instead." + << std::endl; + openvino_option.SetDevice(name); +} + +void RuntimeOption::EnableLiteFP16() { + FDWARNING << "`RuntimeOption::EnableLiteFP16` will be removed in v1.2.0, " + "please modify its member variables directly, e.g " + "`runtime_option.paddle_lite_option.enable_fp16 = true`" + << std::endl; + paddle_lite_option.enable_fp16 = true; +} + +void RuntimeOption::DisableLiteFP16() { + FDWARNING << "`RuntimeOption::EnableLiteFP16` will be removed in v1.2.0, " + "please modify its member variables directly, e.g " + "`runtime_option.paddle_lite_option.enable_fp16 = false`" + << std::endl; + paddle_lite_option.enable_fp16 = false; +} + +void RuntimeOption::EnableLiteInt8() { + FDWARNING << "RuntimeOption::EnableLiteInt8 is a useless api, this calling " + "will not bring any effects, and will be removed in v1.2.0. if " + "you load a quantized model, it will automatically run with " + "int8 mode; otherwise it will run with float mode." 
+ << std::endl; +} + +void RuntimeOption::DisableLiteInt8() { + FDWARNING << "RuntimeOption::DisableLiteInt8 is a useless api, this calling " + "will not bring any effects, and will be removed in v1.2.0. if " + "you load a quantized model, it will automatically run with " + "int8 mode; otherwise it will run with float mode." + << std::endl; +} + +void RuntimeOption::SetLitePowerMode(LitePowerMode mode) { + FDWARNING << "`RuntimeOption::SetLitePowerMode` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.power_mode = 3;`" + << std::endl; + paddle_lite_option.power_mode = mode; +} + +void RuntimeOption::SetLiteOptimizedModelDir( + const std::string &optimized_model_dir) { + FDWARNING + << "`RuntimeOption::SetLiteOptimizedModelDir` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.optimized_model_dir = \"...\"`" + << std::endl; + paddle_lite_option.optimized_model_dir = optimized_model_dir; +} + +void RuntimeOption::SetLiteSubgraphPartitionPath( + const std::string &nnadapter_subgraph_partition_config_path) { + FDWARNING << "`RuntimeOption::SetLiteSubgraphPartitionPath` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_subgraph_" + "partition_config_path = \"...\";` " + << std::endl; + paddle_lite_option.nnadapter_subgraph_partition_config_path = + nnadapter_subgraph_partition_config_path; +} + +void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer( + const std::string &nnadapter_subgraph_partition_config_buffer) { + FDWARNING + << "`RuntimeOption::SetLiteSubgraphPartitionConfigBuffer` will be " + "removed in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_subgraph_partition_" + "config_buffer = ...`" + << std::endl; + paddle_lite_option.nnadapter_subgraph_partition_config_buffer = + nnadapter_subgraph_partition_config_buffer; +} + +void RuntimeOption::SetLiteContextProperties( + const std::string &nnadapter_context_properties) { + FDWARNING << "`RuntimeOption::SetLiteContextProperties` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_context_" + "properties = ...`" + << std::endl; + paddle_lite_option.nnadapter_context_properties = + nnadapter_context_properties; +} + +void RuntimeOption::SetLiteModelCacheDir( + const std::string &nnadapter_model_cache_dir) { + FDWARNING + << "`RuntimeOption::SetLiteModelCacheDir` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_model_cache_dir = ...`" + << std::endl; + paddle_lite_option.nnadapter_model_cache_dir = nnadapter_model_cache_dir; +} + +void RuntimeOption::SetLiteDynamicShapeInfo( + const std::map>> + &nnadapter_dynamic_shape_info) { + FDWARNING << "`RuntimeOption::SetLiteDynamicShapeInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.paddle_lite_option." 
+ "nnadapter_dynamic_shape_info = ...`" + << std::endl; + paddle_lite_option.nnadapter_dynamic_shape_info = + nnadapter_dynamic_shape_info; +} + +void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath( + const std::string &nnadapter_mixed_precision_quantization_config_path) { + FDWARNING + << "`RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath` will be " + "removed in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.paddle_lite_option.nnadapter_" + "mixed_precision_quantization_config_path = ...`" + << std::endl; + paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = + nnadapter_mixed_precision_quantization_config_path; +} + +void RuntimeOption::SetTrtInputShape(const std::string &input_name, + const std::vector &min_shape, + const std::vector &opt_shape, + const std::vector &max_shape) { + FDWARNING << "`RuntimeOption::SetTrtInputShape` will be removed in v1.2.0, " + "please use `RuntimeOption.trt_option.SetShape()` instead." + << std::endl; + trt_option.SetShape(input_name, min_shape, opt_shape, max_shape); +} + +void RuntimeOption::SetTrtInputData(const std::string &input_name, + const std::vector &min_shape_data, + const std::vector &opt_shape_data, + const std::vector &max_shape_data) { + FDWARNING << "`RuntimeOption::SetTrtInputData` will be removed in v1.2.0, " + "please use `RuntimeOption.trt_option.SetInputData()` instead." + << std::endl; + trt_option.SetInputData(input_name, min_shape_data, opt_shape_data, + max_shape_data); +} + +void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) { + FDWARNING << "`RuntimeOption::SetTrtMaxWorkspaceSize` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`RuntimeOption.trt_option.max_workspace_size = " + << max_workspace_size << "`." << std::endl; + trt_option.max_workspace_size = max_workspace_size; +} +void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) { + FDWARNING << "`RuntimeOption::SetTrtMaxBatchSize` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`RuntimeOption.trt_option.max_batch_size = " + << max_batch_size << "`." << std::endl; + trt_option.max_batch_size = max_batch_size; +} + +void RuntimeOption::EnableTrtFP16() { + FDWARNING << "`RuntimeOption::EnableTrtFP16` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.enable_fp16 = true;`" + << std::endl; + trt_option.enable_fp16 = true; +} + +void RuntimeOption::DisableTrtFP16() { + FDWARNING << "`RuntimeOption::DisableTrtFP16` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.enable_fp16 = false;`" + << std::endl; + trt_option.enable_fp16 = false; +} + +void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; } + +void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; } + +void RuntimeOption::SetTrtCacheFile(const std::string &cache_file_path) { + FDWARNING << "`RuntimeOption::SetTrtCacheFile` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.serialize_file = \"" + << cache_file_path << "\"." 
<< std::endl; + trt_option.serialize_file = cache_file_path; +} + +void RuntimeOption::SetOpenVINOStreams(int num_streams) { + FDWARNING << "`RuntimeOption::SetOpenVINOStreams` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.openvino_option.num_streams = " + << num_streams << "`." << std::endl; + openvino_option.num_streams = num_streams; +} + +void RuntimeOption::EnablePaddleTrtCollectShape() { + FDWARNING << "`RuntimeOption::EnablePaddleTrtCollectShape` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "runtime_option.paddle_infer_option.collect_trt_shape = true`." + << std::endl; + paddle_infer_option.collect_trt_shape = true; +} + +void RuntimeOption::DisablePaddleTrtCollectShape() { + FDWARNING << "`RuntimeOption::DisablePaddleTrtCollectShape` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "runtime_option.paddle_infer_option.collect_trt_shape = false`." + << std::endl; + paddle_infer_option.collect_trt_shape = false; +} + +void RuntimeOption::DisablePaddleTrtOPs(const std::vector &ops) { + FDWARNING << "`RuntimeOption::DisablePaddleTrtOps` will be removed in " + "v.1.20, please use " + "`runtime_option.paddle_infer_option.DisableTrtOps` instead." + << std::endl; + paddle_infer_option.DisableTrtOps(ops); +} + +void RuntimeOption::UseTVMBackend() { +#ifdef ENABLE_TVM_BACKEND + backend = Backend::TVM; +#else + FDASSERT(false, "The UltraInfer didn't compile with TVMBackend."); +#endif +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime_option.h b/libs/ultrainfer/ultrainfer/runtime/runtime_option.h new file mode 100755 index 0000000000..8b372a28d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime_option.h @@ -0,0 +1,282 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime_option.h + \brief A brief file description. + More details + */ + +#pragma once + +#include "ultrainfer/benchmark/option.h" +#include "ultrainfer/runtime/backends/lite/option.h" +#include "ultrainfer/runtime/backends/openvino/option.h" +#include "ultrainfer/runtime/backends/ort/option.h" +#include "ultrainfer/runtime/backends/paddle/option.h" +#include "ultrainfer/runtime/backends/poros/option.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +#include "ultrainfer/runtime/backends/sophgo/option.h" +#include "ultrainfer/runtime/backends/tensorrt/option.h" +#include "ultrainfer/runtime/backends/tvm/option.h" +#include "ultrainfer/runtime/enum_variables.h" +#include +#include +#include + +namespace ultrainfer { + +/*! 
@brief Option object used when create a new Runtime object + */ +struct ULTRAINFER_DECL RuntimeOption { + /** \brief Set path of model file and parameter file + * + * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for + * Paddle format model / ResNet50/model.onnx for ONNX format model \param[in] + * params_path Path of parameter file, this only used when the model format is + * Paddle, e.g Resnet50/model.pdiparams \param[in] format Format of the loaded + * model + */ + void SetModelPath(const std::string &model_path, + const std::string ¶ms_path = "", + const ModelFormat &format = ModelFormat::PADDLE); + + /** \brief Specify the memory buffer of model and parameter. Used when model + * and params are loaded directly from memory + * + * \param[in] model_buffer The string of model memory buffer + * \param[in] params_buffer The string of parameters memory buffer + * \param[in] format Format of the loaded model + */ + void SetModelBuffer(const std::string &model_buffer, + const std::string ¶ms_buffer = "", + const ModelFormat &format = ModelFormat::PADDLE); + + /// Use cpu to inference, the runtime will inference on CPU by default + void UseCpu(); + /// Use Nvidia GPU to inference + void UseGpu(int gpu_id = 0); + /// Use RKNPU2 e.g RK3588/RK356X to inference + void UseRKNPU2(ultrainfer::rknpu2::CpuName rknpu2_name = + ultrainfer::rknpu2::CpuName::RK356X, + ultrainfer::rknpu2::CoreMask rknpu2_core = + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO); + // Use Horizon NPU to inference + void UseHorizon(); + /// Use TimVX e.g RV1126/A311D to inference + void UseTimVX(); + /// Use Huawei Ascend to inference + void UseAscend(); + + /// Use onnxruntime DirectML to inference + void UseDirectML(); + + /// Use Sophgo to inference + void UseSophgo(); + /// \brief Turn on KunlunXin XPU. + /// + /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0). + /// \param l3_workspace_size The size of the video memory allocated by the l3 + /// cache, the maximum is 16M. + /// \param locked Whether the allocated L3 cache can be locked. If false, + /// it means that the L3 cache is not locked, and the allocated L3 + /// cache can be shared by multiple models, and multiple models + /// sharing the L3 cache will be executed sequentially on the card. + /// \param autotune Whether to autotune the conv operator in the model. If + /// true, when the conv operator of a certain dimension is executed + /// for the first time, it will automatically search for a better + /// algorithm to improve the performance of subsequent conv operators + /// of the same dimension. + /// \param autotune_file Specify the path of the autotune file. If + /// autotune_file is specified, the algorithm specified in the + /// file will be used and autotune will not be performed again. + /// \param precision Calculation accuracy of multi_encoder + /// \param adaptive_seqlen Is the input of multi_encoder variable length + /// \param enable_multi_stream Whether to enable the multi stream of + /// KunlunXin XPU. + /// \param gm_default_size The default size of global memory of KunlunXin XPU. 
+ /// + void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00, + bool locked = false, bool autotune = true, + const std::string &autotune_file = "", + const std::string &precision = "int16", + bool adaptive_seqlen = false, + bool enable_multi_stream = false, + int64_t gm_default_size = 0); + + void SetExternalStream(void *external_stream); + + /* + * @brief Set number of cpu threads while inference on CPU, by default it will + * decided by the different backends + */ + void SetCpuThreadNum(int thread_num); + /// Set Paddle Inference as inference backend, support CPU/GPU + void UsePaddleInferBackend() { return UsePaddleBackend(); } + /// Set ONNX Runtime as inference backend, support CPU/GPU + void UseOrtBackend(); + /// Set SOPHGO Runtime as inference backend, support SOPHGO + void UseSophgoBackend(); + /// Set TensorRT as inference backend, only support GPU + void UseTrtBackend(); + /// Set Poros backend as inference backend, support CPU/GPU + void UsePorosBackend(); + /// Set OpenVINO as inference backend, only support CPU + void UseOpenVINOBackend(); + /// Set Paddle Lite as inference backend, only support arm cpu + void UsePaddleLiteBackend() { return UseLiteBackend(); } + /** \Use Graphcore IPU to inference. + * + * \param[in] device_num the number of IPUs. + * \param[in] micro_batch_size the batch size in the graph, only work when + * graph has no batch shape info. \param[in] enable_pipelining enable + * pipelining. \param[in] batches_per_step the number of batches per run in + * pipelining. + */ + void UseIpu(int device_num = 1, int micro_batch_size = 1, + bool enable_pipelining = false, int batches_per_step = 1); + + /// Option to configure ONNX Runtime backend + OrtBackendOption ort_option; + /// Option to configure TensorRT backend + TrtBackendOption trt_option; + /// Option to configure Paddle Inference backend + PaddleBackendOption paddle_infer_option; + /// Option to configure Poros backend + PorosBackendOption poros_option; + /// Option to configure OpenVINO backend + OpenVINOBackendOption openvino_option; + /// Option to configure Paddle Lite backend + LiteBackendOption paddle_lite_option; + /// Option to configure RKNPU2 backend + RKNPU2BackendOption rknpu2_option; + /// Option to configure TVM backend + TVMBackendOption tvm_option; + + // \brief Set the profile mode as 'true'. + // + // \param[in] inclue_h2d_d2h Whether to + // include time of H2D_D2H for time of runtime. + // \param[in] repeat Repeat times for runtime inference. + // \param[in] warmup Warmup times for runtime inference. + // + void EnableProfiling(bool inclue_h2d_d2h = false, int repeat = 100, + int warmup = 50) { + benchmark_option.enable_profile = true; + benchmark_option.warmup = warmup; + benchmark_option.repeats = repeat; + benchmark_option.include_h2d_d2h = inclue_h2d_d2h; + } + + // \brief Set the profile mode as 'false'. + // + void DisableProfiling() { benchmark_option.enable_profile = false; } + + // \brief Enable to check if current backend set by + // user can be found at valid_xxx_backend. + // + void EnableValidBackendCheck() { enable_valid_backend_check = true; } + // \brief Disable to check if current backend set by + // user can be found at valid_xxx_backend. + // + void DisableValidBackendCheck() { enable_valid_backend_check = false; } + + // Benchmark option + benchmark::BenchmarkOption benchmark_option; + // enable the check for valid backend, default true. 
+ bool enable_valid_backend_check = true; + + // If model_from_memory is true, the model_file and params_file is + // binary stream in memory; + // Otherwise, the model_file and params_file means the path of file + std::string model_file = ""; + std::string params_file = ""; + bool model_from_memory_ = false; + // format of input model + ModelFormat model_format = ModelFormat::PADDLE; + + // for cpu inference + // default will let the backend choose their own default value + int cpu_thread_num = -1; + int device_id = 0; + Backend backend = Backend::UNKNOWN; + + Device device = Device::CPU; + + void *external_stream_ = nullptr; + + bool enable_pinned_memory = false; + + // *** The belowing api are deprecated, will be removed in v1.2.0 + // *** Do not use it anymore + void SetPaddleMKLDNN(bool pd_mkldnn = true); + void EnablePaddleToTrt(); + void DeletePaddleBackendPass(const std::string &delete_pass_name); + void EnablePaddleLogInfo(); + void DisablePaddleLogInfo(); + void SetPaddleMKLDNNCacheSize(int size); + void SetOpenVINODevice(const std::string &name = "CPU"); + void SetOpenVINOShapeInfo( + const std::map> &shape_info) { + openvino_option.shape_infos = shape_info; + } + void SetOpenVINOCpuOperators(const std::vector &operators) { + openvino_option.SetCpuOperators(operators); + } + void SetLiteOptimizedModelDir(const std::string &optimized_model_dir); + void SetLiteSubgraphPartitionPath( + const std::string &nnadapter_subgraph_partition_config_path); + void SetLiteSubgraphPartitionConfigBuffer( + const std::string &nnadapter_subgraph_partition_config_buffer); + void + SetLiteContextProperties(const std::string &nnadapter_context_properties); + void SetLiteModelCacheDir(const std::string &nnadapter_model_cache_dir); + void SetLiteDynamicShapeInfo( + const std::map>> + &nnadapter_dynamic_shape_info); + void SetLiteMixedPrecisionQuantizationConfigPath( + const std::string &nnadapter_mixed_precision_quantization_config_path); + void EnableLiteFP16(); + void DisableLiteFP16(); + void EnableLiteInt8(); + void DisableLiteInt8(); + void SetLitePowerMode(LitePowerMode mode); + void SetTrtInputShape( + const std::string &input_name, const std::vector &min_shape, + const std::vector &opt_shape = std::vector(), + const std::vector &max_shape = std::vector()); + + void SetTrtInputData( + const std::string &input_name, const std::vector &min_shape_data, + const std::vector &opt_shape_data = std::vector(), + const std::vector &max_shape_data = std::vector()); + + void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); + void SetTrtMaxBatchSize(size_t max_batch_size); + void EnableTrtFP16(); + void DisableTrtFP16(); + void SetTrtCacheFile(const std::string &cache_file_path); + void EnablePinnedMemory(); + void DisablePinnedMemory(); + void EnablePaddleTrtCollectShape(); + void DisablePaddleTrtCollectShape(); + void DisablePaddleTrtOPs(const std::vector &ops); + void SetOpenVINOStreams(int num_streams); + void SetOrtGraphOptLevel(int level = -1); + void UsePaddleBackend(); + void UseLiteBackend(); + void UseHorizonNPUBackend(); + void UseTVMBackend(); +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text.h b/libs/ultrainfer/ultrainfer/text.h new file mode 100755 index 0000000000..d13d05dd63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text.h @@ -0,0 +1,19 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
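Most of the Set*/Enable* helpers implemented in runtime_option.cc above only forward to the per-backend option structs and warn that they will be removed in v1.2.0, so new code should write those members directly. A configuration sketch follows; the model paths, the input name "image", the shapes, and the cache path are placeholders.

#include "ultrainfer/runtime/runtime_option.h"

ultrainfer::RuntimeOption MakeGpuTrtOption() {
  ultrainfer::RuntimeOption opt;
  opt.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  opt.UseGpu(0);
  opt.UseTrtBackend();
  // Preferred style: configure the backend option struct directly instead of
  // the deprecated wrappers (SetTrtInputShape, EnableTrtFP16, SetTrtCacheFile).
  opt.trt_option.enable_fp16 = true;
  opt.trt_option.SetShape("image", {1, 3, 224, 224}, {1, 3, 224, 224},
                          {4, 3, 224, 224});
  opt.trt_option.serialize_file = "trt_cache/engine.trt";  // hypothetical path
  return opt;
}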
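EnableProfiling() above only fills benchmark_option; the measured time is read back through Runtime::GetProfileTime() declared in runtime.h. A short sketch reusing the hypothetical MakeGpuTrtOption() helper from the previous example; the repeat and warmup counts are arbitrary illustrative values.

#include "ultrainfer/runtime/runtime.h"

double ProfileOnce() {
  ultrainfer::RuntimeOption opt = MakeGpuTrtOption();
  opt.EnableProfiling(false, /*repeat=*/50, /*warmup=*/10);

  ultrainfer::Runtime runtime;
  if (!runtime.Init(opt)) {
    return -1.0;
  }
  // ... call runtime.Infer(inputs, &outputs) here as in the earlier sketch ...
  return runtime.GetProfileTime();  // profile time recorded by the backend
}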
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_TEXT +#include "ultrainfer/text/uie/model.h" +#endif diff --git a/libs/ultrainfer/ultrainfer/text/common/option.h b/libs/ultrainfer/ultrainfer/text/common/option.h new file mode 100755 index 0000000000..e00ff7178b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/option.h @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL TextPreprocessOption {}; +struct ULTRAINFER_DECL TextPostprocessOption {}; +struct ULTRAINFER_DECL PredictionOption {}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/common/result.cc b/libs/ultrainfer/ultrainfer/text/common/result.cc new file mode 100755 index 0000000000..2d79b4dd16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/result.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/text/common/result.h" + +namespace ultrainfer { +namespace text {} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/common/result.h b/libs/ultrainfer/ultrainfer/text/common/result.h new file mode 100755 index 0000000000..7002083d15 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/result.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL Result {}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc new file mode 100755 index 0000000000..d8c2349a2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/postprocessor/postprocessor.h" + +namespace ultrainfer { +namespace text { + +bool Postprocessor::Decode(const std::vector &model_result, + Result *decoded_result) const { + return true; +} + +bool Postprocessor::DecodeBatch(const std::vector &model_result, + Result *decoded_result) const { + return true; +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h new file mode 100755 index 0000000000..fb969caa62 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/text/common/result.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace text { + +class Postprocessor { +public: + virtual bool Decode(const std::vector &model_result, + Result *decoded_result) const; + virtual bool DecodeBatch(const std::vector &model_result, + Result *decoded_result) const; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc new file mode 100755 index 0000000000..01848a4277 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc @@ -0,0 +1,32 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/preprocessor/preprocessor.h" + +namespace ultrainfer { +namespace text { + +bool Preprocessor::Encode(const std::string &raw_text, + std::vector *encoded_tensor) const { + return true; +} + +bool Preprocessor::EncodeBatch(const std::vector &raw_texts, + std::vector *encoded_tensor) const { + return true; +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h new file mode 100755 index 0000000000..28ccd53272 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include +#include + +namespace ultrainfer { +namespace text { + +class Preprocessor { +public: + virtual bool Encode(const std::string &raw_text, + std::vector *encoded_tensor) const; + virtual bool EncodeBatch(const std::vector &raw_texts, + std::vector *encoded_tensor) const; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/text_model.cc b/libs/ultrainfer/ultrainfer/text/text_model.cc new file mode 100755 index 0000000000..efa6c7517c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/text_model.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
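The text::Preprocessor and text::Postprocessor classes declared above are thin virtual interfaces whose default Encode/Decode implementations simply return true. A derivation sketch follows; the class names are hypothetical, and the std::vector element type (FDTensor) is inferred from the fd_tensor.h include because the template arguments were stripped in this copy of the patch.

#include <string>
#include <vector>

#include "ultrainfer/text/postprocessor/postprocessor.h"
#include "ultrainfer/text/preprocessor/preprocessor.h"

namespace ultrainfer {
namespace text {

// Hypothetical concrete preprocessor: turn raw text into input tensors.
class MyPreprocessor : public Preprocessor {
public:
  bool Encode(const std::string &raw_text,
              std::vector<FDTensor> *encoded_tensor) const override {
    // Tokenize raw_text and fill *encoded_tensor here.
    return true;
  }
};

// Hypothetical concrete postprocessor: turn output tensors into a Result.
class MyPostprocessor : public Postprocessor {
public:
  bool Decode(const std::vector<FDTensor> &model_result,
              Result *decoded_result) const override {
    // Convert model_result into *decoded_result here.
    return true;
  }
};

} // namespace text
} // namespace ultrainfer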
+
+#include "ultrainfer/text/text_model.h"
+#include "ultrainfer/text/common/option.h"
+#include "ultrainfer/text/common/result.h"
+#include "ultrainfer/text/postprocessor/postprocessor.h"
+#include "ultrainfer/text/preprocessor/preprocessor.h"
+
+namespace ultrainfer {
+namespace text {
+
+bool TextModel::Predict(const std::string &raw_text, Result *result,
+ const PredictionOption &option) {
+ // Preprocess
+ std::vector<FDTensor> input_tensor;
+ std::vector<FDTensor> output_tensor;
+ if (!preprocessor_->Encode(raw_text, &input_tensor)) {
+ FDERROR << "Failed to preprocess input data while using model:"
+ << ModelName() << "." << std::endl;
+ return false;
+ }
+
+ // Inference Runtime
+ if (!Infer(input_tensor, &output_tensor)) {
+ FDERROR << "Failed to run inference while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+
+ // Postprocess
+ if (!postprocessor_->Decode(output_tensor, result)) {
+ FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+ return true;
+}
+
+bool TextModel::PredictBatch(const std::vector<std::string> &raw_text_array,
+ Result *results, const PredictionOption &option) {
+ // Preprocess
+ std::vector<FDTensor> input_tensor;
+ std::vector<FDTensor> output_tensor;
+ if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) {
+ FDERROR << "Failed to preprocess input data while using model:"
+ << ModelName() << "." << std::endl;
+ return false;
+ }
+
+ // Inference Runtime
+ if (!Infer(input_tensor, &output_tensor)) {
+ FDERROR << "Failed to run inference while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+
+ // Postprocess
+ if (!postprocessor_->DecodeBatch(output_tensor, results)) {
+ FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+ return true;
+}
+
+} // namespace text
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/text/text_model.h b/libs/ultrainfer/ultrainfer/text/text_model.h
new file mode 100755
index 0000000000..63f8db5a0f
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/text/text_model.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
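TextModel::Predict above runs a fixed preprocess, Infer, postprocess pipeline, with the processors injected through the SetPreprocessor/SetPostprocessor templates that the header below declares (their template parameter lists were stripped in this copy of the patch; the first parameter is assumed to be the concrete processor type). A wiring sketch follows, reusing the hypothetical MyPreprocessor/MyPostprocessor from the earlier sketch; the input string is a placeholder, and the runtime and model-file setup that a real subclass performs (as UIEModel does further below) is omitted.

#include <string>

#include "ultrainfer/text/common/option.h"
#include "ultrainfer/text/common/result.h"
#include "ultrainfer/text/text_model.h"

int RunTextModel() {
  ultrainfer::text::TextModel model;
  model.SetPreprocessor<ultrainfer::text::MyPreprocessor>();
  model.SetPostprocessor<ultrainfer::text::MyPostprocessor>();

  ultrainfer::text::Result result;
  ultrainfer::text::PredictionOption option;
  if (!model.Predict("some raw input text", &result, option)) {  // placeholder
    return -1;
  }
  return 0;
}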
+ +#pragma once +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace text { + +class Preprocessor; +class Postprocessor; +class Result; +class PredictionOption; + +class ULTRAINFER_DECL TextModel : public UltraInferModel { +public: + virtual std::string ModelName() const { return "TextModel"; } + virtual bool Predict(const std::string &raw_text, Result *result, + const PredictionOption &option); + virtual bool PredictBatch(const std::vector &raw_text_array, + Result *result, const PredictionOption &option); + template void SetPreprocessor(Args &&...args) { + preprocessor_ = utils::make_unique(std::forward(args)...); + } + template + void SetPostprocessor(Args &&...args) { + postprocessor_ = utils::make_unique(std::forward(args)...); + } + +private: + std::unique_ptr preprocessor_; + std::unique_ptr postprocessor_; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/text_pybind.cc b/libs/ultrainfer/ultrainfer/text/text_pybind.cc new file mode 100755 index 0000000000..8dbe39fd57 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/text_pybind.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace py = pybind11; +using namespace py::literals; + +namespace ultrainfer { + +void BindUIE(py::module &m); + +py::dict ConvertUIEResultToDict(const text::UIEResult &self) { + py::dict d; + d["start"] = self.start_; + d["end"] = self.end_; + d["probability"] = self.probability_; + d["text"] = self.text_; + + if (!self.relation_.empty()) { + d["relation"] = py::dict(); + for (auto iter = self.relation_.begin(); iter != self.relation_.end(); + ++iter) { + py::list l; + for (auto result_iter = iter->second.begin(); + result_iter != iter->second.end(); ++result_iter) { + l.append(ConvertUIEResultToDict(*result_iter)); + } + d["relation"][iter->first.c_str()] = l; + } + } + return d; +} + +void BindText(py::module &m) { + py::class_(m, "UIEResult", py::dynamic_attr()) + .def(py::init()) + .def_readwrite("start", &text::UIEResult::start_) + .def_readwrite("end", &text::UIEResult::end_) + .def_readwrite("probability", &text::UIEResult::probability_) + .def_readwrite("text", &text::UIEResult::text_) + .def_readwrite("relation", &text::UIEResult::relation_) + .def("get_dict", + [](const text::UIEResult &self) { + return ConvertUIEResultToDict(self); + }) + .def("__repr__", &text::UIEResult::Str) + .def("__str__", &text::UIEResult::Str); + BindUIE(m); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/model.cc b/libs/ultrainfer/ultrainfer/text/uie/model.cc new file mode 100755 index 0000000000..82e46fefb1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/model.cc @@ -0,0 +1,797 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/uie/model.h" + +#include +#include +#include +#include +#include + +#include "fast_tokenizer/pretokenizers/pretokenizer.h" +#include "fast_tokenizer/utils/utf8.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/split.h" + +namespace ultrainfer { +namespace text { + +static std::string DBC2SBC(const std::string &content) { + std::string result; + size_t content_utf8_len = 0; + while (content_utf8_len < content.length()) { + uint32_t content_char; + auto content_char_width = fast_tokenizer::utils::UTF8ToUInt32( + content.data() + content_utf8_len, &content_char); + content_char = fast_tokenizer::utils::UTF8ToUnicode(content_char); + if (content_char == 0x3000) { + content_char = 0x0020; + } else { + content_char -= 0xfee0; + } + if (!(content_char >= 0x0021 && content_char <= 0x7e)) { + result.append(content.data() + content_utf8_len, content_char_width); + } else { + char dst_char[5] = {0}; + uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char); + uint32_t utf8_char_count = + fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char); + result.append(dst_char, utf8_char_count); + } + content_utf8_len += content_char_width; + } + return result; +} + +static std::ostream &PrintResult(std::ostream &os, const UIEResult &result, + int tab_size) { + constexpr int TAB_OFFSET = 4; + // Print text + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "text: " << result.text_ << "\n"; + + // Print probability + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "probability: " << result.probability_ << "\n"; + + if (result.start_ != 0 || result.end_ != 0) { + // Print start + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "start: " << result.start_ << "\n"; + + // Print end + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "end: " << result.end_ << "\n"; + } + + // Print relation + if (result.relation_.size() > 0) { + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "relation:\n"; + for (auto &&curr_relation : result.relation_) { + for (int i = 0; i < tab_size + TAB_OFFSET; ++i) { + os << " "; + } + os << curr_relation.first << ":\n"; + for (int i = 0; i < curr_relation.second.size(); ++i) { + PrintResult(os, curr_relation.second[i], + tab_size + TAB_OFFSET + TAB_OFFSET); + } + } + } + os << "\n"; + return os; +} + +std::ostream &operator<<(std::ostream &os, const UIEResult &result) { + return PrintResult(os, result, 0); +} + +std::ostream &operator<<( + std::ostream &os, + const std::vector>> + &results) { + os << "The result:\n"; + for (int i = 0; i < results.size(); ++i) { + for (auto &&curr_result : results[i]) { + os << curr_result.first << ": \n"; + for (auto &&uie_result : curr_result.second) { + PrintResult(os, uie_result, 4); + } + } + os << std::endl; + } + return os; +} + +std::string UIEResult::Str() const { + std::ostringstream oss; + oss << *this; + return oss.str(); +} + +void 
Schema::CreateRoot(const std::string &name) { + root_ = ultrainfer::utils::make_unique(name); +} + +Schema::Schema(const std::string &schema, const std::string &name) { + CreateRoot(name); + root_->AddChild(schema); +} + +Schema::Schema(const std::vector &schema_list, + const std::string &name) { + CreateRoot(name); + for (const auto &schema : schema_list) { + root_->AddChild(schema); + } +} + +Schema::Schema(const std::vector &schema_list, + const std::string &name) { + CreateRoot(name); + for (const auto &schema : schema_list) { + root_->AddChild(schema); + } +} + +Schema::Schema(const SchemaNode &schema, const std::string &name) { + CreateRoot(name); + root_->AddChild(schema); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const SchemaNode &schema, int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +bool UIEModel::Initialize() { + SetValidBackend(); + return InitRuntime(); +} + +void UIEModel::SetValidBackend() { + // TODO(zhoushunjie): Add lite backend in future + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; +} + +void UIEModel::SetSchema(const std::vector &schema) { + schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::SetSchema(const std::vector &schema) { + 
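+  // Rebuild the schema tree from the given nodes, replacing any schema set earlier.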
schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::SetSchema(const SchemaNode &schema) { + schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::AutoSplitter(const std::vector &texts, + size_t max_length, + std::vector *short_texts, + std::vector> *input_mapping) { + size_t cnt_org = 0; + size_t cnt_short = 0; + for (auto &text : texts) { + auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(), + text.length()); + if (text_len <= max_length) { + short_texts->push_back(text); + if (input_mapping->size() <= cnt_org) { + input_mapping->push_back({cnt_short}); + } else { + (*input_mapping)[cnt_org].push_back(cnt_short); + } + cnt_short += 1; + } else { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text); + for (size_t start = 0; start < text_len; start += max_length) { + size_t end = start + max_length; + if (end > text_len) { + end = text_len; + } + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + short_texts->emplace_back(text.data() + byte_offset.first, + byte_offset.second - byte_offset.first); + } + auto short_idx = cnt_short; + cnt_short += text_len / max_length; + if (text_len % max_length != 0) { + ++cnt_short; + } + std::vector temp_text_id(cnt_short - short_idx); + std::iota(temp_text_id.begin(), temp_text_id.end(), short_idx); + if (input_mapping->size() <= cnt_org) { + input_mapping->push_back(std::move(temp_text_id)); + } else { + (*input_mapping)[cnt_org].insert((*input_mapping)[cnt_org].end(), + temp_text_id.begin(), + temp_text_id.end()); + } + } + cnt_org += 1; + } +} + +void UIEModel::GetCandidateIdx( + const float *probs, int64_t batch_size, int64_t seq_len, + std::vector>> *candidate_idx_prob, + float threshold) const { + for (int i = 0; i < batch_size; ++i) { + candidate_idx_prob->push_back({}); + for (int j = 0; j < seq_len; ++j) { + if (probs[i * seq_len + j] > threshold) { + candidate_idx_prob->back().push_back({j, probs[i * seq_len + j]}); + } + } + } +} + +bool UIEModel::IdxProbCmp::operator()( + const std::pair &lhs, + const std::pair &rhs) const { + if (lhs.first.first == rhs.first.first) { + return lhs.second.first < rhs.second.first; + } + return lhs.first.first < rhs.first.first; +} + +void UIEModel::GetSpan(const std::vector &start_idx_prob, + const std::vector &end_idx_prob, + SPAN_SET *span_set) const { + size_t start_pointer = 0; + size_t end_pointer = 0; + size_t len_start = start_idx_prob.size(); + size_t len_end = end_idx_prob.size(); + while (start_pointer < len_start && end_pointer < len_end) { + if (start_idx_prob[start_pointer].first == + end_idx_prob[end_pointer].first) { + span_set->insert(std::make_pair(start_idx_prob[start_pointer], + end_idx_prob[end_pointer])); + ++start_pointer; + ++end_pointer; + } else if (start_idx_prob[start_pointer].first < + end_idx_prob[end_pointer].first) { + span_set->insert(std::make_pair(start_idx_prob[start_pointer], + end_idx_prob[end_pointer])); + ++start_pointer; + } else { + ++end_pointer; + } + } +} +void UIEModel::GetSpanIdxAndProbs( + const SPAN_SET &span_set, + const std::vector &offset_mapping, + std::vector *span_idxs, std::vector *probs) const { + auto first_sep_idx = + std::find_if(offset_mapping.begin() + 1, offset_mapping.end(), + [](const fast_tokenizer::core::Offset &offset) { + return offset == fast_tokenizer::core::Offset(0, 0); + }); + auto prompt_end_token_id = + std::distance(offset_mapping.begin(), first_sep_idx) - 1; + for (auto &&span_item : span_set) { + 
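+    // A span's score is the product of its start-token and end-token probabilities;
+    // the offset mapping converts token indices back to character positions, and
+    // spans that fall inside the prompt (cls-style results) are flagged via is_prompt.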
probs->push_back(span_item.first.second * span_item.second.second); + auto start_id = offset_mapping[span_item.first.first].first; + auto end_id = offset_mapping[span_item.second.first].second; + bool is_prompt = span_item.second.first <= prompt_end_token_id && + span_item.second.first > 0; + span_idxs->push_back({{start_id, end_id}, is_prompt}); + } +} + +void UIEModel::ConvertSpanToUIEResult( + const std::vector &texts, + const std::vector &prompts, + const std::vector> &span_idxs, + const std::vector> &probs, + std::vector> *results) const { + auto batch_size = texts.size(); + for (int i = 0; i < batch_size; ++i) { + std::vector result_list; + if (span_idxs[i].size() == 0) { + results->push_back({}); + continue; + } + auto &&text = texts[i]; + auto &&prompt = prompts[i]; + for (int j = 0; j < span_idxs[i].size(); ++j) { + auto start = span_idxs[i][j].offset_.first; + auto end = span_idxs[i][j].offset_.second; + std::string span_text; + std::vector offset_mapping; + if (span_idxs[i][j].is_prompt_) { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter( + prompt); + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + span_text = prompt.substr(byte_offset.first, + byte_offset.second - byte_offset.first); + // Indicate cls task + start = 0; + end = 0; + } else { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter( + text); + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + span_text = text.substr(byte_offset.first, + byte_offset.second - byte_offset.first); + } + result_list.emplace_back(start, end, probs[i][j], span_text); + } + results->push_back(result_list); + } +} + +void UIEModel::AutoJoiner(const std::vector &short_texts, + const std::vector> &input_mapping, + std::vector> *results) { + bool is_cls_task = false; + // 1. Detect if it's a cls task + for (auto &&short_result : *results) { + if (short_result.size() == 0) { + continue; + } else if (short_result[0].start_ == 0 && short_result[0].end_ == 0) { + is_cls_task = true; + break; + } else { + break; + } + } + // 2. 
Get the final result + std::vector> final_result; + if (is_cls_task) { + for (auto &&input_mapping_item : input_mapping) { + std::unordered_map> cls_options; + for (auto &&result_idx : input_mapping_item) { + if ((*results)[result_idx].size() == 0) { + continue; + } + auto &&text = (*results)[result_idx].front().text_; + auto &&probability = (*results)[result_idx].front().probability_; + if (cls_options.count(text) == 0) { + cls_options[text] = std::make_pair(1, probability); + } else { + cls_options[text].first += 1; + cls_options[text].second += probability; + } + } + std::vector result_list; + if (cls_options.size() > 0) { + auto max_iter = std::max_element( + cls_options.begin(), cls_options.end(), + [](const std::pair> &lhs, + const std::pair> &rhs) { + return lhs.second.second < rhs.second.second; + }); + result_list.emplace_back( + 0, 0, max_iter->second.second / max_iter->second.first, + max_iter->first); + } + final_result.push_back(result_list); + } + } else { + for (auto &&input_mapping_item : input_mapping) { + size_t offset = 0; + std::vector result_list; + for (auto &&result_idx : input_mapping_item) { + if (result_idx == 0) { + result_list = std::move((*results)[result_idx]); + offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8( + short_texts[result_idx].c_str(), short_texts[result_idx].size()); + } else { + for (auto &&curr_result : (*results)[result_idx]) { + curr_result.start_ += offset; + curr_result.end_ += offset; + } + offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8( + short_texts[result_idx].c_str(), short_texts[result_idx].size()); + result_list.insert(result_list.end(), (*results)[result_idx].begin(), + (*results)[result_idx].end()); + } + } + final_result.push_back(result_list); + } + } + *results = std::move(final_result); +} + +bool UIEModel::ConstructTextsAndPrompts( + const std::vector &raw_texts, const std::string &node_name, + const std::vector> node_prefix, + std::vector *input_texts, std::vector *prompts, + std::vector> *input_mapping_with_raw_texts, + std::vector> *input_mapping) { + size_t idx = 0; + if (node_prefix.empty()) { + for (int i = 0; i < raw_texts.size(); ++i) { + input_texts->push_back(raw_texts[i]); + prompts->push_back(DBC2SBC(node_name)); + input_mapping_with_raw_texts->push_back({idx}); + idx += 1; + } + } else { + for (int i = 0; i < raw_texts.size(); ++i) { + if (node_prefix[i].size() == 0) { + input_mapping_with_raw_texts->push_back({}); + } else { + for (auto &&pre : node_prefix[i]) { + input_texts->push_back(raw_texts[i]); + prompts->push_back(DBC2SBC(pre + node_name)); + } + auto prefix_len = node_prefix[i].size(); + input_mapping_with_raw_texts->push_back({}); + input_mapping_with_raw_texts->back().resize(prefix_len); + std::iota(input_mapping_with_raw_texts->back().begin(), + input_mapping_with_raw_texts->back().end(), idx); + idx += prefix_len; + } + } + } + + if (prompts->size() == 0) { + return false; + } + + // Shortten the input texts and prompts + auto max_prompt_iter = std::max_element( + prompts->begin(), prompts->end(), + [](const std::string &lhs, const std::string &rhs) { + auto lhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + lhs.c_str(), lhs.length()); + auto rhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + rhs.c_str(), rhs.length()); + return lhs_ulen < rhs_ulen; + }); + auto max_prompt_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + max_prompt_iter->c_str(), max_prompt_iter->length()); + auto max_predict_len = max_length_ - 3 - max_prompt_len; + + std::vector short_texts; + 
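+  // Texts longer than max_predict_len are split into chunks below; input_mapping
+  // records which chunk indices belong to each original (text, prompt) pair.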
AutoSplitter(*input_texts, max_predict_len, &short_texts, input_mapping); + + std::vector short_texts_prompts; + for (int i = 0; i < input_mapping->size(); ++i) { + short_texts_prompts.insert(short_texts_prompts.end(), + (*input_mapping)[i].size(), (*prompts)[i]); + } + (*input_texts) = std::move(short_texts); + (*prompts) = std::move(short_texts_prompts); + return true; +} + +void UIEModel::Preprocess( + const std::vector &input_texts, + const std::vector &prompts, + std::vector *encodings, + std::vector *inputs) { + // 1. Tokenize the short texts and short prompts + std::vector text_pair_input; + for (int i = 0; i < input_texts.size(); ++i) { + text_pair_input.emplace_back( + std::pair(prompts[i], input_texts[i])); + } + tokenizer_.EncodeBatchStrings(text_pair_input, encodings); + // 2. Construct the input vector tensor + // 2.1 Allocate input tensor + int64_t batch_size = input_texts.size(); + int64_t seq_len = 0; + if (batch_size > 0) { + seq_len = (*encodings)[0].GetIds().size(); + } + inputs->resize(NumInputsOfRuntime()); + for (int i = 0; i < NumInputsOfRuntime(); ++i) { + (*inputs)[i].Allocate({batch_size, seq_len}, ultrainfer::FDDataType::INT64, + InputInfoOfRuntime(i).name); + } + + // 2.2 Set the value of data + size_t start = 0; + int64_t *input_ids_ptr = + reinterpret_cast((*inputs)[0].MutableData()); + int64_t *type_ids_ptr = + reinterpret_cast((*inputs)[1].MutableData()); + int64_t *pos_ids_ptr = + reinterpret_cast((*inputs)[2].MutableData()); + int64_t *attn_mask_ptr = + reinterpret_cast((*inputs)[3].MutableData()); + + for (int i = 0; i < encodings->size(); ++i) { + auto &&curr_input_ids = (*encodings)[i].GetIds(); + auto &&curr_type_ids = (*encodings)[i].GetTypeIds(); + auto &&curr_attn_mask = (*encodings)[i].GetAttentionMask(); + + std::copy(curr_input_ids.begin(), curr_input_ids.end(), + input_ids_ptr + start); + std::copy(curr_type_ids.begin(), curr_type_ids.end(), type_ids_ptr + start); + std::iota(pos_ids_ptr + start, pos_ids_ptr + start + seq_len, 0); + std::copy(curr_attn_mask.begin(), curr_attn_mask.end(), + attn_mask_ptr + start); + start += seq_len; + } +} + +void UIEModel::Postprocess( + const std::vector &outputs, + const std::vector &encodings, + const std::vector &short_input_texts, + const std::vector &short_prompts, + const std::vector> &input_mapping_with_short_text, + std::vector> *results) { + auto *start_prob = reinterpret_cast(outputs[0].Data()); + auto *end_prob = reinterpret_cast(outputs[1].Data()); + + std::vector>> start_candidate_idx_prob, + end_candidate_idx_prob; + GetCandidateIdx(start_prob, outputs[0].shape[0], outputs[0].shape[1], + &start_candidate_idx_prob, position_prob_); + GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1], + &end_candidate_idx_prob, position_prob_); + + std::vector> offset_mapping; + for (int i = 0; i < encodings.size(); ++i) { + auto &&curr_offsets = encodings[i].GetOffsets(); + offset_mapping.push_back(curr_offsets); + } + + SPAN_SET span_set; + auto batch_size = outputs[0].shape[0]; + std::vector> probs(batch_size); + std::vector> span_idxs(batch_size); + for (int i = 0; i < batch_size; ++i) { + GetSpan(start_candidate_idx_prob[i], end_candidate_idx_prob[i], &span_set); + GetSpanIdxAndProbs(span_set, offset_mapping[i], &span_idxs[i], &probs[i]); + span_set.clear(); + } + ConvertSpanToUIEResult(short_input_texts, short_prompts, span_idxs, probs, + results); + AutoJoiner(short_input_texts, input_mapping_with_short_text, results); +} + +void UIEModel::ConstructChildPromptPrefix( + const std::vector> 
&input_mapping_with_raw_texts, + const std::vector> &results_list, + std::vector> *prefix) { + prefix->resize(input_mapping_with_raw_texts.size()); + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + for (auto &&idx : input_mapping_item) { + for (int j = 0; j < results_list[idx].size(); ++j) { + std::string prefix_str; + if (schema_language_ == SchemaLanguage::ZH) { + // Note(zhoushunjie): It means "of" in Chinese. + prefix_str = results_list[idx][j].text_ + "\xe7\x9a\x84"; + } else { + prefix_str = " of " + results_list[idx][j].text_; + } + (*prefix)[i].push_back(prefix_str); + } + } + } +} + +void UIEModel::ConstructChildRelations( + const std::vector> &old_relations, + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + const std::string &node_name, + std::vector>> + *results, + std::vector> *new_relations) { + new_relations->resize(input_mapping_with_raw_texts.size()); + if (old_relations.size() == 0) { + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + auto &curr_result = (*results)[i]; + for (auto &&idx : input_mapping_item) { + if (results_list[idx].size() == 0) { + continue; + } + if (curr_result.count(node_name) == 0) { + curr_result[node_name] = results_list[idx]; + } else { + curr_result[node_name].insert(curr_result[node_name].end(), + results_list[idx].begin(), + results_list[idx].end()); + } + } + if (curr_result.count(node_name) > 0) { + for (auto &&curr_result_ref : curr_result[node_name]) { + (*new_relations)[i].push_back(&curr_result_ref); + } + } + } + } else { + auto &curr_relations = old_relations; + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + for (int j = 0; j < input_mapping_item.size(); ++j) { + auto idx = input_mapping_item[j]; + if (results_list[idx].size() == 0) { + continue; + } + if (curr_relations[i][j]->relation_.count(node_name) == 0) { + curr_relations[i][j]->relation_[node_name] = results_list[idx]; + } else { + auto &curr_result = curr_relations[i][j]->relation_[node_name]; + curr_result.insert(curr_result.end(), results_list[idx].begin(), + results_list[idx].end()); + } + } + } + for (int i = 0; i < curr_relations.size(); ++i) { + for (int j = 0; j < curr_relations[i].size(); ++j) { + if (curr_relations[i][j]->relation_.count(node_name)) { + auto &curr_relation = curr_relations[i][j]->relation_[node_name]; + for (auto &&curr_result_ref : curr_relation) { + (*new_relations)[i].push_back(&curr_result_ref); + } + } + } + } + } +} + +void UIEModel::Predict( + const std::vector &texts, + std::vector>> + *results) { + std::queue nodes; + for (auto &node : schema_->root_->children_) { + nodes.push(node); + } + results->resize(texts.size()); + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop(); + std::vector> input_mapping_with_raw_texts; + std::vector> input_mapping_with_short_text; + std::vector short_input_texts; + std::vector short_prompts; + // 1. Construct texts and prompts from raw text + bool has_prompt = ConstructTextsAndPrompts( + texts, node.name_, node.prefix_, &short_input_texts, &short_prompts, + &input_mapping_with_raw_texts, &input_mapping_with_short_text); + std::vector> results_list; + if (has_prompt) { + // 2. 
Convert texts and prompts to FDTensor + std::vector inputs; + std::vector encodings; + Preprocess(short_input_texts, short_prompts, &encodings, &inputs); + + std::vector> inputs_vec(NumInputsOfRuntime()); + int encoding_size = encodings.size(); + std::vector num_or_sections; + for (int i = 0; i < encoding_size; i += batch_size_) { + int actual_batch_size = (std::min)(batch_size_, encoding_size - i); + num_or_sections.push_back(actual_batch_size); + } + for (int i = 0; i < NumInputsOfRuntime(); ++i) { + function::Split(inputs[i], num_or_sections, &inputs_vec[i]); + } + + // 3. Infer + std::vector outputs(NumOutputsOfRuntime()); + std::vector outputs0, outputs1; + + for (int i = 0; i < inputs_vec[0].size(); ++i) { + std::vector curr_inputs(NumInputsOfRuntime()); + std::vector curr_outputs(NumOutputsOfRuntime()); + for (int j = 0; j < NumInputsOfRuntime(); ++j) { + curr_inputs[j] = std::move(inputs_vec[j][i]); + curr_inputs[j].name = inputs[j].name; + } + if (!Infer(curr_inputs, &curr_outputs)) { + FDERROR << "Failed to inference while using model:" << ModelName() + << "." << std::endl; + } + outputs0.push_back(curr_outputs[0]); + outputs1.push_back(curr_outputs[1]); + } + function::Concat(outputs0, &outputs[0]); + function::Concat(outputs1, &outputs[1]); + // 4. Convert FDTensor to UIEResult + Postprocess(outputs, encodings, short_input_texts, short_prompts, + input_mapping_with_short_text, &results_list); + } + // 5. Construct the new relation of the UIEResult + std::vector> relations; + ConstructChildRelations(node.relations_, input_mapping_with_raw_texts, + results_list, node.name_, results, &relations); + + // 6. Construct the next prompt prefix + std::vector> prefix(texts.size()); + ConstructChildPromptPrefix(input_mapping_with_raw_texts, results_list, + &prefix); + for (auto &node_child : node.children_) { + node_child.relations_ = relations; + node_child.prefix_ = prefix; + nodes.push(node_child); + } + } +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/model.h b/libs/ultrainfer/ultrainfer/text/uie/model.h new file mode 100755 index 0000000000..ef2b7107f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/model.h @@ -0,0 +1,210 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
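+// Usage sketch (model/vocab paths and the schema below are placeholders, the
+// remaining constructor arguments keep their defaults):
+//   ultrainfer::text::UIEModel model(
+//       "uie/inference.pdmodel", "uie/inference.pdiparams", "uie/vocab.txt",
+//       /*position_prob=*/0.5, /*max_length=*/128,
+//       std::vector<std::string>{"时间", "选手"}, /*batch_size=*/1);
+//   std::vector<std::unordered_map<std::string,
+//                                  std::vector<ultrainfer::text::UIEResult>>>
+//       results;
+//   model.Predict({"2月8日上午北京冬奥会自由式滑雪女子大跳台决赛。"}, &results);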
+ +#pragma once + +#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h" +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include +#include +#include +#include +#include + +using namespace paddlenlp; + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL UIEResult { + size_t start_; + size_t end_; + double probability_; + std::string text_; + std::unordered_map> relation_; + UIEResult() = default; + UIEResult(size_t start, size_t end, double probability, std::string text) + : start_(start), end_(end), probability_(probability), text_(text) {} + std::string Str() const; +}; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &os, + const UIEResult &result); +ULTRAINFER_DECL std::ostream &operator<<( + std::ostream &os, + const std::vector>> + &results); + +struct ULTRAINFER_DECL SchemaNode { + std::string name_; + std::vector> prefix_; + std::vector> relations_; + std::vector children_; + SchemaNode() = default; + SchemaNode(const SchemaNode &) = default; + explicit SchemaNode(const std::string &name, + const std::vector &children = {}) + : name_(name), children_(children) {} + void AddChild(const std::string &schema) { children_.emplace_back(schema); } + void AddChild(const SchemaNode &schema) { children_.push_back(schema); } + void AddChild(const std::string &schema, + const std::vector &children) { + SchemaNode schema_node(schema); + for (auto &child : children) { + schema_node.children_.emplace_back(child); + } + children_.emplace_back(schema_node); + } + void AddChild(const std::string &schema, + const std::vector &children) { + SchemaNode schema_node(schema); + schema_node.children_ = children; + children_.emplace_back(schema_node); + } +}; + +enum SchemaLanguage { + ZH, // Chinese + EN // English +}; + +struct Schema { + explicit Schema(const std::string &schema, const std::string &name = "root"); + explicit Schema(const std::vector &schema_list, + const std::string &name = "root"); + explicit Schema(const std::vector &schema_list, + const std::string &name = "root"); + explicit Schema(const SchemaNode &schema, const std::string &name = "root"); + +private: + void CreateRoot(const std::string &name); + std::unique_ptr root_; + friend class UIEModel; +}; + +struct ULTRAINFER_DECL UIEModel : public UltraInferModel { +public: + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = SchemaLanguage::ZH); + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const SchemaNode &schema, int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = SchemaLanguage::ZH); + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = 
SchemaLanguage::ZH); + virtual std::string ModelName() const { return "UIEModel"; } + void SetSchema(const std::vector &schema); + void SetSchema(const std::vector &schema); + void SetSchema(const SchemaNode &schema); + + bool ConstructTextsAndPrompts( + const std::vector &raw_texts, const std::string &node_name, + const std::vector> node_prefix, + std::vector *input_texts, std::vector *prompts, + std::vector> *input_mapping_with_raw_texts, + std::vector> *input_mapping_with_short_text); + void Preprocess(const std::vector &input_texts, + const std::vector &prompts, + std::vector *encodings, + std::vector *inputs); + void Postprocess( + const std::vector &outputs, + const std::vector &encodings, + const std::vector &short_input_texts, + const std::vector &short_prompts, + const std::vector> &input_mapping_with_short_text, + std::vector> *results); + void ConstructChildPromptPrefix( + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + std::vector> *prefix); + void ConstructChildRelations( + const std::vector> &old_relations, + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + const std::string &node_name, + std::vector>> + *results, + std::vector> *new_relations); + void + Predict(const std::vector &texts, + std::vector>> + *results); + +protected: + using IDX_PROB = std::pair; + struct IdxProbCmp { + bool operator()(const std::pair &lhs, + const std::pair &rhs) const; + }; + using SPAN_SET = std::set, IdxProbCmp>; + struct SpanIdx { + fast_tokenizer::core::Offset offset_; + bool is_prompt_; + }; + void SetValidBackend(); + bool Initialize(); + void AutoSplitter(const std::vector &texts, size_t max_length, + std::vector *short_texts, + std::vector> *input_mapping); + void AutoJoiner(const std::vector &short_texts, + const std::vector> &input_mapping, + std::vector> *results); + // Get idx of the last dimension in probability arrays, which is greater than + // a limitation. + void GetCandidateIdx(const float *probs, int64_t batch_size, int64_t seq_len, + std::vector> *candidate_idx_prob, + float threshold = 0.5) const; + void GetSpan(const std::vector &start_idx_prob, + const std::vector &end_idx_prob, + SPAN_SET *span_set) const; + void GetSpanIdxAndProbs( + const SPAN_SET &span_set, + const std::vector &offset_mapping, + std::vector *span_idxs, std::vector *probs) const; + void + ConvertSpanToUIEResult(const std::vector &texts, + const std::vector &prompts, + const std::vector> &span_idxs, + const std::vector> &probs, + std::vector> *results) const; + std::unique_ptr schema_; + size_t max_length_; + float position_prob_; + int batch_size_; + SchemaLanguage schema_language_; + fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc b/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc new file mode 100755 index 0000000000..a7a3ff3fc3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace py = pybind11; + +namespace ultrainfer { + +void BindUIE(pybind11::module &m) { + py::class_(m, "SchemaNode") + .def(py::init<>()) + .def(py::init>(), + py::arg("name"), py::arg("children")) + .def_readwrite("name", &text::SchemaNode::name_) + .def_readwrite("prefix", &text::SchemaNode::prefix_) + .def_readwrite("relations", &text::SchemaNode::relations_) + .def_readwrite("children", &text::SchemaNode::children_); + + py::enum_(m, "SchemaLanguage", py::arithmetic(), + "The language of schema.") + .value("ZH", text::SchemaLanguage::ZH) + .value("EN", text::SchemaLanguage::EN); + + py::class_(m, "UIEModel") + .def(py::init, int, RuntimeOption, ModelFormat, + text::SchemaLanguage>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init, int, RuntimeOption, + ModelFormat, text::SchemaLanguage>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def("set_schema", + static_cast &)>(&text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", + static_cast &)>( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", + static_cast( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def( + "predict", + [](text::UIEModel &self, const std::vector &texts) { + std::vector< + std::unordered_map>> + results; + self.Predict(texts, &results); + return results; + }, + py::arg("text")); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/ultrainfer_model.cc b/libs/ultrainfer/ultrainfer/ultrainfer_model.cc new file mode 100755 index 0000000000..0fc3f3d7a5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/ultrainfer_model.cc @@ -0,0 +1,517 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/ultrainfer_model.h" + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +std::string Str(const std::vector &backends) { + std::ostringstream oss; + if (backends.size() == 0) { + oss << "[]"; + return oss.str(); + } + oss << "[ " << backends[0]; + for (int i = 1; i < backends.size(); ++i) { + oss << " ," << backends[i]; + } + oss << " ]"; + return oss.str(); +} + +bool CheckBackendSupported(const std::vector &backends, + Backend backend) { + for (size_t i = 0; i < backends.size(); ++i) { + if (backends[i] == backend) { + return true; + } + } + return false; +} + +bool UltraInferModel::IsSupported(const std::vector &backends, + Backend backend) { +#ifdef ENABLE_BENCHMARK + if (runtime_option.benchmark_option.enable_profile) { + FDWARNING << "In benchmark mode, we don't check to see if " + << "the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } else if (!runtime_option.enable_valid_backend_check) { + FDWARNING << "Checking for valid backend is disable, we don't" + << " check to see if the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } + return CheckBackendSupported(backends, backend); +#else + if (!runtime_option.enable_valid_backend_check) { + FDWARNING << "Checking for valid backend is disable, we don't" + << " check to see if the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } + return CheckBackendSupported(backends, backend); +#endif +} + +bool UltraInferModel::InitRuntimeWithSpecifiedBackend() { + if (!IsBackendAvailable(runtime_option.backend)) { + FDERROR << runtime_option.backend + << " is not compiled with current UltraInfer library." << std::endl; + return false; + } + + bool use_gpu = (runtime_option.device == Device::GPU); + bool use_ipu = (runtime_option.device == Device::IPU); + bool use_rknpu = (runtime_option.device == Device::RKNPU); + bool use_horizon = (runtime_option.device == Device::SUNRISENPU); + bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD); + bool use_timvx = (runtime_option.device == Device::TIMVX); + bool use_ascend = (runtime_option.device == Device::ASCEND); + bool use_directml = (runtime_option.device == Device::DIRECTML); + bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN); + + if (use_gpu) { + if (!IsSupported(valid_gpu_backends, runtime_option.backend)) { + FDERROR << "The valid gpu backends of model " << ModelName() << " are " + << Str(valid_gpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_rknpu) { + if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) { + FDERROR << "The valid rknpu backends of model " << ModelName() << " are " + << Str(valid_rknpu_backends) << ", " << runtime_option.backend + << " is not supported." 
<< std::endl; + return false; + } + } else if (use_horizon) { + if (!IsSupported(valid_horizon_backends, runtime_option.backend)) { + FDERROR << "The valid horizon backends of model " << ModelName() + << " are " << Str(valid_horizon_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_sophgotpu) { + if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) { + FDERROR << "The valid sophgo backends of model " << ModelName() << " are " + << Str(valid_sophgonpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_timvx) { + if (!IsSupported(valid_timvx_backends, runtime_option.backend)) { + FDERROR << "The valid timvx backends of model " << ModelName() << " are " + << Str(valid_timvx_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_ascend) { + if (!IsSupported(valid_ascend_backends, runtime_option.backend)) { + FDERROR << "The valid ascend backends of model " << ModelName() << " are " + << Str(valid_ascend_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_directml) { + if (!IsSupported(valid_directml_backends, runtime_option.backend)) { + FDERROR << "The valid directml backends of model " << ModelName() + << " are " << Str(valid_directml_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_kunlunxin) { + if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) { + FDERROR << "The valid kunlunxin backends of model " << ModelName() + << " are " << Str(valid_kunlunxin_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_ipu) { + if (!IsSupported(valid_ipu_backends, runtime_option.backend)) { + FDERROR << "The valid ipu backends of model " << ModelName() << " are " + << Str(valid_ipu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else { + if (!IsSupported(valid_cpu_backends, runtime_option.backend)) { + FDERROR << "The valid cpu backends of model " << ModelName() << " are " + << Str(valid_cpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } + + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; +} + +bool UltraInferModel::InitRuntimeWithSpecifiedDevice() { + if (runtime_option.device == Device::CPU) { + return CreateCpuBackend(); + } else if (runtime_option.device == Device::GPU) { +#ifdef WITH_GPU + return CreateGpuBackend(); +#else + FDERROR << "The compiled UltraInfer library doesn't support GPU now." 
+ << std::endl; + return false; +#endif + } else if (runtime_option.device == Device::RKNPU) { + return CreateRKNPUBackend(); + } else if (runtime_option.device == Device::SUNRISENPU) { + return CreateHorizonBackend(); + } else if (runtime_option.device == Device::TIMVX) { + return CreateTimVXBackend(); + } else if (runtime_option.device == Device::ASCEND) { + return CreateASCENDBackend(); + } else if (runtime_option.device == Device::DIRECTML) { + return CreateDirectMLBackend(); + } else if (runtime_option.device == Device::KUNLUNXIN) { + return CreateKunlunXinBackend(); + } else if (runtime_option.device == Device::SOPHGOTPUD) { + return CreateSophgoNPUBackend(); + } else if (runtime_option.device == Device::IPU) { +#ifdef WITH_IPU + return CreateIpuBackend(); +#else + FDERROR << "The compiled UltraInfer library doesn't support IPU now." + << std::endl; + return false; +#endif + } + FDERROR << "Only support " + "CPU/GPU/IPU/RKNPU/HORIZONNPU/TIMVX/KunlunXin/ASCEND/DirectML now." + << std::endl; + return false; +} + +bool UltraInferModel::InitRuntime() { + if (runtime_initialized_) { + FDERROR << "The model is already initialized, cannot be initliazed again." + << std::endl; + return false; + } + if (runtime_option.backend != Backend::UNKNOWN) { + return InitRuntimeWithSpecifiedBackend(); + } + + return InitRuntimeWithSpecifiedDevice(); +} + +bool UltraInferModel::CreateCpuBackend() { + if (valid_cpu_backends.size() == 0) { + FDERROR << "There's no valid cpu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_cpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_cpu_backends[i])) { + continue; + } + runtime_option.backend = valid_cpu_backends[i]; + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateGpuBackend() { + if (valid_gpu_backends.empty()) { + FDERROR << "There's no valid gpu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_gpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_gpu_backends[i])) { + continue; + } + runtime_option.backend = valid_gpu_backends[i]; + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available gpu backend to load this model." + << std::endl; + return false; +} + +bool UltraInferModel::CreateRKNPUBackend() { + if (valid_rknpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_rknpu_backends[i])) { + continue; + } + runtime_option.backend = valid_rknpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." 
+ << std::endl; + return false; +} + +bool UltraInferModel::CreateHorizonBackend() { + if (valid_horizon_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + for (size_t i = 0; i < valid_horizon_backends.size(); ++i) { + if (!IsBackendAvailable(valid_horizon_backends[i])) { + continue; + } + runtime_option.backend = valid_horizon_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." + << std::endl; + return false; +} +bool UltraInferModel::CreateSophgoNPUBackend() { + if (valid_sophgonpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_sophgonpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_sophgonpu_backends[i])) { + continue; + } + runtime_option.backend = valid_sophgonpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." + << std::endl; + return false; +} + +bool UltraInferModel::CreateTimVXBackend() { + if (valid_timvx_backends.size() == 0) { + FDERROR << "There's no valid timvx backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_timvx_backends.size(); ++i) { + if (!IsBackendAvailable(valid_timvx_backends[i])) { + continue; + } + runtime_option.backend = valid_timvx_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateKunlunXinBackend() { + if (valid_kunlunxin_backends.size() == 0) { + FDERROR << "There's no valid KunlunXin backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_kunlunxin_backends.size(); ++i) { + if (!IsBackendAvailable(valid_kunlunxin_backends[i])) { + continue; + } + runtime_option.backend = valid_kunlunxin_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateASCENDBackend() { + if (valid_ascend_backends.size() == 0) { + FDERROR << "There's no valid ascend backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_ascend_backends.size(); ++i) { + if (!IsBackendAvailable(valid_ascend_backends[i])) { + continue; + } + runtime_option.backend = valid_ascend_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateDirectMLBackend() { + if (valid_directml_backends.size() == 0) { + FDERROR << "There's no valid directml backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < 
valid_directml_backends.size(); ++i) { + if (!IsBackendAvailable(valid_directml_backends[i])) { + continue; + } + runtime_option.backend = valid_directml_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid directml backend for model: " << ModelName() + << std::endl; + return false; +} + +bool UltraInferModel::CreateIpuBackend() { + if (valid_ipu_backends.size() == 0) { + FDERROR << "There's no valid ipu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_ipu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_ipu_backends[i])) { + continue; + } + runtime_option.backend = valid_ipu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::Infer(std::vector &input_tensors, + std::vector *output_tensors) { + TimeCounter tc; + if (enable_record_time_of_runtime_) { + tc.Start(); + } + auto ret = runtime_->Infer(input_tensors, output_tensors); + if (enable_record_time_of_runtime_) { + tc.End(); + if (time_of_runtime_.size() > 50000) { + FDWARNING << "There are already 50000 records of runtime, will force to " + "disable record time of runtime now." + << std::endl; + enable_record_time_of_runtime_ = false; + } + time_of_runtime_.push_back(tc.Duration()); + } + + return ret; +} + +bool UltraInferModel::Infer() { + return Infer(reused_input_tensors_, &reused_output_tensors_); +} + +std::map UltraInferModel::PrintStatisInfoOfRuntime() { + std::map statis_info_of_runtime_dict; + + if (time_of_runtime_.size() < 10) { + FDWARNING << "PrintStatisInfoOfRuntime require the runtime ran 10 times at " + "least, but now you only ran " + << time_of_runtime_.size() << " times." << std::endl; + } + double warmup_time = 0.0; + double remain_time = 0.0; + int warmup_iter = time_of_runtime_.size() / 5; + for (size_t i = 0; i < time_of_runtime_.size(); ++i) { + if (i < warmup_iter) { + warmup_time += time_of_runtime_[i]; + } else { + remain_time += time_of_runtime_[i]; + } + } + double avg_time = remain_time / (time_of_runtime_.size() - warmup_iter); + std::cout << "============= Runtime Statis Info(" << ModelName() + << ") =============" << std::endl; + std::cout << "Total iterations: " << time_of_runtime_.size() << std::endl; + std::cout << "Total time of runtime: " << warmup_time + remain_time << "s." + << std::endl; + std::cout << "Warmup iterations: " << warmup_iter << std::endl; + std::cout << "Total time of runtime in warmup step: " << warmup_time << "s." + << std::endl; + std::cout << "Average time of runtime exclude warmup step: " + << avg_time * 1000 << "ms." 
<< std::endl; + + statis_info_of_runtime_dict["total_time"] = warmup_time + remain_time; + statis_info_of_runtime_dict["warmup_time"] = warmup_time; + statis_info_of_runtime_dict["remain_time"] = remain_time; + statis_info_of_runtime_dict["warmup_iter"] = warmup_iter; + statis_info_of_runtime_dict["avg_time"] = avg_time; + statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size(); + + return statis_info_of_runtime_dict; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/ultrainfer_model.h b/libs/ultrainfer/ultrainfer/ultrainfer_model.h new file mode 100755 index 0000000000..d204c700f9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/ultrainfer_model.h @@ -0,0 +1,189 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/runtime.h" + +namespace ultrainfer { + +/*! @brief Base model object for all the vision models + */ +class ULTRAINFER_DECL UltraInferModel { +public: + /// Get model's name + virtual std::string ModelName() const { return "NameUndefined"; } + + /** \brief Inference the model by the runtime. This interface is included in + * the `Predict()` function, so we don't call `Infer()` directly in most + * common situation + */ + virtual bool Infer(std::vector &input_tensors, + std::vector *output_tensors); + + /** \brief Inference the model by the runtime. This interface is using class + * member reused_input_tensors_ to do inference and writing results to + * reused_output_tensors_ + */ + virtual bool Infer(); + + RuntimeOption runtime_option; + /** \brief Model's valid cpu backends. This member defined all the cpu + * backends have successfully tested for the model + */ + std::vector valid_cpu_backends = {Backend::ORT}; + /** Model's valid gpu backends. This member defined all the gpu backends have + * successfully tested for the model + */ + std::vector valid_gpu_backends = {Backend::ORT}; + /** Model's valid ipu backends. This member defined all the ipu backends have + * successfully tested for the model + */ + std::vector valid_ipu_backends = {}; + /** Model's valid timvx backends. This member defined all the timvx backends + * have successfully tested for the model + */ + std::vector valid_timvx_backends = {}; + /** Model's valid directml backends. This member defined all the onnxruntime + * directml backends have successfully tested for the model + */ + std::vector valid_directml_backends = {}; + /** Model's valid ascend backends. This member defined all the cann backends + * have successfully tested for the model + */ + std::vector valid_ascend_backends = {}; + /** Model's valid KunlunXin xpu backends. This member defined all the + * KunlunXin xpu backends have successfully tested for the model + */ + std::vector valid_kunlunxin_backends = {}; + /** Model's valid hardware backends. 
This member defined all the gpu backends + * have successfully tested for the model + */ + std::vector valid_rknpu_backends = {}; + /** Model's valid hardware backends. This member defined all the sophgo npu + * backends have successfully tested for the model + */ + std::vector valid_horizon_backends = {}; + std::vector valid_sophgonpu_backends = {}; + + /// Get number of inputs for this model + virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); } + /// Get number of outputs for this model + virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); } + /// Get input information for this model + virtual TensorInfo InputInfoOfRuntime(int index) { + return runtime_->GetInputInfo(index); + } + /// Get output information for this model + virtual TensorInfo OutputInfoOfRuntime(int index) { + return runtime_->GetOutputInfo(index); + } + /// Check if the model is initialized successfully + virtual bool Initialized() const { + return runtime_initialized_ && initialized; + } + + /** \brief This is a debug interface, used to record the time of runtime + * (backend + h2d + d2h) + * + * example code @code + * auto model = ultrainfer::vision::PPYOLOE("model.pdmodel", + * "model.pdiparams", "infer_cfg.yml"); if (!model.Initialized()) { std::cerr + * << "Failed to initialize." << std::endl; return -1; + * } + * model.EnableRecordTimeOfRuntime(); + * cv::Mat im = cv::imread("test.jpg"); + * for (auto i = 0; i < 1000; ++i) { + * ultrainfer::vision::DetectionResult result; + * model.Predict(&im, &result); + * } + * model.PrintStatisInfoOfRuntime(); + * @endcode After called the `PrintStatisInfoOfRuntime()`, the statistical + * information of runtime will be printed in the console + */ + virtual void EnableRecordTimeOfRuntime() { + time_of_runtime_.clear(); + std::vector().swap(time_of_runtime_); + enable_record_time_of_runtime_ = true; + } + + /** \brief Disable to record the time of runtime, see + * `EnableRecordTimeOfRuntime()` for more detail + */ + virtual void DisableRecordTimeOfRuntime() { + enable_record_time_of_runtime_ = false; + } + + /** \brief Print the statistic information of runtime in the console, see + * function `EnableRecordTimeOfRuntime()` for more detail + */ + virtual std::map PrintStatisInfoOfRuntime(); + + /** \brief Check if the `EnableRecordTimeOfRuntime()` method is enabled. + */ + virtual bool EnabledRecordTimeOfRuntime() { + return enable_record_time_of_runtime_; + } + /** \brief Get profile time of Runtime after the profile process is done. + */ + virtual double GetProfileTime() { return runtime_->GetProfileTime(); } + /** \brief Release reused input/output buffers + */ + virtual void ReleaseReusedBuffer() { + std::vector().swap(reused_input_tensors_); + std::vector().swap(reused_output_tensors_); + } + + virtual ultrainfer::Runtime *CloneRuntime() { return runtime_->Clone(); } + + virtual bool SetRuntime(ultrainfer::Runtime *clone_runtime) { + runtime_ = std::unique_ptr(clone_runtime); + return true; + } + + virtual std::unique_ptr Clone() { + FDERROR << ModelName() << " doesn't support Cone() now." 
<< std::endl; + return nullptr; + } + +protected: + virtual bool InitRuntime(); + + bool initialized = false; + // Reused input tensors + std::vector reused_input_tensors_; + // Reused output tensors + std::vector reused_output_tensors_; + +private: + bool InitRuntimeWithSpecifiedBackend(); + bool InitRuntimeWithSpecifiedDevice(); + bool CreateCpuBackend(); + bool CreateGpuBackend(); + bool CreateIpuBackend(); + bool CreateRKNPUBackend(); + bool CreateHorizonBackend(); + bool CreateSophgoNPUBackend(); + bool CreateTimVXBackend(); + bool CreateKunlunXinBackend(); + bool CreateASCENDBackend(); + bool CreateDirectMLBackend(); + bool IsSupported(const std::vector &backends, Backend backend); + + std::shared_ptr runtime_; + bool runtime_initialized_ = false; + // whether to record inference time + bool enable_record_time_of_runtime_ = false; + std::vector time_of_runtime_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/axis_utils.h b/libs/ultrainfer/ultrainfer/utils/axis_utils.h new file mode 100755 index 0000000000..53a9aada1a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/axis_utils.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace ultrainfer { + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, const std::vector &dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, + const std::vector &dims) { + int size = 1; + for (int i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeOutAxis(const int axis, + const std::vector &dims) { + int size = 1; + for (int i = axis + 1; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/path.h b/libs/ultrainfer/ultrainfer/utils/path.h new file mode 100755 index 0000000000..17f5c0d0cf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/path.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
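+// Lightweight path helpers used across UltraInfer, e.g.
+//   PathJoin("models", "infer.pdmodel") -> "models/infer.pdmodel" ("\\" separator on Windows).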
+ +#pragma once + +#include +#include +#include +#ifdef _MSC_VER +#define PATH_SEP "\\" +#else +#define PATH_SEP "/" +#endif + +namespace ultrainfer { + +inline std::string PathJoin(const std::vector &paths, + const std::string &sep = PATH_SEP) { + if (paths.size() == 1) { + return paths[0]; + } + std::string filepath = ""; + for (const auto &path : paths) { + if (filepath == "") { + filepath += path; + continue; + } + if (path[0] == sep[0] || filepath.back() == sep[0]) { + filepath += path; + } else { + filepath += sep + path; + } + } + return filepath; +} + +inline std::string PathJoin(const std::string &folder, + const std::string &filename, + const std::string &sep = PATH_SEP) { + return PathJoin(std::vector{folder, filename}, sep); +} + +inline std::string GetDirFromPath(const std::string &path) { + auto pos = path.find_last_of(PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + // The root path in UNIX systems + if (pos == 0) { + return "/"; + } + return path.substr(0, pos); +} + +inline bool CheckFileExists(const std::string &path) { + std::fstream fin(path, std::ios::in); + if (!fin) { + return false; + } + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/perf.h b/libs/ultrainfer/ultrainfer/utils/perf.h new file mode 100755 index 0000000000..0faabfcdc6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/perf.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include // NOLINT + +namespace ultrainfer { + +class ULTRAINFER_DECL TimeCounter { +public: + void Start() { begin_ = std::chrono::system_clock::now(); } + + void End() { end_ = std::chrono::system_clock::now(); } + + double Duration() { + auto duration = + std::chrono::duration_cast(end_ - begin_); + return static_cast(duration.count()) * + std::chrono::microseconds::period::num / + std::chrono::microseconds::period::den; + } + + void PrintInfo(const std::string &prefix = "TimeCounter: ", + bool print_out = true) { + if (!print_out) { + return; + } + FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl; + } + +private: + std::chrono::time_point begin_; + std::chrono::time_point end_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/unique_ptr.h b/libs/ultrainfer/ultrainfer/utils/unique_ptr.h new file mode 100755 index 0000000000..9f02d5d792 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/unique_ptr.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace ultrainfer { +namespace utils { +// Trait to select overloads and return types for MakeUnique. +template struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template struct MakeUniqueResult { + using invalid = void; +}; + +// MakeUnique(...) is an early implementation of C++14 std::make_unique. +// It is designed to be 100% compatible with std::make_unique so that the +// eventual switchover will be a simple renaming operation. +template +typename MakeUniqueResult::scalar make_unique(Args &&...args) { // NOLINT + return std::unique_ptr( + new T(std::forward(args)...)); // NOLINT(build/c++11) +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename MakeUniqueResult::array make_unique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +// Reject arrays of known bound. +template +typename MakeUniqueResult::invalid +make_unique(Args &&.../* args */) = delete; // NOLINT + +} // namespace utils +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/utils.cc b/libs/ultrainfer/ultrainfer/utils/utils.cc new file mode 100755 index 0000000000..71751c70f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/utils.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/utils/utils.h" + +#include + +namespace ultrainfer { + +bool FDLogger::enable_info = true; +bool FDLogger::enable_warning = true; + +void SetLogger(bool enable_info, bool enable_warning) { + FDLogger::enable_info = enable_info; + FDLogger::enable_warning = enable_warning; +} + +FDLogger::FDLogger(bool verbose, const std::string &prefix) { + verbose_ = verbose; + line_ = ""; + prefix_ = prefix; +} + +FDLogger &FDLogger::operator<<(std::ostream &(*os)(std::ostream &)) { + if (!verbose_) { + return *this; + } + std::cout << prefix_ << " " << line_ << std::endl; + line_ = ""; + return *this; +} + +bool ReadBinaryFromFile(const std::string &file, std::string *contents) { + std::ifstream fin(file, std::ios::in | std::ios::binary); + if (!fin.is_open()) { + FDERROR << "Failed to open file: " << file << " to read." 
<< std::endl; + return false; + } + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + return true; +} + +std::vector GetStride(const std::vector &dims) { + auto dims_size = dims.size(); + std::vector result(dims_size, 1); + for (int i = dims_size - 2; i >= 0; --i) { + result[i] = result[i + 1] * dims[i + 1]; + } + return result; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/utils.h b/libs/ultrainfer/ultrainfer/utils/utils.h new file mode 100755 index 0000000000..b3af7fc371 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/utils.h @@ -0,0 +1,234 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) +#ifdef ULTRAINFER_LIB +#define ULTRAINFER_DECL __declspec(dllexport) +#else +#define ULTRAINFER_DECL __declspec(dllimport) +#endif // ULTRAINFER_LIB +#else +#define ULTRAINFER_DECL __attribute__((visibility("default"))) +#endif // _WIN32 + +namespace ultrainfer { + +class ULTRAINFER_DECL FDLogger { +public: + static bool enable_info; + static bool enable_warning; + + FDLogger() { + line_ = ""; + prefix_ = "[UltraInfer]"; + verbose_ = true; + } + explicit FDLogger(bool verbose, const std::string &prefix = "[UltraInfer]"); + + template FDLogger &operator<<(const T &val) { + if (!verbose_) { + return *this; + } + std::stringstream ss; + ss << val; + line_ += ss.str(); + return *this; + } + + FDLogger &operator<<(std::ostream &(*os)(std::ostream &)); + + ~FDLogger() { + if (verbose_ && line_ != "") { + std::cout << line_ << std::endl; + } + } + +private: + std::string line_; + std::string prefix_; + bool verbose_ = true; +}; + +ULTRAINFER_DECL bool ReadBinaryFromFile(const std::string &file, + std::string *contents); + +#ifndef __REL_FILE__ +#define __REL_FILE__ __FILE__ +#endif + +#define FDERROR \ + FDLogger(true, "[ERROR]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDWARNING \ + FDLogger(ultrainfer::FDLogger::enable_warning, "[WARNING]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDINFO \ + FDLogger(ultrainfer::FDLogger::enable_info, "[INFO]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDASSERT(condition, format, ...) \ + if (!(condition)) { \ + int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \ + std::vector buffer(n + 1); \ + std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \ + FDERROR << buffer.data() << std::endl; \ + std::abort(); \ + } + +///////// Basic Marco /////////// + +#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ + case enum_type: { \ + using HINT = type; \ + __VA_ARGS__(); \ + break; \ + } + +#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) 
\ + FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__) + +// Visit different data type to match the corresponding function of FDTensor +#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::BOOL, bool, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data " \ + "type BOOL, INT32, " \ + "INT64, FP32, FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, FP32, FP64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type FP32, " \ + "FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. 
Expect to accept data type INT32, " \ + "INT64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +ULTRAINFER_DECL std::vector +GetStride(const std::vector &dims); + +template std::string Str(const std::vector &shape) { + std::ostringstream oss; + oss << "[ " << shape[0]; + for (size_t i = 1; i < shape.size(); ++i) { + oss << " ," << shape[i]; + } + oss << " ]"; + return oss.str(); +} + +/// Set behaviour of logging while using UltraInfer +ULTRAINFER_DECL void SetLogger(bool enable_info = true, + bool enable_warning = true); + +template +void CalculateStatisInfo(const void *src_ptr, int size, double *mean, + double *max, double *min) { + const T *ptr = static_cast(src_ptr); + *mean = static_cast(0); + *max = static_cast(-99999999); + *min = static_cast(99999999); + for (int i = 0; i < size; ++i) { + if (*(ptr + i) > *max) { + *max = *(ptr + i); + } + if (*(ptr + i) < *min) { + *min = *(ptr + i); + } + *mean += *(ptr + i); + } + *mean = *mean / size; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision.h b/libs/ultrainfer/ultrainfer/vision.h new file mode 100755 index 0000000000..6c9a7ed69d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
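A minimal sketch of how the FD_VISIT_* dispatch macros above are meant to be used: the `data_t` alias is injected by FD_PRIVATE_CASE_TYPE, while FDDataType is assumed to come from the core tensor headers that utils.h is used alongside:

  // Fill a raw buffer with ones, choosing the concrete element type at runtime.
  void FillOnes(void *buffer, int numel, ultrainfer::FDDataType dtype) {
    FD_VISIT_ALL_TYPES(dtype, "FillOnes", ([&] {
      data_t *ptr = static_cast<data_t *>(buffer);
      for (int i = 0; i < numel; ++i) {
        ptr[i] = static_cast<data_t>(1);
      }
    }));
  }

An unsupported dtype falls through to the default branch, where FDASSERT aborts with a formatted error message.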
+#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_VISION +#include "ultrainfer/vision/classification/contrib/resnet.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h" +#include "ultrainfer/vision/classification/ppcls/model.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_det.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h" +#include "ultrainfer/vision/detection/contrib/nanodet_plus.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/model.h" +#include "ultrainfer/vision/detection/contrib/scaledyolov4.h" +#include "ultrainfer/vision/detection/contrib/yolor.h" +#include "ultrainfer/vision/detection/contrib/yolov5/yolov5.h" +#include "ultrainfer/vision/detection/contrib/yolov5lite.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h" +#include "ultrainfer/vision/detection/contrib/yolov6.h" +#include "ultrainfer/vision/detection/contrib/yolov7/yolov7.h" +#include "ultrainfer/vision/detection/contrib/yolov7end2end_ort.h" +#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h" +#include "ultrainfer/vision/detection/contrib/yolov8/yolov8.h" +#include "ultrainfer/vision/detection/contrib/yolox.h" +#include "ultrainfer/vision/detection/ppdet/model.h" +#include "ultrainfer/vision/facealign/contrib/face_landmark_1000.h" +#include "ultrainfer/vision/facealign/contrib/pfld.h" +#include "ultrainfer/vision/facealign/contrib/pipnet.h" +#include "ultrainfer/vision/facedet/contrib/centerface/centerface.h" +#include "ultrainfer/vision/facedet/contrib/retinaface.h" +#include "ultrainfer/vision/facedet/contrib/scrfd.h" +#include "ultrainfer/vision/facedet/contrib/ultraface.h" +#include "ultrainfer/vision/facedet/contrib/yolov5face.h" +#include "ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h" +#include "ultrainfer/vision/faceid/contrib/adaface/adaface.h" +#include "ultrainfer/vision/faceid/contrib/insightface/model.h" +#include "ultrainfer/vision/generation/contrib/animegan.h" +#include "ultrainfer/vision/headpose/contrib/fsanet.h" +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h" +#include "ultrainfer/vision/matting/contrib/modnet.h" +#include "ultrainfer/vision/matting/contrib/rvm.h" +#include "ultrainfer/vision/matting/ppmatting/ppmatting.h" +#include "ultrainfer/vision/ocr/ppocr/classifier.h" +#include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v4.h" +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h" +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" +#include "ultrainfer/vision/ocr/ppocr/uvdocwarpper.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h" +#include "ultrainfer/vision/perception/paddle3d/petr/petr.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/smoke.h" +#include 
"ultrainfer/vision/segmentation/ppseg/model.h" +#include "ultrainfer/vision/sr/ppsr/model.h" +#include "ultrainfer/vision/tracking/pptracking/model.h" + +#endif + +#include "ultrainfer/vision/visualize/visualize.h" diff --git a/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc new file mode 100755 index 0000000000..413e4879c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindYOLOv5Cls(pybind11::module &m); +void BindPaddleClas(pybind11::module &m); +void BindPPShiTuV2(pybind11::module &m); +void BindResNet(pybind11::module &m); + +void BindClassification(pybind11::module &m) { + auto classification_module = + m.def_submodule("classification", "Image classification models."); + + BindYOLOv5Cls(classification_module); + BindPaddleClas(classification_module); + BindPPShiTuV2(classification_module); + BindResNet(classification_module); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc new file mode 100755 index 0000000000..465723ae04 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/resnet.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +ResNet::ResNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // In constructor, the 3 steps below are necessary. + // 1. set the Backend 2. set RuntimeOption 3. 
call Initialize() + + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool ResNet::Initialize() { + // In this function, the 2 steps below are necessary. + // 1. assign values to the member variables 2. call InitRuntime() + + size = {224, 224}; + mean_vals = {0.485f, 0.456f, 0.406f}; + std_vals = {0.229f, 0.224f, 0.225f}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool ResNet::Preprocess(Mat *mat, FDTensor *output) { + // In this function, the preprocessing needs to be implemented according to + // the original repo. + // The result of preprocessing has to be saved in an FDTensor, because the + // input of Infer() needs to be a std::vector of FDTensor. + // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into the + // FDTensor variable. + + if (mat->Height() != size[0] || mat->Width() != size[1]) { + int interp = cv::INTER_LINEAR; + Resize::Run(mat, size[1], size[0], -1, -1, interp); + } + + BGR2RGB::Run(mat); + Normalize::Run(mat, mean_vals, std_vals); + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool ResNet::Postprocess(FDTensor &infer_result, ClassifyResult *result, + int topk) { + // In this function, the postprocessing needs to be implemented according to + // the original repo. + // Finally, the result of postprocessing should be saved in the ClassifyResult + // variable. + // 1. Softmax 2. Choose topk labels 3. Put the result into the ClassifyResult + // variable. + + int num_classes = infer_result.shape[1]; + function::Softmax(infer_result, &infer_result); + const float *infer_result_buffer = + reinterpret_cast(infer_result.Data()); + topk = std::min(num_classes, topk); + result->label_ids = + utils::TopKIndices(infer_result_buffer, num_classes, topk); + result->scores.resize(topk); + for (int i = 0; i < topk; ++i) { + result->scores[i] = *(infer_result_buffer + result->label_ids[i]); + } + return true; +} + +bool ResNet::Predict(cv::Mat *im, ClassifyResult *result, int topk) { + // In this function, Preprocess(), Infer(), and Postprocess() are called + // sequentially. + + Mat mat(*im); + std::vector processed_data(1); + if (!Preprocess(&mat, &(processed_data[0]))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + processed_data[0].name = InputInfoOfRuntime(0).name; + + std::vector output_tensors; + if (!Infer(processed_data, &output_tensors)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, topk)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h new file mode 100755 index 0000000000..4fbf6c99d9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +// The namespace should be +// ultrainfer::vision::classification (ultrainfer::vision::${task}) +namespace ultrainfer { +namespace vision { +/** \brief All object classification model APIs are defined inside this + * namespace + * + */ +namespace classification { +/*! @brief Torchvision ResNet series model + */ +class ULTRAINFER_DECL ResNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g. ./resnet50.onnx + * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use CPU, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + ResNet(const std::string &model_file, const std::string &params_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "ResNet"; } + /** \brief Predict for the input "im", the result will be saved in "result". + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result Saving the inference result. + * \param[in] topk The length of return values, e.g., if topk==2, the result + * will include the 2 most probable class labels for the input image. + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result, int topk = 1); + /*! @brief + Argument for the image preprocessing step, tuple of (width, height), decides + the target size after resizing, default size = {224, 224} + */ + std::vector size; + /*! @brief + Mean parameters for normalization, size should be the same as the number of + channels, default mean_vals = {0.485f, 0.456f, 0.406f} + */ + std::vector mean_vals; + /*! @brief + Std parameters for normalization, size should be the same as the number of + channels, default std_vals = {0.229f, 0.224f, 0.225f} + */ + std::vector std_vals; + +private: + /*!
@brief Initialize for the ResNet model, assign values to the member variables + * and call InitRuntime() + */ + bool Initialize(); + /// Preprocessing for the input "mat", the result will be saved in "outputs". + bool Preprocess(Mat *mat, FDTensor *outputs); + /*! @brief Postprocessing for the input "infer_result", the result will be + * saved in "result". + */ + bool Postprocess(FDTensor &infer_result, ClassifyResult *result, + int topk = 1); +}; +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc new file mode 100755 index 0000000000..8139ba57bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +// namespace should be `ultrainfer` +namespace ultrainfer { +// the name of the Pybind function should be Bind${model_name} +void BindResNet(pybind11::module &m) { + // the constructor and the predict function are necessary + // the constructor is used to initialize the Python model class. + // the necessary public functions and variables like `size`, `mean_vals` + // should also be bound. + pybind11::class_(m, "ResNet") + .def(pybind11::init()) + .def("predict", + [](vision::classification::ResNet &self, pybind11::array &data, + int topk = 1) { + auto mat = PyArrayToCvMat(data); + vision::ClassifyResult res; + self.Predict(&mat, &res, topk); + return res; + }) + .def_readwrite("size", &vision::classification::ResNet::size) + .def_readwrite("mean_vals", &vision::classification::ResNet::mean_vals) + .def_readwrite("std_vals", &vision::classification::ResNet::std_vals); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc new file mode 100755 index 0000000000..c86a7b17c1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
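A minimal C++ usage sketch for the ResNet wrapper declared and bound above; the model and image paths below are placeholders:

  auto model = ultrainfer::vision::classification::ResNet("resnet50.onnx");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::ClassifyResult result;
  if (model.Predict(&im, &result, /*topk=*/5)) {
    // result.label_ids and result.scores hold the top-5 predictions.
  }

From Python, the `predict` binding above takes a NumPy image (e.g. from cv2.imread) and a topk value and returns the same ClassifyResult.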
+ +#include "ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5ClsPostprocessor::YOLOv5ClsPostprocessor() { topk_ = 1; } + +bool YOLOv5ClsPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + FDTensor infer_result = tensors[0]; + FDTensor infer_result_softmax; + function::Softmax(infer_result, &infer_result_softmax, 1); + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + // output (1,1000) score classnum 1000 + int num_classes = infer_result_softmax.shape[1]; + const float *infer_result_buffer = + reinterpret_cast(infer_result_softmax.Data()) + + bs * infer_result_softmax.shape[1]; + topk_ = std::min(num_classes, topk_); + (*results)[bs].label_ids = + utils::TopKIndices(infer_result_buffer, num_classes, topk_); + (*results)[bs].scores.resize(topk_); + for (int i = 0; i < topk_; ++i) { + (*results)[bs].scores[i] = + *(infer_result_buffer + (*results)[bs].label_ids[i]); + } + + if ((*results)[bs].label_ids.size() == 0) { + return true; + } + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h new file mode 100755 index 0000000000..1ad4d5537b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h @@ -0,0 +1,55 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Postprocessor object for YOLOv5Cls serials model. 
+ */ +class ULTRAINFER_DECL YOLOv5ClsPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5Cls serials model + */ + YOLOv5ClsPostprocessor(); + + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of classification + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set topk, default 1 + void SetTopK(const int &topk) { topk_ = topk; } + + /// Get topk, default 1 + float GetTopK() const { return topk_; } + +protected: + int topk_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc new file mode 100755 index 0000000000..81f028e312 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() { + size_ = {224, 224}; //{h,w} +} + +bool YOLOv5ClsPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + + // yolov5cls's preprocess steps + // 1. CenterCrop + // 2. 
Normalize + // CenterCrop + int crop_size = std::min(mat->Height(), mat->Width()); + CenterCrop::Run(mat, crop_size, crop_size); + Resize::Run(mat, size_[0], size_[1], -1, -1, cv::INTER_LINEAR); + // Normalize + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + NormalizeAndPermute::Run(mat, mean, std, false); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5ClsPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h new file mode 100755 index 0000000000..9eaf06c9fd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Preprocessor object for YOLOv5Cls serials model. 
+ */ +class ULTRAINFER_DECL YOLOv5ClsPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5Cls serials model + */ + YOLOv5ClsPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {224, 224} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {224, 224} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {224, 224} + std::vector size_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc new file mode 100755 index 0000000000..bed7fee5e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5Cls::YOLOv5Cls(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Cls::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool YOLOv5Cls::Predict(const cv::Mat &im, ClassifyResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5Cls::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h new file mode 100755 index 0000000000..7a3d3b52b9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace classification { +/*! @brief YOLOv5Cls model object used when to load a YOLOv5Cls model exported + * by YOLOv5Cls. + */ +class ULTRAINFER_DECL YOLOv5Cls : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g. ./yolov5cls.onnx + * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use CPU, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Cls(const std::string &model_file, const std::string &params_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5cls"; } + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * classification result will be written to this structure \return true if the + * prediction succeeded, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + /** \brief Predict the classification results for a batch of input images + * + * \param[in] imgs The input image list, each element comes from cv::imread() + * \param[in] results The output classification result list + * \return true if the prediction succeeded, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5Cls + virtual YOLOv5ClsPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5Cls + virtual YOLOv5ClsPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5ClsPreprocessor preprocessor_; + YOLOv5ClsPostprocessor postprocessor_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc new file mode 100755 index 0000000000..f61cfb20d7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
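A minimal C++ usage sketch for the YOLOv5Cls API above; the file names are placeholders, and the top-k value is routed through the postprocessor as shown in the header:

  auto model = ultrainfer::vision::classification::YOLOv5Cls("yolov5n-cls.onnx");
  model.GetPostprocessor().SetTopK(5);
  std::vector<cv::Mat> images = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::ClassifyResult> results;
  if (!model.BatchPredict(images, &results)) {
    std::cerr << "Failed to run batch prediction." << std::endl;
  }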
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Cls(pybind11::module &m) { + pybind11::class_( + m, "YOLOv5ClsPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::YOLOv5ClsPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to preprocess the input data in " + "YOLOv5ClsPreprocessor.')"); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", + &vision::classification::YOLOv5ClsPreprocessor::GetSize, + &vision::classification::YOLOv5ClsPreprocessor::SetSize); + + pybind11::class_( + m, "YOLOv5ClsPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::YOLOv5ClsPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in YOLOv5ClsPostprocessor.')"); + } + return results; + }) + .def("run", + [](vision::classification::YOLOv5ClsPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in YOLOv5ClsPostprocessor.')"); + } + return results; + }) + .def_property("topk", + &vision::classification::YOLOv5ClsPostprocessor::GetTopK, + &vision::classification::YOLOv5ClsPostprocessor::SetTopK); + + pybind11::class_( + m, "YOLOv5Cls") + .def(pybind11::init()) + .def("predict", + [](vision::classification::YOLOv5Cls &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::ClassifyResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::classification::YOLOv5Cls &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", &vision::classification::YOLOv5Cls::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::YOLOv5Cls::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc new file mode 100755 index 0000000000..060715523c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/ppcls/model.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PaddleClasModel::PaddleClasModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + valid_horizon_backends = {Backend::HORIZONNPU}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PaddleClasModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PaddleClasModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PaddleClasModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PaddleClasModel::Predict(cv::Mat *im, ClassifyResult *result, int topk) { + postprocessor_.SetTopk(topk); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool PaddleClasModel::Predict(const cv::Mat &im, ClassifyResult *result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PaddleClasModel::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PaddleClasModel::Predict(const FDMat &mat, ClassifyResult *result) { + std::vector results; + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PaddleClasModel::BatchPredict(const std::vector &mats, + std::vector *results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h new file mode 100755 index 0000000000..12e9574d02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h @@ -0,0 +1,128 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/ppcls/postprocessor.h" +#include "ultrainfer/vision/classification/ppcls/preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All classification model APIs are defined inside this namespace + * + */ +namespace classification { +/*! @brief PaddleClas serials model object used when to load a PaddleClas model + * exported by PaddleClas repository + */ +class ULTRAINFER_DECL PaddleClasModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * resnet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PaddleClasModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleClasModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleClasModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PaddleClas/Model"; } + + /** \brief DEPRECATED Predict the classification result for an input image, + * remove at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output classification result will be writen to this + * structure \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result, int topk = 1); + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread() + * \param[in] result The output classification result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + /** \brief 
Predict the classification results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output classification result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &imgs,
+                            std::vector<ClassifyResult> *results);
+
+  /** \brief Predict the classification result for an input image
+   *
+   * \param[in] mat The input mat
+   * \param[in] result The output classification result
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const FDMat &mat, ClassifyResult *result);
+
+  /** \brief Predict the classification results for a batch of input images
+   *
+   * \param[in] mats The input mat list
+   * \param[in] results The output classification result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<FDMat> &mats,
+                            std::vector<ClassifyResult> *results);
+
+  /// Get preprocessor reference of PaddleClasModel
+  virtual PaddleClasPreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of PaddleClasModel
+  virtual PaddleClasPostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  PaddleClasPreprocessor preprocessor_;
+  PaddleClasPostprocessor postprocessor_;
+};
+
+typedef PaddleClasModel PPLCNet;
+typedef PaddleClasModel PPLCNetv2;
+typedef PaddleClasModel EfficientNet;
+typedef PaddleClasModel GhostNet;
+typedef PaddleClasModel MobileNetv1;
+typedef PaddleClasModel MobileNetv2;
+typedef PaddleClasModel MobileNetv3;
+typedef PaddleClasModel ShuffleNetv2;
+typedef PaddleClasModel SqueezeNet;
+typedef PaddleClasModel Inceptionv3;
+typedef PaddleClasModel PPHGNet;
+typedef PaddleClasModel ResNet50vd;
+typedef PaddleClasModel SwinTransformer;
+} // namespace classification
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc
new file mode 100755
index 0000000000..03f5222453
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc
@@ -0,0 +1,57 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/classification/ppcls/postprocessor.h"
+#include "ultrainfer/vision/utils/utils.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace classification {
+
+PaddleClasPostprocessor::PaddleClasPostprocessor(int topk) {
+  topk_ = topk;
+  initialized_ = true;
+}
+
+bool PaddleClasPostprocessor::Run(const std::vector<FDTensor> &infer_result,
+                                  std::vector<ClassifyResult> *results) {
+  if (!initialized_) {
+    FDERROR << "Postprocessor is not initialized."
<< std::endl; + return false; + } + + int batch = infer_result[0].shape[0]; + int num_classes = infer_result[0].shape[1]; + const float *infer_result_data = + reinterpret_cast(infer_result[0].Data()); + + results->resize(batch); + + int topk = std::min(num_classes, topk_); + for (int i = 0; i < batch; ++i) { + (*results)[i].label_ids = utils::TopKIndices( + infer_result_data + i * num_classes, num_classes, topk); + (*results)[i].scores.resize(topk); + for (int j = 0; j < topk; ++j) { + (*results)[i].scores[j] = + infer_result_data[i * num_classes + (*results)[i].label_ids[j]]; + } + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h new file mode 100755 index 0000000000..fafcef58bc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Postprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL PaddleClasPostprocessor { +public: + /** \brief Create a postprocessor instance for PaddleClas serials model + * + * \param[in] topk The topk result filtered by the classify confidence score, + * default 1 + */ + explicit PaddleClasPostprocessor(int topk = 1); + + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of classification + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *result); + + /// Set topk value + void SetTopk(int topk) { topk_ = topk; } + + /// Get topk value + int GetTopk() const { return topk_; } + +private: + int topk_ = 1; + bool initialized_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc new file mode 100755 index 0000000000..b5826e8084 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPaddleClas(pybind11::module &m) { + pybind11::class_(m, "PaddleClasPreprocessor") + .def(pybind11::init()) + .def("disable_normalize", + [](vision::classification::PaddleClasPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::classification::PaddleClasPreprocessor &self) { + self.DisablePermute(); + }) + .def("initial_resize_on_cpu", + [](vision::classification::PaddleClasPreprocessor &self, bool v) { + self.InitialResizeOnCpu(v); + }); + + pybind11::class_( + m, "PaddleClasPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::classification::PaddleClasPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleClasPostprocessor."); + } + return results; + }) + .def("run", + [](vision::classification::PaddleClasPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleClasPostprocessor."); + } + return results; + }) + .def_property("topk", + &vision::classification::PaddleClasPostprocessor::GetTopk, + &vision::classification::PaddleClasPostprocessor::SetTopk); + + pybind11::class_( + m, "PaddleClasModel") + .def(pybind11::init()) + .def("clone", + [](vision::classification::PaddleClasModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::classification::PaddleClasModel &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::ClassifyResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::classification::PaddleClasModel &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::classification::PaddleClasModel::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::PaddleClasModel::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc new file mode 100755 index 0000000000..eeece4461f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc @@ -0,0 +1,156 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/ppcls/preprocessor.h" + +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PaddleClasPreprocessor::PaddleClasPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleClasPreprocessor."); + initialized_ = true; +} + +bool PaddleClasPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + auto preprocess_cfg = cfg["PreProcess"]["transform_ops"]; + processors_.push_back(std::make_shared()); + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + auto op_name = op.begin()->first.as(); + if (op_name == "ResizeImage") { + if (op.begin()->second["resize_short"]) { + int target_size = op.begin()->second["resize_short"].as(); + bool use_scale = false; + int interp = 1; + processors_.push_back( + std::make_shared(target_size, 1, use_scale)); + } else if (op.begin()->second["size"]) { + int width = 0; + int height = 0; + if (op.begin()->second["size"].IsScalar()) { + auto size = op.begin()->second["size"].as(); + width = size; + height = size; + } else { + auto size = op.begin()->second["size"].as>(); + width = size[0]; + height = size[1]; + } + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, 1, false)); + } else { + FDERROR << "Invalid params for ResizeImage for both 'size' and " + "'resize_short' are None" + << std::endl; + } + + } else if (op_name == "CropImage") { + int width = op.begin()->second["size"].as(); + int height = op.begin()->second["size"].as(); + processors_.push_back(std::make_shared(width, height)); + } else if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op.begin()->second["mean"].as>(); + auto std = op.begin()->second["std"].as>(); + const auto &scale_origin = op.begin()->second["scale"]; + float scale; + if (scale_origin.as() == "1/255") { + scale = 1.0f / 255.0f; + } else { + scale = scale_origin.as(); + } + processors_.push_back(std::make_shared( + mean, std, true, std::vector(mean.size(), 0.0f), + std::vector(mean.size(), 1.0f / scale))); + } + } else if (op_name == "ToCHWImage") { + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +void PaddleClasPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + } +} +void PaddleClasPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +bool PaddleClasPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + for (size_t j = 0; j < processors_.size(); ++j) { + image_batch->proc_lib = proc_lib_; + if (initial_resize_on_cpu_ && j == 0 && + processors_[j]->Name().find("Resize") == 0) { + image_batch->proc_lib = ProcLib::OPENCV; + } + if (!(*(processors_[j].get()))(image_batch)) { + FDERROR << "Failed to processs image in " << processors_[j]->Name() << "." + << std::endl; + return false; + } + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h new file mode 100755 index 0000000000..97831cd44d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Preprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL PaddleClasPreprocessor : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PaddleClas serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * resnet/infer_cfg.yml + */ + explicit PaddleClasPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + + /** \brief When the initial operator is Resize, and input image size is large, + * maybe it's better to run resize on CPU, because the HostToDevice memcpy + * is time consuming. Set this true to run the initial resize on CPU. + * + * \param[in] v ture or false + */ + void InitialResizeOnCpu(bool v) { initial_resize_on_cpu_ = v; } + +private: + bool BuildPreprocessPipelineFromConfig(); + bool initialized_ = false; + std::vector> processors_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; + bool initial_resize_on_cpu_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc new file mode 100755 index 0000000000..424921e290 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPShiTuV2(pybind11::module &m) { + pybind11::class_(m, + "PPShiTuV2RecognizerPreprocessor") + .def(pybind11::init()) + .def("disable_normalize", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self) { + self.DisablePermute(); + }) + .def("initial_resize_on_cpu", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self, + bool v) { self.InitialResizeOnCpu(v); }); + + pybind11::class_( + m, "PPShiTuV2RecognizerPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::PPShiTuV2RecognizerPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PPShiTuV2RecognizerPostprocessor."); + } + return results; + }) + .def("run", + [](vision::classification::PPShiTuV2RecognizerPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PPShiTuV2RecognizerPostprocessor."); + } + return results; + }) + .def_property("feature_norm", + &vision::classification::PPShiTuV2RecognizerPostprocessor:: + GetFeatureNorm, + &vision::classification::PPShiTuV2RecognizerPostprocessor:: + SetFeatureNorm); + + pybind11::class_(m, "PPShiTuV2Recognizer") + .def(pybind11::init()) + .def("clone", + [](vision::classification::PPShiTuV2Recognizer &self) { + return self.Clone(); + }) + .def("predict", + 
[](vision::classification::PPShiTuV2Recognizer &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::ClassifyResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::classification::PPShiTuV2Recognizer &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::classification::PPShiTuV2Recognizer::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::PPShiTuV2Recognizer::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h new file mode 100755 index 0000000000..fa3ba4bb42 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h @@ -0,0 +1,25 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/detection/ppdet/model.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +typedef detection::PicoDet PPShiTuV2Detector; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc new file mode 100755 index 0000000000..8ed2e3b882 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
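+//
+// Illustrative usage (not part of this file): a minimal sketch of calling the
+// recognizer implemented below, assuming a PP-ShiTuV2 rec model exported to
+// the placeholder paths "rec/inference.pdmodel", "rec/inference.pdiparams"
+// and "rec/inference_cls.yaml"; only the class, method, and result-field
+// names are taken from this library.
+//
+//   ultrainfer::vision::classification::PPShiTuV2Recognizer rec(
+//       "rec/inference.pdmodel", "rec/inference.pdiparams",
+//       "rec/inference_cls.yaml");
+//   cv::Mat im = cv::imread("query.jpg");
+//   ultrainfer::vision::ClassifyResult res;
+//   if (rec.Predict(im, &res)) {
+//     // res.feature holds the extracted embedding (L2-normalized by default).
+//   }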
+ +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PPShiTuV2Recognizer::PPShiTuV2Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PPShiTuV2Recognizer::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPShiTuV2Recognizer(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PPShiTuV2Recognizer::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPShiTuV2Recognizer::Predict(cv::Mat *im, ClassifyResult *result) { + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool PPShiTuV2Recognizer::Predict(const cv::Mat &im, ClassifyResult *result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PPShiTuV2Recognizer::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PPShiTuV2Recognizer::Predict(const FDMat &mat, ClassifyResult *result) { + std::vector results; + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PPShiTuV2Recognizer::BatchPredict(const std::vector &mats, + std::vector *results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h new file mode 100755 index 0000000000..77190d22fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace classification { +/*! @brief PPShiTuV2Recognizer model object used when to load a + * PPShiTuV2Recognizer model exported by PP-ShiTuV2 Rec model. + */ +class ULTRAINFER_DECL PPShiTuV2Recognizer : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PPLCNet/inference.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PPLCNet/inference.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g PPLCNet/inference_cls.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPShiTuV2Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PPShiTuV2Recognizer with less memory usage when + * multiple instances of the same model are created + * + * \return new PPShiTuV2Recognizer* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PPShiTuV2Recognizer"; } + + /** \brief DEPRECATED Predict the feature vector result for an input image, + * remove at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output feature vector result will be writen to this + * structure \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result); + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread() + * \param[in] result The output feature vector result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + 
/** \brief Predict the feature vector results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output feature vector(namely ClassifyResult.feature) + * result list \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /** \brief Predict the feature vector result for an input image + * + * \param[in] mat The input mat + * \param[in] result The output feature vector result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const FDMat &mat, ClassifyResult *result); + + /** \brief Predict the feature vector results for a batch of input images + * + * \param[in] mats, The input mat list + * \param[in] results The output feature vector result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &mats, + std::vector *results); + + /// Get preprocessor reference of PPShiTuV2Recognizer + virtual PPShiTuV2RecognizerPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of PPShiTuV2Recognizer + virtual PPShiTuV2RecognizerPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + PPShiTuV2RecognizerPreprocessor preprocessor_; + PPShiTuV2RecognizerPostprocessor postprocessor_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc new file mode 100755 index 0000000000..fa36ee5815 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
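+//
+// The Run() below copies each row of the [batch, feature_dim] output tensor
+// into ClassifyResult.feature and, when feature_norm_ is enabled (the
+// default), L2-normalizes it in place. A standalone sketch of that
+// normalization (variable names here are illustrative only):
+//
+//   std::vector<float> feat = /* one embedding row */;
+//   float norm = std::sqrt(
+//       std::inner_product(feat.begin(), feat.end(), feat.begin(), 0.0f));
+//   for (auto &v : feat) v /= norm;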
+
+#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h"
+#include "ultrainfer/vision/utils/utils.h"
+#include <cmath>
+#include <numeric>
+
+namespace ultrainfer {
+namespace vision {
+namespace classification {
+
+bool PPShiTuV2RecognizerPostprocessor::Run(
+    const std::vector<FDTensor> &tensors,
+    std::vector<ClassifyResult> *results) {
+  int batch = tensors[0].shape[0]; // e.g. [batch, 512]
+  int num_feature = tensors[0].shape[1];
+  const float *tensor_data =
+      reinterpret_cast<const float *>(tensors[0].Data());
+
+  results->resize(batch);
+
+  // post processing per batch=1
+  for (int i = 0; i < batch; ++i) {
+    (*results)[i].feature.resize(num_feature);
+    const float *tensor_data_i_start = tensor_data + i * num_feature;
+    std::memcpy((*results)[i].feature.data(), tensor_data_i_start,
+                num_feature * sizeof(float));
+    if (feature_norm_) {
+      FeatureNorm((*results)[i].feature);
+    }
+  }
+
+  return true;
+}
+
+void PPShiTuV2RecognizerPostprocessor::FeatureNorm(
+    std::vector<float> &feature) {
+  float feature_sqrt = std::sqrt(std::inner_product(
+      feature.begin(), feature.end(), feature.begin(), 0.0f));
+  for (size_t i = 0; i < feature.size(); ++i) {
+    feature[i] /= feature_sqrt;
+  }
+}
+
+} // namespace classification
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h
new file mode 100755
index 0000000000..57e50d69a1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace classification {
+/*! @brief Postprocessor object for PP-ShiTuV2 Recognizer model.
+ */
+class ULTRAINFER_DECL PPShiTuV2RecognizerPostprocessor {
+public:
+  PPShiTuV2RecognizerPostprocessor() = default;
+
+  /** \brief Process the result of runtime and fill to ClassifyResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output feature vector results (see the
+   * ClassifyResult.feature member)
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector<FDTensor> &tensors,
+           std::vector<ClassifyResult> *results);
+  /// Set the value of feature_norm_ for Postprocessor
+  void SetFeatureNorm(bool feature_norm) { feature_norm_ = feature_norm; }
+  /// Get the value of feature_norm_ from Postprocessor, defaults to true.
+ bool GetFeatureNorm() { return feature_norm_; } + +private: + void FeatureNorm(std::vector &feature); + bool feature_norm_ = true; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc new file mode 100755 index 0000000000..9bf42dba1d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc @@ -0,0 +1,160 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h" + +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PPShiTuV2RecognizerPreprocessor::PPShiTuV2RecognizerPreprocessor( + const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PPShiTuV2RecognizerPreprocessor."); + initialized_ = true; +} + +bool PPShiTuV2RecognizerPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + auto preprocess_cfg = cfg["PreProcess"]["transform_ops"]; + // Outdated: + // We use the key 'RecPreProcess' to denote the preprocess + // operators for PP-ShiTuV2 recognizer. 
+ // auto preprocess_cfg = cfg["RecPreProcess"]["transform_ops"]; + processors_.push_back(std::make_shared()); + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + auto op_name = op.begin()->first.as(); + if (op_name == "ResizeImage") { + if (op.begin()->second["resize_short"]) { + int target_size = op.begin()->second["resize_short"].as(); + bool use_scale = false; + int interp = 1; + processors_.push_back( + std::make_shared(target_size, 1, use_scale)); + } else if (op.begin()->second["size"]) { + int width = 0; + int height = 0; + if (op.begin()->second["size"].IsScalar()) { + auto size = op.begin()->second["size"].as(); + width = size; + height = size; + } else { + auto size = op.begin()->second["size"].as>(); + width = size[0]; + height = size[1]; + } + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, 1, false)); + } else { + FDERROR << "Invalid params for ResizeImage for both 'size' and " + "'resize_short' are None" + << std::endl; + } + + } else if (op_name == "CropImage") { + int width = op.begin()->second["size"].as(); + int height = op.begin()->second["size"].as(); + processors_.push_back(std::make_shared(width, height)); + } else if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op.begin()->second["mean"].as>(); + auto std = op.begin()->second["std"].as>(); + const auto &scale_origin = op.begin()->second["scale"]; + float scale; + if (scale_origin.as() == "1/255") { + scale = 1.0f / 255.0f; + } else { + scale = scale_origin.as(); + } + processors_.push_back(std::make_shared( + mean, std, true, std::vector(mean.size(), 0.0f), + std::vector(mean.size(), 1.0f / scale))); + } + } else if (op_name == "ToCHWImage") { + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +void PPShiTuV2RecognizerPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +void PPShiTuV2RecognizerPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +bool PPShiTuV2RecognizerPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + for (size_t j = 0; j < processors_.size(); ++j) { + image_batch->proc_lib = proc_lib_; + if (initial_resize_on_cpu_ && j == 0 && + processors_[j]->Name().find("Resize") == 0) { + image_batch->proc_lib = ProcLib::OPENCV; + } + if (!(*(processors_[j].get()))(image_batch)) { + FDERROR << "Failed to processs image in " << processors_[j]->Name() << "." 
+ << std::endl; + return false; + } + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h new file mode 100755 index 0000000000..5dcee60d48 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +/*! @brief Preprocessor object for PP-ShiTuV2 Recognizer model. + */ +class ULTRAINFER_DECL PPShiTuV2RecognizerPreprocessor + : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PP-ShiTuV2 Recognizer model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * PPLCNet/infer_cfg.yml + */ + explicit PPShiTuV2RecognizerPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute(); + + /** \brief When the initial operator is Resize, and input image size is large, + * maybe it's better to run resize on CPU, because the HostToDevice memcpy + * is time consuming. Set this true to run the initial resize on CPU. 
+ * + * \param[in] v ture or false + */ + void InitialResizeOnCpu(bool v) { initial_resize_on_cpu_ = v; } + +private: + bool BuildPreprocessPipelineFromConfig(); + bool initialized_ = false; + std::vector> processors_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; + bool initial_resize_on_cpu_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc new file mode 100755 index 0000000000..194b3ced2d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/image_decoder/image_decoder.h" + +#include "opencv2/imgcodecs.hpp" + +namespace ultrainfer { +namespace vision { + +ImageDecoder::ImageDecoder(ImageDecoderLib lib) { + if (lib == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::init_decoder(nvjpeg_params_); +#endif + } + lib_ = lib; +} + +ImageDecoder::~ImageDecoder() { + if (lib_ == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::destroy_decoder(nvjpeg_params_); +#endif + } +} + +bool ImageDecoder::Decode(const std::string &img_name, FDMat *mat) { + std::vector mats(1); + mats[0] = std::move(*mat); + if (!BatchDecode({img_name}, &mats)) { + return false; + } + *mat = std::move(mats[0]); + return true; +} + +bool ImageDecoder::BatchDecode(const std::vector &img_names, + std::vector *mats) { + if (lib_ == ImageDecoderLib::OPENCV) { + return ImplByOpenCV(img_names, mats); + } else if (lib_ == ImageDecoderLib::NVJPEG) { + return ImplByNvJpeg(img_names, mats); + } + return true; +} + +bool ImageDecoder::ImplByOpenCV(const std::vector &img_names, + std::vector *mats) { + for (size_t i = 0; i < img_names.size(); ++i) { + cv::Mat im = cv::imread(img_names[i]); + (*mats)[i].SetMat(im); + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetWidth(im.cols); + (*mats)[i].SetHeight(im.rows); + (*mats)[i].SetChannels(im.channels()); + } + return true; +} + +bool ImageDecoder::ImplByNvJpeg(const std::vector &img_names, + std::vector *mats) { +#ifdef ENABLE_NVJPEG + nvjpeg_params_.batch_size = img_names.size(); + std::vector output_imgs(nvjpeg_params_.batch_size); + std::vector widths(nvjpeg_params_.batch_size); + std::vector heights(nvjpeg_params_.batch_size); + // TODO(wangxinyu): support other output format + nvjpeg_params_.fmt = NVJPEG_OUTPUT_BGRI; + double total; + nvjpeg_params_.stream = (*mats)[0].Stream(); + + std::vector output_buffers; + for (size_t i = 0; i < mats->size(); ++i) { + FDASSERT((*mats)[i].output_cache != nullptr, + "The output_cache of FDMat was not set."); + 
output_buffers.push_back((*mats)[i].output_cache); + } + + if (nvjpeg::process_images(img_names, nvjpeg_params_, total, output_imgs, + output_buffers, widths, heights)) { + // If nvJPEG decode failed, will fallback to OpenCV, + // e.g. png format is not supported by nvJPEG + FDWARNING << "nvJPEG decode failed, falling back to OpenCV for this batch" + << std::endl; + return ImplByOpenCV(img_names, mats); + } + + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].mat_type = ProcLib::CUDA; + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetTensor(output_buffers[i]); + } +#else + FDASSERT(false, "UltraInfer didn't compile with NVJPEG."); +#endif + return true; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h new file mode 100755 index 0000000000..dbd2342900 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { +namespace vision { + +enum class ULTRAINFER_DECL ImageDecoderLib { OPENCV, NVJPEG }; + +class ULTRAINFER_DECL ImageDecoder { +public: + explicit ImageDecoder(ImageDecoderLib lib = ImageDecoderLib::OPENCV); + + ~ImageDecoder(); + + bool Decode(const std::string &img_name, FDMat *mat); + + bool BatchDecode(const std::vector &img_names, + std::vector *mats); + +private: + bool ImplByOpenCV(const std::vector &img_names, + std::vector *mats); + bool ImplByNvJpeg(const std::vector &img_names, + std::vector *mats); + ImageDecoderLib lib_ = ImageDecoderLib::OPENCV; +#ifdef ENABLE_NVJPEG + nvjpeg::decode_params_t nvjpeg_params_; +#endif +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc new file mode 100755 index 0000000000..9181758e75 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc @@ -0,0 +1,364 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
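+//
+// Context: the routines in this file back ImageDecoder when it is constructed
+// with ImageDecoderLib::NVJPEG (see image_decoder.cc above); callers normally
+// go through that high-level API rather than these helpers. A minimal sketch,
+// with the image path as a placeholder:
+//
+//   ultrainfer::vision::ImageDecoder decoder(
+//       ultrainfer::vision::ImageDecoderLib::OPENCV);  // or NVJPEG on GPU builds
+//   ultrainfer::vision::FDMat mat;
+//   if (decoder.Decode("demo.jpg", &mat)) {
+//     // mat now wraps the decoded HWC image.
+//   }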
+// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#ifdef ENABLE_NVJPEG +#include "ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace ultrainfer { +namespace vision { +namespace nvjpeg { + +#define CHECK_CUDA(call) \ + { \ + cudaError_t _e = (call); \ + if (_e != cudaSuccess) { \ + std::cout << "CUDA Runtime failure: '#" << _e << "' at " << __FILE__ \ + << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +#define CHECK_NVJPEG(call) \ + { \ + nvjpegStatus_t _e = (call); \ + if (_e != NVJPEG_STATUS_SUCCESS) { \ + std::cout << "NVJPEG failure: '#" << _e << "' at " << __FILE__ << ":" \ + << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +static int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } + +static int dev_free(void *p) { return (int)cudaFree(p); } + +static int host_malloc(void **p, size_t s, unsigned int f) { + return (int)cudaHostAlloc(p, s, f); +} + +static int host_free(void *p) { return (int)cudaFreeHost(p); } + +static int read_images(const FileNames &image_names, FileData &raw_data, + std::vector &raw_len) { + for (size_t i = 0; i < image_names.size(); ++i) { + if (image_names.size() == 0) { + std::cerr << "No valid images left in the input list, exit" << std::endl; + return EXIT_FAILURE; + } + + // Read an image from disk. + std::ifstream input(image_names[i].c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "Cannot open image: " << image_names[i] << std::endl; + FDASSERT(false, "Read file error."); + continue; + } + + // Get the size + long unsigned int file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (raw_data[i].size() < file_size) { + raw_data[i].resize(file_size); + } + if (!input.read(raw_data[i].data(), file_size)) { + std::cerr << "Cannot read from file: " << image_names[i] << std::endl; + // image_names.erase(cur_iter); + FDASSERT(false, "Read file error."); + continue; + } + raw_len[i] = file_size; + } + return EXIT_SUCCESS; +} + +// prepare buffers for RGBi output format +static int prepare_buffers(FileData &file_data, std::vector &file_len, + std::vector &img_width, + std::vector &img_height, + std::vector &ibuf, + std::vector &isz, + std::vector &output_buffers, + const FileNames ¤t_names, + decode_params_t ¶ms) { + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + int channels; + nvjpegChromaSubsampling_t subsampling; + + for (long unsigned int i = 0; i < file_data.size(); i++) { + nvjpegStatus_t status = nvjpegGetImageInfo( + params.nvjpeg_handle, (unsigned char *)file_data[i].data(), file_len[i], + &channels, &subsampling, widths, heights); + if (status != NVJPEG_STATUS_SUCCESS) { + std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo." 
+ << std::endl; + return EXIT_FAILURE; + } + + img_width[i] = widths[0]; + img_height[i] = heights[0]; + + int mul = 1; + // in the case of interleaved RGB output, write only to single channel, but + // 3 samples at once + if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) { + channels = 1; + mul = 3; + } else if (params.fmt == NVJPEG_OUTPUT_RGB || + params.fmt == NVJPEG_OUTPUT_BGR) { + // in the case of rgb create 3 buffers with sizes of original image + channels = 3; + widths[1] = widths[2] = widths[0]; + heights[1] = heights[2] = heights[0]; + } else { + FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt); + } + + output_buffers[i]->Resize({heights[0], widths[0], mul * channels}, + FDDataType::UINT8, "output_cache", Device::GPU); + + uint8_t *cur_buffer = + reinterpret_cast(output_buffers[i]->Data()); + + // realloc output buffer if required + for (int c = 0; c < channels; c++) { + int aw = mul * widths[c]; + int ah = heights[c]; + size_t sz = aw * ah; + ibuf[i].pitch[c] = aw; + if (sz > isz[i].pitch[c]) { + ibuf[i].channel[c] = cur_buffer; + cur_buffer = cur_buffer + sz; + isz[i].pitch[c] = sz; + } + } + } + return EXIT_SUCCESS; +} + +static void create_decoupled_api_handles(decode_params_t ¶ms) { + CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, + ¶ms.nvjpeg_decoder)); + CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle, + params.nvjpeg_decoder, + ¶ms.nvjpeg_decoupled_state)); + + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL, + ¶ms.device_buffer)); + + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[0])); + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[1])); + + CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle, + ¶ms.nvjpeg_decode_params)); +} + +static void destroy_decoupled_api_handles(decode_params_t ¶ms) { + CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params)); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0])); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer)); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state)); + CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder)); +} + +int decode_images(const FileData &img_data, const std::vector &img_len, + std::vector &out, decode_params_t ¶ms, + double &time) { + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + std::vector batched_bitstreams; + std::vector batched_bitstreams_size; + std::vector batched_output; + + // bit-streams that batched decode cannot handle + std::vector otherdecode_bitstreams; + std::vector otherdecode_bitstreams_size; + std::vector otherdecode_output; + + if (params.hw_decode_available) { + for (int i = 0; i < params.batch_size; i++) { + // extract bitstream meta data to figure out whether a bit-stream can be + // decoded + nvjpegJpegStreamParseHeader(params.nvjpeg_handle, + (const unsigned char *)img_data[i].data(), + img_len[i], params.jpeg_streams[0]); + int isSupported = -1; + nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0], + &isSupported); + + if (isSupported == 
0) { + batched_bitstreams.push_back((const unsigned char *)img_data[i].data()); + batched_bitstreams_size.push_back(img_len[i]); + batched_output.push_back(out[i]); + } else { + otherdecode_bitstreams.push_back( + (const unsigned char *)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + } else { + for (int i = 0; i < params.batch_size; i++) { + otherdecode_bitstreams.push_back( + (const unsigned char *)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + + if (batched_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegDecodeBatchedInitialize( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1, + params.fmt)); + + CHECK_NVJPEG(nvjpegDecodeBatched( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(), + batched_bitstreams_size.data(), batched_output.data(), params.stream)); + } + + if (otherdecode_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, + params.device_buffer)); + int buffer_index = 0; + CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, + params.fmt)); + for (int i = 0; i < params.batch_size; i++) { + CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle, + otherdecode_bitstreams[i], + otherdecode_bitstreams_size[i], 0, 0, + params.jpeg_streams[buffer_index])); + + CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer( + params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index])); + + CHECK_NVJPEG(nvjpegDecodeJpegHost( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.nvjpeg_decode_params, + params.jpeg_streams[buffer_index])); + + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index], + params.stream)); + + buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode + // to avoid an extra sync + + CHECK_NVJPEG( + nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, + &otherdecode_output[i], params.stream)); + } + } + return EXIT_SUCCESS; +} + +double process_images(const FileNames &image_names, decode_params_t ¶ms, + double &total, std::vector &iout, + std::vector &output_buffers, + std::vector &widths, std::vector &heights) { + FDASSERT(image_names.size() == params.batch_size, + "Number of images and batch size must be equal."); + // vector for storing raw files and file lengths + FileData file_data(params.batch_size); + std::vector file_len(params.batch_size); + FileNames current_names(params.batch_size); + // we wrap over image files to process total_images of files + auto file_iter = image_names.begin(); + + // output buffer sizes, for convenience + std::vector isz(params.batch_size); + + for (long unsigned int i = 0; i < iout.size(); i++) { + for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) { + iout[i].channel[c] = NULL; + iout[i].pitch[c] = 0; + isz[i].pitch[c] = 0; + } + } + + if (read_images(image_names, file_data, file_len)) { + return EXIT_FAILURE; + } + + if (prepare_buffers(file_data, file_len, widths, heights, iout, isz, + output_buffers, image_names, params)) { + return EXIT_FAILURE; + } + + double time; + if (decode_images(file_data, file_len, iout, params, time)) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +void init_decoder(decode_params_t ¶ms) 
{ + params.hw_decode_available = true; + nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free}; + nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free}; + nvjpegStatus_t status = + nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, + NVJPEG_FLAGS_DEFAULT, ¶ms.nvjpeg_handle); + if (status == NVJPEG_STATUS_ARCH_MISMATCH) { + std::cout << "Hardware Decoder not supported. " + "Falling back to default backend" + << std::endl; + CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, + &pinned_allocator, NVJPEG_FLAGS_DEFAULT, + ¶ms.nvjpeg_handle)); + params.hw_decode_available = false; + } else { + CHECK_NVJPEG(status); + } + + CHECK_NVJPEG( + nvjpegJpegStateCreate(params.nvjpeg_handle, ¶ms.nvjpeg_state)); + + create_decoupled_api_handles(params); +} + +void destroy_decoder(decode_params_t ¶ms) { + destroy_decoupled_api_handles(params); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state)); + CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle)); +} + +} // namespace nvjpeg +} // namespace vision +} // namespace ultrainfer + +#endif // ENABLE_NVJPEG diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h new file mode 100755 index 0000000000..14080f611a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.h +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
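// Illustrative usage sketch (not part of this patch): how init_decoder /
// process_images / destroy_decoder are meant to be chained. The element types
// of iout / output_buffers / widths / heights are assumptions inferred from
// the implementation above (nvjpegImage_t, FDTensor*, int), and the file
// names are placeholders.
void DecodeBatchSketch() {
  using namespace ultrainfer::vision::nvjpeg;

  decode_params_t params;
  params.batch_size = 4;
  params.fmt = NVJPEG_OUTPUT_BGRI;   // interleaved BGR, single output plane
  cudaStreamCreate(&params.stream);

  init_decoder(params);              // tries NVJPEG_BACKEND_HARDWARE, falls back to default

  FileNames names = {"0.jpg", "1.jpg", "2.jpg", "3.jpg"};
  std::vector<nvjpegImage_t> iout(params.batch_size);
  std::vector<ultrainfer::FDTensor> caches(params.batch_size);
  std::vector<ultrainfer::FDTensor *> output_buffers;
  for (auto &t : caches) output_buffers.push_back(&t);
  std::vector<int> widths(params.batch_size), heights(params.batch_size);

  double total = 0.0;
  process_images(names, params, total, iout, output_buffers, widths, heights);
  // iout[i].channel[0] now points into output_buffers[i]'s GPU allocation.

  destroy_decoder(params);
  cudaStreamDestroy(params.stream);
}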
+// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#pragma once + +#ifdef ENABLE_NVJPEG +#include "ultrainfer/core/fd_tensor.h" + +#include +#include + +namespace ultrainfer { +namespace vision { +namespace nvjpeg { + +typedef std::vector FileNames; +typedef std::vector> FileData; + +struct decode_params_t { + int batch_size; + nvjpegJpegState_t nvjpeg_state; + nvjpegHandle_t nvjpeg_handle; + cudaStream_t stream; + + // used with decoupled API + nvjpegJpegState_t nvjpeg_decoupled_state; + nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining + nvjpegBufferDevice_t device_buffer; + nvjpegJpegStream_t jpeg_streams[2]; // 2 streams for pipelining + nvjpegDecodeParams_t nvjpeg_decode_params; + nvjpegJpegDecoder_t nvjpeg_decoder; + + nvjpegOutputFormat_t fmt; + bool hw_decode_available; +}; + +void init_decoder(decode_params_t ¶ms); +void destroy_decoder(decode_params_t ¶ms); + +double process_images(const FileNames &image_names, decode_params_t ¶ms, + double &total, std::vector &iout, + std::vector &output_buffers, + std::vector &widths, std::vector &heights); + +} // namespace nvjpeg +} // namespace vision +} // namespace ultrainfer + +#endif // ENABLE_NVJPEG diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc new file mode 100755 index 0000000000..4279d3cf8c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/base.h" + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/proc_lib.h" + +namespace ultrainfer { +namespace vision { + +bool Processor::ImplByOpenCV(FDMat *mat) { + FDERROR << Name() << " Not Implement Yet." << std::endl; + return false; +} + +bool Processor::ImplByOpenCV(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByOpenCV(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByFlyCV(FDMat *mat) { return ImplByOpenCV(mat); } + +bool Processor::ImplByFlyCV(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByFlyCV(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByCuda(FDMat *mat) { + FDWARNING << Name() + << " is not implemented with CUDA, will fallback to OpenCV." + << std::endl; + return ImplByOpenCV(mat); +} + +bool Processor::ImplByCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCuda(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByCvCuda(FDMat *mat) { + FDWARNING << Name() + << " is not implemented with CV-CUDA, will fallback to OpenCV." 
+ << std::endl; + return ImplByOpenCV(mat); +} + +bool Processor::ImplByCvCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCvCuda(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::operator()(FDMat *mat) { + ProcLib target = mat->proc_lib; + if (mat->proc_lib == ProcLib::DEFAULT) { + target = DefaultProcLib::default_lib; + } + if (target == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return ImplByFlyCV(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with FlyCV."); +#endif + } else if (target == ProcLib::CUDA) { +#ifdef WITH_GPU + FDASSERT(mat->Stream() != nullptr, + "CUDA processor requires cuda stream, please set stream for Mat"); + return ImplByCuda(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + } else if (target == ProcLib::CVCUDA) { +#ifdef ENABLE_CVCUDA + FDASSERT(mat->Stream() != nullptr, + "CV-CUDA requires cuda stream, please set stream for Mat"); + return ImplByCvCuda(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } + // DEFAULT & OPENCV + return ImplByOpenCV(mat); +} + +bool Processor::operator()(FDMat *mat, ProcLib lib) { + mat->proc_lib = lib; + return operator()(mat); +} + +bool Processor::operator()(FDMatBatch *mat_batch) { + ProcLib target = mat_batch->proc_lib; + if (mat_batch->proc_lib == ProcLib::DEFAULT) { + target = DefaultProcLib::default_lib; + } + if (target == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return ImplByFlyCV(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with FlyCV."); +#endif + } else if (target == ProcLib::CUDA) { +#ifdef WITH_GPU + FDASSERT( + mat_batch->Stream() != nullptr, + "CUDA processor requires cuda stream, please set stream for mat_batch"); + return ImplByCuda(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + } else if (target == ProcLib::CVCUDA) { +#ifdef ENABLE_CVCUDA + FDASSERT(mat_batch->Stream() != nullptr, + "CV-CUDA processor requires cuda stream, please set stream for " + "mat_batch"); + return ImplByCvCuda(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } + // DEFAULT & OPENCV + return ImplByOpenCV(mat_batch); +} + +void EnableFlyCV() { +#ifdef ENABLE_FLYCV + DefaultProcLib::default_lib = ProcLib::FLYCV; + FDINFO << "Will change to use image processing library " + << DefaultProcLib::default_lib << std::endl; +#else + FDWARNING << "UltraInfer didn't compile with FlyCV, " + "will fallback to use OpenCV instead." + << std::endl; +#endif +} + +void DisableFlyCV() { + DefaultProcLib::default_lib = ProcLib::OPENCV; + FDINFO << "Will change to use image processing library " + << DefaultProcLib::default_lib << std::endl; +} + +void SetProcLibCpuNumThreads(int threads) { + cv::setNumThreads(threads); +#ifdef ENABLE_FLYCV + fcv::set_thread_num(threads); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base.h b/libs/ultrainfer/ultrainfer/vision/common/processors/base.h new file mode 100755 index 0000000000..4a15e66733 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "ultrainfer/vision/common/processors/mat_batch.h" +#include + +namespace ultrainfer { +namespace vision { + +/*! @brief Enable using FlyCV to process image while deploy vision models. + * Currently, FlyCV in only available on ARM(Linux aarch64), so will + * fallback to using OpenCV in other platform + */ +ULTRAINFER_DECL void EnableFlyCV(); + +/// Disable using FlyCV to process image while deploy vision models. +ULTRAINFER_DECL void DisableFlyCV(); + +/*! @brief Set the cpu num threads of ProcLib. + */ +ULTRAINFER_DECL void SetProcLibCpuNumThreads(int threads); + +/*! @brief Processor base class for processors in + * ultrainfer/vision/common/processors + */ +class ULTRAINFER_DECL Processor { +public: + // default_lib has the highest priority + // all the function in `processor` will force to use + // default_lib if this flag is set. + // DEFAULT means this flag is not set + // static ProcLib default_lib; + + virtual std::string Name() = 0; + + virtual bool ImplByOpenCV(FDMat *mat); + virtual bool ImplByOpenCV(FDMatBatch *mat_batch); + + virtual bool ImplByFlyCV(FDMat *mat); + virtual bool ImplByFlyCV(FDMatBatch *mat_batch); + + virtual bool ImplByCuda(FDMat *mat); + virtual bool ImplByCuda(FDMatBatch *mat_batch); + + virtual bool ImplByCvCuda(FDMat *mat); + virtual bool ImplByCvCuda(FDMatBatch *mat_batch); + + /*! @brief operator `()` for calling processor in this way: `processor(mat)` + * + * \param[in] mat: The input mat + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMat *mat); + + /*! @brief operator `()` for calling processor in this way: `processor(mat, + * lib)` This function is for backward compatibility, will be removed in the + * near future, please use operator()(FDMat* mat) instead and set proc_lib in + * mat. + * + * \param[in] mat: The input mat + * \param[in] lib: The processing library, opencv, cv-cuda, flycv, etc. + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMat *mat, ProcLib lib); + + /*! @brief operator `()` for calling processor in this way: + * `processor(mat_batch)` + * + * \param[in] mat_batch: The input mat batch + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMatBatch *mat_batch); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc new file mode 100755 index 0000000000..0656fe951b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc @@ -0,0 +1,28 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
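// Illustrative sketch (not part of this patch): typical driver-side setup for
// the dispatch implemented above. Assumes `mat` is an FDMat already wrapping a
// decoded image (FDMat construction lives in mat.h, outside this hunk);
// BGR2RGB is one of the concrete processors added later in this patch.
ultrainfer::vision::SetProcLibCpuNumThreads(4);  // caps OpenCV / FlyCV CPU threads
ultrainfer::vision::EnableFlyCV();               // keeps OpenCV (with a warning) if ENABLE_FLYCV is off

ultrainfer::vision::BGR2RGB bgr2rgb;
bool ok = bgr2rgb(&mat);                                        // uses DefaultProcLib::default_lib
ok = ok && bgr2rgb(&mat, ultrainfer::vision::ProcLib::OPENCV);  // or force a specific backend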
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" +#include + +namespace ultrainfer { +void BindProcessor(pybind11::module &m) { + pybind11::class_(m, "Processor") + .def("__call__", [](vision::Processor &self, + vision::FDMat *mat) { return self(mat); }) + .def("__call__", + [](vision::Processor &self, vision::FDMatBatch *mat_batch) { + return self(mat_batch); + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc new file mode 100755 index 0000000000..12047588e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/cast.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +bool Cast::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int c = im->channels(); + if (dtype_ == "float") { + if (im->type() != CV_32FC(c)) { + im->convertTo(*im, CV_32FC(c)); + } + } else if (dtype_ == "double") { + if (im->type() != CV_64FC(c)) { + im->convertTo(*im, CV_64FC(c)); + } + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! will skip this operation." << std::endl; + } + return true; +} + +#ifdef ENABLE_FLYCV +bool Cast::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (dtype_ == "float" && mat->Type() == FDDataType::FP32) { + return true; + } + if (dtype_ == "double" && mat->Type() == FDDataType::FP64) { + return true; + } + if (mat->layout != Layout::HWC) { + FDERROR + << "While using FlyCV to cast image, the image must be layout of HWC." + << std::endl; + return false; + } + if (dtype_ == "float") { + fcv::Mat new_im; + auto fcv_type = CreateFlyCVDataType(FDDataType::FP32, im->channels()); + im->convert_to(new_im, fcv_type); + mat->SetMat(new_im); + } else if (dtype_ == "double") { + fcv::Mat new_im; + auto fcv_type = CreateFlyCVDataType(FDDataType::FP64, im->channels()); + im->convert_to(new_im, fcv_type); + mat->SetMat(new_im); + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! will skip this operation." << std::endl; + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Cast::ImplByCvCuda(FDMat *mat) { + FDDataType dst_dtype; + if (dtype_ == "float") { + dst_dtype = FDDataType::FP32; + } else if (dtype_ == "double") { + dst_dtype = FDDataType::FP64; + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! 
will skip this operation." << std::endl; + return false; + } + if (mat->Type() == dst_dtype) { + return true; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize(src->Shape(), dst_dtype, "output_cache", + Device::GPU); + auto dst_tensor = + CreateCvCudaTensorWrapData(*(mat->output_cache), mat->layout); + + cvcuda_convert_op_(mat->Stream(), *src_tensor, *dst_tensor, 1.0f, 0.0f); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Cast::Run(Mat *mat, const std::string &dtype, ProcLib lib) { + auto c = Cast(dtype); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h new file mode 100755 index 0000000000..eddc1c3de6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for cast images with given type deafault is float. + */ +class ULTRAINFER_DECL Cast : public Processor { +public: + explicit Cast(const std::string &dtype = "float") : dtype_(dtype) {} + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Cast"; } + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] dtype type of data will be casted to + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::string &dtype, + ProcLib lib = ProcLib::DEFAULT); + + std::string GetDtype() const { return dtype_; } + +private: + std::string dtype_; +#ifdef ENABLE_CVCUDA + cvcuda::ConvertTo cvcuda_convert_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc new file mode 100755 index 0000000000..2b85572465 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
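// Illustrative sketch (not part of this patch) of the two call styles for the
// Cast processor defined above; `mat` is assumed to be an existing FDMat.
ultrainfer::vision::Cast::Run(&mat, "float");   // one-shot: cast payload to float32
ultrainfer::vision::Cast cast("float");         // reusable form for a preprocessing pipeline
bool cast_ok = cast(&mat);                      // dtypes other than "float"/"double" are skipped with a warning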
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCast(pybind11::module &m) { + pybind11::class_(m, "Cast").def( + pybind11::init(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc new file mode 100755 index 0000000000..dd9a3c569a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/center_crop.h" + +namespace ultrainfer { +namespace vision { + +bool CenterCrop::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int height = static_cast(im->rows); + int width = static_cast(im->cols); + if (height < height_ || width < width_) { + FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; + return false; + } + int offset_x = static_cast((width - width_) / 2); + int offset_y = static_cast((height - height_) / 2); + cv::Rect crop_roi(offset_x, offset_y, width_, height_); + cv::Mat new_im = (*im)(crop_roi).clone(); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} + +#ifdef ENABLE_FLYCV +bool CenterCrop::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int height = static_cast(im->height()); + int width = static_cast(im->width()); + if (height < height_ || width < width_) { + FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; + return false; + } + int offset_x = static_cast((width - width_) / 2); + int offset_y = static_cast((height - height_) / 2); + fcv::Rect crop_roi(offset_x, offset_y, width_, height_); + fcv::Mat new_im; + fcv::crop(*im, new_im, crop_roi); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool CenterCrop::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height_, width_, mat->Channels()}, src->Dtype(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + int offset_x = static_cast((mat->Width() - width_) / 2); + int offset_y = static_cast((mat->Height() - height_) / 2); + NVCVRectI crop_roi = {offset_x, offset_y, width_, height_}; + cvcuda_crop_op_(mat->Stream(), 
*src_tensor, *dst_tensor, crop_roi); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width_); + mat->SetHeight(height_); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +bool CenterCrop::ImplByCvCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCvCuda(&((*(mat_batch->mats))[i])) != true) { + return false; + } + } + mat_batch->device = Device::GPU; + mat_batch->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool CenterCrop::Run(FDMat *mat, const int &width, const int &height, + ProcLib lib) { + auto c = CenterCrop(width, height); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h new file mode 100755 index 0000000000..66ad486129 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for crop images in center with given type deafault is + * float. + */ +class ULTRAINFER_DECL CenterCrop : public Processor { +public: + CenterCrop(int width, int height) : height_(height), width_(width) {} + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "CenterCrop"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] width width of data will be croped to + * \param[in] height height of data will be croped to + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const int &width, const int &height, + ProcLib lib = ProcLib::DEFAULT); + +private: + int height_; + int width_; +#ifdef ENABLE_CVCUDA + cvcuda::CustomCrop cvcuda_crop_op_; +#endif +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc new file mode 100755 index 0000000000..f0a5b5e03f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
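// Illustrative sketch (not part of this patch): center-cropping to a fixed
// network input size; `mat` is assumed to be an existing FDMat.
if (!ultrainfer::vision::CenterCrop::Run(&mat, 224, 224)) {
  // Returns false when the image is smaller than the requested crop.
}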
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterCrop(pybind11::module &m) { + pybind11::class_(m, "CenterCrop") + .def(pybind11::init(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc new file mode 100755 index 0000000000..5bbeed56af --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/color_space_convert.h" + +namespace ultrainfer { +namespace vision { +bool BGR2RGB::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_BGR2RGB); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool BGR2RGB::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[BGR2RGB] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_BGR2PA_RGB); + mat->SetMat(new_im); + return true; +} +#endif + +bool RGB2BGR::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_RGB2BGR); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool RGB2BGR::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[RGB2BGR] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_RGB2PA_BGR); + mat->SetMat(new_im); + return true; +} +#endif + +bool BGR2GRAY::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_BGR2GRAY); + mat->SetMat(new_im); + mat->SetChannels(1); + return true; +} + +#ifdef ENABLE_FLYCV +bool BGR2GRAY::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[BGR2GRAY] The channel of input image must be 3, but not it's " + << im->channels() << "." 
<< std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_BGR2GRAY); + mat->SetMat(new_im); + return true; +} +#endif + +bool RGB2GRAY::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_RGB2GRAY); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool RGB2GRAY::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[RGB2GRAY] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_RGB2GRAY); + mat->SetMat(new_im); + return true; +} +#endif + +bool BGR2RGB::Run(FDMat *mat, ProcLib lib) { + auto b = BGR2RGB(); + return b(mat, lib); +} + +bool RGB2BGR::Run(FDMat *mat, ProcLib lib) { + auto r = RGB2BGR(); + return r(mat, lib); +} + +bool BGR2GRAY::Run(FDMat *mat, ProcLib lib) { + auto b = BGR2GRAY(); + return b(mat, lib); +} + +bool RGB2GRAY::Run(FDMat *mat, ProcLib lib) { + auto r = RGB2GRAY(); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h new file mode 100755 index 0000000000..b908177364 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for tansform images from BGR to RGB. + */ +class ULTRAINFER_DECL BGR2RGB : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + virtual std::string Name() { return "BGR2RGB"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from RGB to BGR. + */ +class ULTRAINFER_DECL RGB2BGR : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "RGB2BGR"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from BGR to GRAY. 
+ */ +class ULTRAINFER_DECL BGR2GRAY : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + virtual std::string Name() { return "BGR2GRAY"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from RGB to GRAY. + */ +class ULTRAINFER_DECL RGB2GRAY : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "RGB2GRAY"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc new file mode 100755 index 0000000000..3353b77d18 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
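// Illustrative sketch (not part of this patch) of the color-space converters
// declared above; `mat` is assumed to be an FDMat holding a 3-channel image.
ultrainfer::vision::BGR2RGB::Run(&mat);     // e.g. models trained on RGB, images decoded as BGR
// ultrainfer::vision::BGR2GRAY::Run(&mat); // gray-scale models; only the OpenCV path resets Channels() to 1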
+ +#include "ultrainfer/vision/common/processors/convert.h" + +namespace ultrainfer { + +namespace vision { + +Convert::Convert(const std::vector &alpha, + const std::vector &beta) { + FDASSERT(alpha.size() == beta.size(), + "Convert: requires the size of alpha equal to the size of beta."); + FDASSERT(alpha.size() != 0, + "Convert: requires the size of alpha and beta > 0."); + alpha_.assign(alpha.begin(), alpha.end()); + beta_.assign(beta.begin(), beta.end()); +} + +bool Convert::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + std::vector split_im; + cv::split(*im, split_im); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::merge(split_im, *im); + return true; +} + +#ifdef ENABLE_FLYCV +bool Convert::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + FDASSERT(im->channels() == 3, "Only support 3-channels image in FlyCV."); + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector(), + new_im, true); + mat->SetMat(new_im); + return true; +} +#endif + +bool Convert::Run(Mat *mat, const std::vector &alpha, + const std::vector &beta, ProcLib lib) { + auto c = Convert(alpha, beta); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h new file mode 100755 index 0000000000..9eeff4e36b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for convert images with given paramters. + */ +class ULTRAINFER_DECL Convert : public Processor { +public: + Convert(const std::vector &alpha, const std::vector &beta); + + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "Convert"; } + + // Compute `result = mat * alpha + beta` directly by channel. + // The default behavior is the same as OpenCV's convertTo method. + /** \brief Process the input images + * + * \param[in] mat The input image data,`result = mat * alpha + beta` + * \param[in] alpha The alpha channel data + * \param[in] beta The beta channel data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::vector &alpha, + const std::vector &beta, + ProcLib lib = ProcLib::DEFAULT); + +private: + std::vector alpha_; + std::vector beta_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc new file mode 100755 index 0000000000..2ab6ac638c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/convert_and_permute.h" + +namespace ultrainfer { +namespace vision { + +ConvertAndPermute::ConvertAndPermute(const std::vector &alpha, + const std::vector &beta, + bool swap_rb) { + FDASSERT(alpha.size() == beta.size(), "ConvertAndPermute: requires the size " + "of alpha equal to the size of beta."); + FDASSERT(alpha.size() > 0 && beta.size() > 0, + "ConvertAndPermute: requires the size of alpha and beta > 0."); + alpha_.assign(alpha.begin(), alpha.end()); + beta_.assign(beta.begin(), beta.end()); + swap_rb_ = swap_rb; +} + +bool ConvertAndPermute::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::Mat res(origin_h, origin_w, CV_32FC(im->channels())); + for (int i = 0; i < im->channels(); ++i) { + cv::extractChannel(split_im[i], + cv::Mat(origin_h, origin_w, CV_32FC1, + res.ptr() + i * origin_h * origin_w * 4), + 0); + } + + mat->SetMat(res); + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool ConvertAndPermute::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." 
<< std::endl; + return false; + } + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +bool ConvertAndPermute::Run(FDMat *mat, const std::vector &alpha, + const std::vector &beta, bool swap_rb, + ProcLib lib) { + auto n = ConvertAndPermute(alpha, beta, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h new file mode 100755 index 0000000000..f00aad5f63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for convert images with given paramters and permute images + * from HWC to CHW. + */ +class ULTRAINFER_DECL ConvertAndPermute : public Processor { +public: + ConvertAndPermute(const std::vector &alpha = std::vector(), + const std::vector &beta = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "ConvertAndPermute"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data,`result = mat * alpha + beta` + * \param[in] alpha The alpha channel data + * \param[in] beta The beta channel data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const std::vector &alpha, + const std::vector &beta, bool swap_rb = false, + ProcLib lib = ProcLib::DEFAULT); + + std::vector GetAlpha() const { return alpha_; } + + /** \brief Process the input images + * + * \param[in] alpha set the value of the alpha parameter + */ + void SetAlpha(const std::vector &alpha) { + alpha_.clear(); + std::vector().swap(alpha_); + alpha_.assign(alpha.begin(), alpha.end()); + } + + std::vector GetBeta() const { return beta_; } + + /** \brief Process the input images + * + * \param[in] beta set the value of the beta parameter + */ + void SetBeta(const std::vector &beta) { + beta_.clear(); + std::vector().swap(beta_); + beta_.assign(beta.begin(), beta.end()); + } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc new file mode 100755 index 0000000000..1a8011dbfc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/crop.h" + +namespace ultrainfer { +namespace vision { + +bool Crop::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int height = static_cast(im->rows); + int width = static_cast(im->cols); + if (height < height_ + offset_h_ || width < width_ + offset_w_) { + FDERROR << "[Crop] Cannot crop [" << height_ << ", " << width_ + << "] from the input image [" << height << ", " << width + << "], with offset [" << offset_h_ << ", " << offset_w_ << "]." + << std::endl; + return false; + } + cv::Rect crop_roi(offset_w_, offset_h_, width_, height_); + cv::Mat new_im = (*im)(crop_roi).clone(); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} + +#ifdef ENABLE_FLYCV +bool Crop::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int height = static_cast(im->height()); + int width = static_cast(im->width()); + if (height < height_ + offset_h_ || width < width_ + offset_w_) { + FDERROR << "[Crop] Cannot crop [" << height_ << ", " << width_ + << "] from the input image [" << height << ", " << width + << "], with offset [" << offset_h_ << ", " << offset_w_ << "]." 
+ << std::endl; + return false; + } + fcv::Rect crop_roi(offset_w_, offset_h_, width_, height_); + fcv::Mat new_im; + fcv::crop(*im, new_im, crop_roi); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} +#endif + +bool Crop::Run(Mat *mat, int offset_w, int offset_h, int width, int height, + ProcLib lib) { + auto c = Crop(offset_w, offset_h, width, height); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h new file mode 100755 index 0000000000..b40dd97015 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for crop images with given paramters. + */ +class ULTRAINFER_DECL Crop : public Processor { +public: + Crop(int offset_w, int offset_h, int width, int height) { + offset_w_ = offset_w; + offset_h_ = offset_h; + width_ = width; + height_ = height; + } + + bool ImplByOpenCV(Mat *mat); + +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "Crop"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] offset_w The offset of width. + * \param[in] offset_h The offset of height. + * \param[in] width The width of the output image. + * \param[in] height The height of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int offset_w, int offset_h, int width, int height, + ProcLib lib = ProcLib::DEFAULT); + +private: + int offset_w_; + int offset_h_; + int height_; + int width_; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc new file mode 100755 index 0000000000..9841af7df1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
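// Illustrative sketch (not part of this patch): mapping the usual (mean, std)
// normalization onto ConvertAndPermute's per-channel alpha / beta. The
// ImageNet mean/std values below are placeholders, not something this patch
// prescribes. Since result = mat * alpha + beta (then HWC -> CHW, optionally
// swapping B/R first), x' = (x / 255 - mean) / std gives
//   alpha = 1 / (255 * std),  beta = -mean / std.
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> stdv = {0.229f, 0.224f, 0.225f};
std::vector<float> alpha(3), beta(3);
for (int c = 0; c < 3; ++c) {
  alpha[c] = 1.0f / (255.0f * stdv[c]);
  beta[c] = -mean[c] / stdv[c];
}
ultrainfer::vision::ConvertAndPermute::Run(&mat, alpha, beta, /*swap_rb=*/true);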
+ +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" + +namespace ultrainfer { +namespace vision { + +#ifdef ENABLE_CVCUDA +nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, + bool interleaved) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in CV-CUDA."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return nvcv::FMT_U8; + } else if (channel == 3) { + return (interleaved ? nvcv::FMT_BGR8 : nvcv::FMT_BGR8p); + } else { + return (interleaved ? nvcv::FMT_BGRA8 : nvcv::FMT_BGRA8p); + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return nvcv::FMT_F32; + } else if (channel == 3) { + return (interleaved ? nvcv::FMT_BGRf32 : nvcv::FMT_BGRf32p); + } else { + return (interleaved ? nvcv::FMT_BGRAf32 : nvcv::FMT_BGRAf32p); + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return nvcv::FMT_BGRf32; +} + +std::shared_ptr +CreateCvCudaTensorWrapData(const FDTensor &tensor, Layout layout) { + FDASSERT(tensor.shape.size() == 3, "When create CVCUDA tensor from FD tensor," + "tensor shape should be 3-Dim,"); + int batchsize = 1; + int h = tensor.Shape()[0]; + int w = tensor.Shape()[1]; + int c = tensor.Shape()[2]; + + nvcv::TensorDataStridedCuda::Buffer buf; + buf.strides[3] = FDDataTypeSize(tensor.Dtype()); + buf.strides[2] = c * buf.strides[3]; + buf.strides[1] = w * buf.strides[2]; + buf.strides[0] = h * buf.strides[1]; + if (layout == Layout::CHW) { + c = tensor.Shape()[0]; + h = tensor.Shape()[1]; + w = tensor.Shape()[2]; + buf.strides[3] = FDDataTypeSize(tensor.Dtype()); + buf.strides[2] = w * buf.strides[3]; + buf.strides[1] = h * buf.strides[2]; + buf.strides[0] = c * buf.strides[1]; + } + buf.basePtr = reinterpret_cast(const_cast(tensor.Data())); + + nvcv::Tensor::Requirements req = nvcv::Tensor::CalcRequirements( + batchsize, {w, h}, + CreateCvCudaImageFormat(tensor.Dtype(), c, layout == Layout::HWC)); + + nvcv::TensorDataStridedCuda tensor_data( + nvcv::TensorShape{req.shape, req.rank, req.layout}, + nvcv::DataType{req.dtype}, buf); + return std::make_shared(tensor_data, nullptr); +} + +void *GetCvCudaTensorDataPtr(const nvcv::TensorWrapData &tensor) { + auto data = + dynamic_cast(tensor.exportData()); + return reinterpret_cast(data->basePtr()); +} + +nvcv::ImageWrapData CreateImageWrapData(const FDTensor &tensor) { + FDASSERT(tensor.shape.size() == 3, + "When create CVCUDA image from FD tensor," + "tensor shape should be 3-Dim, HWC layout"); + int h = tensor.Shape()[0]; + int w = tensor.Shape()[1]; + int c = tensor.Shape()[2]; + nvcv::ImageDataStridedCuda::Buffer buf; + buf.numPlanes = 1; + buf.planes[0].width = w; + buf.planes[0].height = h; + buf.planes[0].rowStride = w * c * FDDataTypeSize(tensor.Dtype()); + buf.planes[0].basePtr = + reinterpret_cast(const_cast(tensor.Data())); + nvcv::ImageWrapData nvimg{nvcv::ImageDataStridedCuda{ + nvcv::ImageFormat{CreateCvCudaImageFormat(tensor.Dtype(), c)}, buf}}; + return nvimg; +} + +void CreateCvCudaImageBatchVarShape(std::vector &tensors, + nvcv::ImageBatchVarShape &img_batch) { + for (size_t i = 0; i < tensors.size(); ++i) { + FDASSERT(tensors[i]->device == Device::GPU, "Tensor must on GPU."); + img_batch.pushBack(CreateImageWrapData(*(tensors[i]))); + } +} + +NVCVInterpolationType CreateCvCudaInterp(int interp) { + // CV-CUDA Interp value is compatible with OpenCV + auto nvcv_interp = NVCVInterpolationType(interp); + + // Due to bug of CV-CUDA CUBIC resize, will force to convert CUBIC to LINEAR + if 
(nvcv_interp == NVCV_INTERP_CUBIC) { + return NVCV_INTERP_LINEAR; + } + return nvcv_interp; +} +#endif + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h new file mode 100755 index 0000000000..701e88cb09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef ENABLE_CVCUDA +#include +#include +#include + +namespace ultrainfer { +namespace vision { + +nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, + bool interleaved = true); +std::shared_ptr +CreateCvCudaTensorWrapData(const FDTensor &tensor, Layout layout = Layout::HWC); +void *GetCvCudaTensorDataPtr(const nvcv::TensorWrapData &tensor); +nvcv::ImageWrapData CreateImageWrapData(const FDTensor &tensor); +void CreateCvCudaImageBatchVarShape(std::vector &tensors, + nvcv::ImageBatchVarShape &img_batch); +NVCVInterpolationType CreateCvCudaInterp(int interp); + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc new file mode 100755 index 0000000000..be32cf1809 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/hwc2chw.h" + +#include "ultrainfer/function/transpose.h" + +namespace ultrainfer { +namespace vision { +bool HWC2CHW::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" 
+ << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat im_clone = im->clone(); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + + for (int i = 0; i < rc; ++i) { + cv::extractChannel( + im_clone, + cv::Mat(rh, rw, im->type() % 8, + im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())), + i); + } + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool HWC2CHW::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Type() != FDDataType::FP32) { + FDERROR << "HWC2CHW: Only support float data while use FlyCV, but now it's " + << mat->Type() << "." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, {0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}, + std::vector(), new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool HWC2CHW::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({mat->Channels(), mat->Height(), mat->Width()}, + src->Dtype(), "output_cache", Device::GPU); + auto dst_tensor = + CreateCvCudaTensorWrapData(*(mat->output_cache), Layout::CHW); + + cvcuda_reformat_op_(mat->Stream(), *src_tensor, *dst_tensor); + + mat->layout = Layout::CHW; + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool HWC2CHW::Run(Mat *mat, ProcLib lib) { + auto h = HWC2CHW(); + return h(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h new file mode 100755 index 0000000000..6d042e724d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for transform images from HWC to CHW. + */ +class ULTRAINFER_DECL HWC2CHW : public Processor { +public: + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "HWC2CHW"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, ProcLib lib = ProcLib::DEFAULT); + +private: +#ifdef ENABLE_CVCUDA + cvcuda::Reformat cvcuda_reformat_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc new file mode 100755 index 0000000000..67598277fa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindHWC2CHW(pybind11::module &m) { + pybind11::class_(m, "HWC2CHW") + .def(pybind11::init<>(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc new file mode 100755 index 0000000000..96934c2ab0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
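A minimal sketch (illustrative, not part of the patch) showing how HWC2CHW is typically applied to a wrapped OpenCV image; the image path is only an example:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/hwc2chw.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input
  auto mat = ultrainfer::vision::WrapMat(image);
  // Transpose the packed HWC image to planar CHW in place; on success the
  // Mat's layout field is updated to Layout::CHW.
  if (!ultrainfer::vision::HWC2CHW::Run(&mat)) {
    return 1;
  }
  return 0;
}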
+ +#include "ultrainfer/vision/common/processors/limit_by_stride.h" + +namespace ultrainfer { +namespace vision { + +bool LimitByStride::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + int rw = origin_w - origin_w % stride_; + int rh = origin_h - origin_h % stride_; + if (rw == 0) { + rw = stride_; + } + if (rh == 0) { + rh = stride_; + } + if (rw != origin_w || rh != origin_w) { + cv::resize(*im, *im, cv::Size(rw, rh), 0, 0, interp_); + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + } + return true; +} + +#ifdef ENABLE_FLYCV +bool LimitByStride::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + int rw = origin_w - origin_w % stride_; + int rh = origin_h - origin_h % stride_; + if (rw == 0) { + rw = stride_; + } + if (rh == 0) { + rh = stride_; + } + if (rw != origin_w || rh != origin_h) { + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR + << "LimitByStride: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." << std::endl; + return false; + } + + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(rw, rh), 0, 0, interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } + return true; +} +#endif + +bool LimitByStride::Run(Mat *mat, int stride, int interp, ProcLib lib) { + auto r = LimitByStride(stride, interp); + return r(mat, lib); +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h new file mode 100755 index 0000000000..f905a60e1c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for LimitByStride images with given paramters. + */ +class ULTRAINFER_DECL LimitByStride : public Processor { +public: + explicit LimitByStride(int stride = 32, int interp = 1) { + stride_ = stride; + interp_ = interp; + } + + // Resize Mat* mat to make the size divisible by stride_. 
+ bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "LimitByStride"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] stride limit image stride, deafult is 32 + * \param[in] interp interpolation method, deafult is 1 + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int stride = 32, int interp = 1, + ProcLib lib = ProcLib::DEFAULT); + +private: + int interp_; + int stride_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc new file mode 100755 index 0000000000..93d80394cc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/limit_short.h" + +namespace ultrainfer { +namespace vision { + +bool LimitShort::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + int im_size_min = std::min(origin_w, origin_h); + int target = im_size_min; + if (max_short_ > 0 && im_size_min > max_short_) { + target = max_short_; + } else if (min_short_ > 0 && im_size_min < min_short_) { + target = min_short_; + } + double scale = -1.f; + if (target != im_size_min) { + scale = static_cast(target) / static_cast(im_size_min); + } + if (fabs(scale - 1.0) > 1e-06) { + cv::resize(*im, *im, cv::Size(), scale, scale, interp_); + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + } + return true; +} + +#ifdef ENABLE_FLYCV +bool LimitShort::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + int im_size_min = std::min(origin_w, origin_h); + int target = im_size_min; + if (max_short_ > 0 && im_size_min > max_short_) { + target = max_short_; + } else if (min_short_ > 0 && im_size_min < min_short_) { + target = min_short_; + } + double scale = -1.f; + if (target != im_size_min) { + scale = static_cast(target) / static_cast(im_size_min); + } + if (fabs(scale - 1.0) > 1e-06) { + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR + << "LimitByShort: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." 
<< std::endl; + return false; + } + + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(), scale, scale, interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } + return true; +} +#endif + +bool LimitShort::Run(Mat *mat, int max_short, int min_short, int interp, + ProcLib lib) { + auto l = LimitShort(max_short, min_short, interp); + return l(mat, lib); +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h new file mode 100755 index 0000000000..cc991878c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for Limit images by short edge with given paramters. + */ +class LimitShort : public Processor { +public: + explicit LimitShort(int max_short = -1, int min_short = -1, int interp = 1) { + max_short_ = max_short; + min_short_ = min_short; + interp_ = interp; + } + + // Limit the short edge of image. + // If the short edge is larger than max_short_, resize the short edge + // to max_short_, while scale the long edge proportionally. + // If the short edge is smaller than min_short_, resize the short edge + // to min_short_, while scale the long edge proportionally. + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "LimitShort"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] max_short target size of short edge + * \param[in] min_short target size of short edge + * \param[in] interp interpolation method, deafult is 1 + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int max_short = -1, int min_short = -1, + int interp = 1, ProcLib lib = ProcLib::DEFAULT); + int GetMaxShort() const { return max_short_; } + +private: + int max_short_; + int min_short_; + int interp_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc new file mode 100755 index 0000000000..f03198aa07 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
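A sketch (illustrative, not part of the patch) of how LimitShort and LimitByStride compose, with a worked size example in the comments:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/limit_by_stride.h"
#include "ultrainfer/vision/common/processors/limit_short.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input, e.g. 500x375
  auto mat = ultrainfer::vision::WrapMat(image);
  // LimitShort with min_short=640 scales a 500x375 image by 640/375, giving
  // roughly 853x640; LimitByStride(32) then rounds both edges down to
  // multiples of 32, giving 832x640.
  bool ok = ultrainfer::vision::LimitShort::Run(&mat, /*max_short=*/-1,
                                                /*min_short=*/640) &&
            ultrainfer::vision::LimitByStride::Run(&mat, /*stride=*/32);
  return ok ? 0 : 1;
}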
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/common/processors/manager.h" + +namespace ultrainfer { +namespace vision { + +ProcessorManager::~ProcessorManager() { +#ifdef WITH_GPU + if (stream_) + cudaStreamDestroy(stream_); +#endif +} + +void ProcessorManager::UseCuda(bool enable_cv_cuda, int gpu_id) { +#ifdef WITH_GPU + if (gpu_id >= 0) { + device_id_ = gpu_id; + FDASSERT(cudaSetDevice(device_id_) == cudaSuccess, + "[ERROR] Error occurs while setting cuda device."); + } + FDASSERT(cudaStreamCreate(&stream_) == cudaSuccess, + "[ERROR] Error occurs while creating cuda stream."); + proc_lib_ = ProcLib::CUDA; +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + + if (enable_cv_cuda) { +#ifdef ENABLE_CVCUDA + proc_lib_ = ProcLib::CVCUDA; +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } +} + +bool ProcessorManager::CudaUsed() { + return (proc_lib_ == ProcLib::CUDA || proc_lib_ == ProcLib::CVCUDA); +} + +void ProcessorManager::PreApply(FDMatBatch *image_batch) { + FDASSERT(image_batch->mats != nullptr, "The mats is empty."); + FDASSERT(image_batch->mats->size() > 0, + "The size of input images should be greater than 0."); + + if (image_batch->mats->size() > input_caches_.size()) { + input_caches_.resize(image_batch->mats->size()); + output_caches_.resize(image_batch->mats->size()); + } + image_batch->input_cache = &batch_input_cache_; + image_batch->output_cache = &batch_output_cache_; + image_batch->proc_lib = proc_lib_; + if (CudaUsed()) { + SetStream(image_batch); + } + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + mat->input_cache = &input_caches_[i]; + mat->output_cache = &output_caches_[i]; + mat->proc_lib = proc_lib_; + if (mat->mat_type == ProcLib::CUDA) { + // Make a copy of the input data ptr, so that the original data ptr of + // FDMat won't be modified. + auto fd_tensor = std::make_shared(); + fd_tensor->SetExternalData(mat->Tensor()->shape, mat->Tensor()->Dtype(), + mat->Tensor()->Data(), mat->Tensor()->device, + mat->Tensor()->device_id); + mat->SetTensor(fd_tensor); + } + } +} + +void ProcessorManager::PostApply() { + if (CudaUsed()) { + SyncStream(); + } +} + +bool ProcessorManager::Run(std::vector *images, + std::vector *outputs) { + FDMatBatch image_batch(images); + PreApply(&image_batch); + bool ret = Apply(&image_batch, outputs); + PostApply(); + return ret; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h new file mode 100755 index 0000000000..80950e9db9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/base.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "ultrainfer/vision/common/processors/mat_batch.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief ProcessorManager for Preprocess + */ +class ULTRAINFER_DECL ProcessorManager { +public: + ~ProcessorManager(); + + /** \brief Use CUDA to boost the performance of processors + * + * \param[in] enable_cv_cuda ture: use CV-CUDA, false: use CUDA only + * \param[in] gpu_id GPU device id + * \return true if the preprocess successed, otherwise false + */ + void UseCuda(bool enable_cv_cuda = false, int gpu_id = -1); + + bool CudaUsed(); + +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream_; } +#endif + + void SetStream(FDMat *mat) { +#ifdef WITH_GPU + mat->SetStream(stream_); +#endif + } + + void SetStream(FDMatBatch *mat_batch) { +#ifdef WITH_GPU + mat_batch->SetStream(stream_); +#endif + } + + void SyncStream() { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream_) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); +#endif + } + + int DeviceId() { return device_id_; } + + /** \brief Process the input images and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /** \brief Apply() is the body of Run() function, it needs to be implemented + * by a derived class + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, + std::vector *outputs) = 0; + + void PreApply(FDMatBatch *image_batch); + + void PostApply(); + +protected: + ProcLib proc_lib_ = ProcLib::DEFAULT; + +private: +#ifdef WITH_GPU + cudaStream_t stream_ = nullptr; +#endif + int device_id_ = -1; + + std::vector input_caches_; + std::vector output_caches_; + FDTensor batch_input_cache_; + FDTensor batch_output_cache_; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc new file mode 100755 index 0000000000..e97a4aca6a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
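Since Apply() is pure virtual, every concrete preprocessor derives from ProcessorManager. A minimal sketch follows (illustrative, not part of the patch; the class name and the single-step processor chain are made up, the elided vector element types are assumed to be FDMat and FDTensor, and the call operator signature is assumed to match the Run() helpers above):

#include <vector>

#include "ultrainfer/vision/common/processors/hwc2chw.h"
#include "ultrainfer/vision/common/processors/manager.h"

namespace ultrainfer {
namespace vision {

class MyPreprocessor : public ProcessorManager {  // illustrative name
public:
  // Apply() receives the batch prepared by PreApply() and fills the tensors
  // that will be fed to the runtime.
  bool Apply(FDMatBatch *image_batch, std::vector<FDTensor> *outputs) override {
    for (size_t i = 0; i < image_batch->mats->size(); ++i) {
      if (!hwc2chw_(&(image_batch->mats->at(i)), image_batch->proc_lib)) {
        return false;
      }
    }
    outputs->resize(1);
    FDTensor *batched = image_batch->Tensor();  // packed batch tensor
    (*outputs)[0].SetExternalData(batched->Shape(), batched->Dtype(),
                                  batched->Data(), batched->device,
                                  batched->device_id);
    return true;
  }

private:
  HWC2CHW hwc2chw_;
};

}  // namespace vision
}  // namespace ultrainfer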
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +namespace vision { +// PyProcessorManager is used for pybind11::init() of ProcessorManager +// Because ProcessorManager have a pure Virtual function Apply() +class ULTRAINFER_DECL PyProcessorManager : public ProcessorManager { +public: + using ProcessorManager::ProcessorManager; + bool Apply(FDMatBatch *image_batch, std::vector *outputs) override { + PYBIND11_OVERRIDE_PURE(bool, ProcessorManager, Apply, image_batch, outputs); + } +}; +} // namespace vision + +void BindProcessorManager(pybind11::module &m) { + pybind11::class_( + m, "ProcessorManager") + .def(pybind11::init<>()) + .def("run", + [](vision::ProcessorManager &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to process the input data"); + } + if (!self.CudaUsed()) { + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + } + return outputs; + }) + .def("pre_apply", &vision::ProcessorManager::PreApply) + .def("post_apply", &vision::ProcessorManager::PostApply) + .def("use_cuda", + [](vision::ProcessorManager &self, bool enable_cv_cuda = false, + int gpu_id = -1) { self.UseCuda(enable_cv_cuda, gpu_id); }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc new file mode 100755 index 0000000000..23660879ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc @@ -0,0 +1,337 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/common/processors/mat.h" + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat *Mat::GetOpenCVMat() { + if (mat_type == ProcLib::OPENCV) { + return &cpu_mat; + } else if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + // Just a reference to fcv_mat, zero copy. After you + // call this method, cpu_mat and fcv_mat will point + // to the same memory buffer. 
+ cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); + mat_type = ProcLib::OPENCV; + return &cpu_mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); + cpu_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor, layout); + mat_type = ProcLib::OPENCV; + device = Device::CPU; + return &cpu_mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with -DWITH_GPU=ON"); +#endif + } else { + FDASSERT(false, "The mat_type of custom Mat can not be ProcLib::DEFAULT"); + } +} + +#ifdef ENABLE_FLYCV +fcv::Mat *Mat::GetFlyCVMat() { + if (mat_type == ProcLib::FLYCV) { + return &fcv_mat; + } else if (mat_type == ProcLib::OPENCV) { + // Just a reference to cpu_mat, zero copy. After you + // call this method, fcv_mat and cpu_mat will point + // to the same memory buffer. + fcv_mat = ConvertOpenCVMatToFlyCV(cpu_mat); + mat_type = ProcLib::FLYCV; + return &fcv_mat; + } else { + FDASSERT(false, "The mat_type of custom Mat can not be ProcLib::DEFAULT"); + } +} +#endif + +void *Mat::Data() { + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return fcv_mat.data(); +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (device == Device::GPU) { + return fd_tensor->Data(); + } + return cpu_mat.ptr(); +} + +FDTensor *Mat::Tensor() { + if (mat_type == ProcLib::OPENCV) { + ShareWithTensor(fd_tensor.get()); + } else if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); + mat_type = ProcLib::OPENCV; + ShareWithTensor(fd_tensor.get()); +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + return fd_tensor.get(); +} + +void Mat::SetTensor(FDTensor *tensor) { + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::SetTensor(std::shared_ptr &tensor) { + fd_tensor = tensor; + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::ShareWithTensor(FDTensor *tensor) { + tensor->SetExternalData({Channels(), Height(), Width()}, Type(), Data()); + tensor->device = device; + if (layout == Layout::HWC) { + tensor->shape = {Height(), Width(), Channels()}; + } +} + +bool Mat::CopyToTensor(FDTensor *tensor) { + int total_bytes = Height() * Width() * Channels() * FDDataTypeSize(Type()); + if (total_bytes != tensor->Nbytes()) { + FDERROR << "While copy Mat to Tensor, requires the memory size be same, " + "but now size of Tensor = " + << tensor->Nbytes() << ", size of Mat = " << total_bytes << "." 
+ << std::endl; + return false; + } + memcpy(tensor->MutableData(), Data(), total_bytes); + return true; +} + +void Mat::PrintInfo(const std::string &flag) { + std::cout << flag << ": " + << "DataType=" << Type() << ", " + << "Channel=" << Channels() << ", " + << "Height=" << Height() << ", " + << "Width=" << Width() << ", " + << "Mean="; + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Scalar mean = fcv::mean(fcv_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (mat_type == ProcLib::OPENCV) { + cv::Scalar mean = cv::mean(cpu_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); + cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor, layout); + cv::Scalar mean = cv::mean(tmp_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; +#else + FDASSERT(false, "UltraInfer didn't compiled with -DWITH_GPU=ON"); +#endif + } +} + +FDDataType Mat::Type() { + int type = -1; + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return FlyCVDataTypeToFD(fcv_mat.type()); +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { + return fd_tensor->Dtype(); + } + return OpenCVDataTypeToFD(cpu_mat.type()); +} + +Mat Mat::Create(const FDTensor &tensor) { + if (DefaultProcLib::default_lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = CreateZeroCopyFlyCVMatFromTensor(tensor); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = CreateZeroCopyOpenCVMatFromTensor(tensor); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(const FDTensor &tensor, ProcLib lib) { + if (lib == ProcLib::DEFAULT) { + return Create(tensor); + } + if (lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = CreateZeroCopyFlyCVMatFromTensor(tensor); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = CreateZeroCopyOpenCVMatFromTensor(tensor); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(int height, int width, int channels, FDDataType type, + void *data) { + if (DefaultProcLib::default_lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = + CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = + CreateZeroCopyOpenCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(int height, int width, int channels, FDDataType type, + void *data, ProcLib lib) { + if (lib == ProcLib::DEFAULT) { + return Create(height, width, channels, type, data); + } + if (lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = + CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, data); + Mat mat = 
Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = + CreateZeroCopyOpenCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +FDMat WrapMat(const cv::Mat &image) { + FDMat mat(image); + return mat; +} + +std::vector WrapMat(const std::vector &images) { + std::vector mats; + for (size_t i = 0; i < images.size(); ++i) { + mats.emplace_back(FDMat(images[i])); + } + return mats; +} + +bool CheckShapeConsistency(std::vector *mats) { + if (mats == nullptr) { + return true; + } + for (size_t i = 1; i < mats->size(); ++i) { + if ((*mats)[i].Channels() != (*mats)[0].Channels() || + (*mats)[i].Width() != (*mats)[0].Width() || + (*mats)[i].Height() != (*mats)[0].Height()) { + return false; + } + } + return true; +} + +FDTensor *CreateCachedGpuInputTensor(Mat *mat) { +#ifdef WITH_GPU + FDTensor *src = mat->Tensor(); + // Need to make sure the tensor is pointed to the input_cache. + if (src->Data() == mat->output_cache->Data()) { + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + } + if (src->device == Device::GPU) { + return src; + } else if (src->device == Device::CPU) { + // Tensor on CPU, we need copy it from CPU to GPU + FDASSERT(src->Shape().size() == 3, "The CPU tensor must has 3 dims.") + mat->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache", + Device::GPU); + FDASSERT(cudaMemcpyAsync(mat->output_cache->Data(), src->Data(), + src->Nbytes(), cudaMemcpyHostToDevice, + mat->Stream()) == 0, + "[ERROR] Error occurs while copy memory from CPU to GPU."); + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + return mat->input_cache; + } else { + FDASSERT(false, "FDMat is on unsupported device: %d", src->device); + } +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + return nullptr; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h new file mode 100755 index 0000000000..02fb71da96 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h @@ -0,0 +1,176 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "opencv2/core/core.hpp" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/vision/common/processors/proc_lib.h" + +#ifdef ENABLE_FLYCV +#include "flycv.h" // NOLINT +#endif + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { +namespace vision { + +enum Layout { HWC, CHW }; + +/*! 
@brief FDMat is a structure for replace cv::Mat + */ +struct ULTRAINFER_DECL Mat { + Mat() = default; + explicit Mat(const cv::Mat &mat) { + cpu_mat = mat; + layout = Layout::HWC; + height = cpu_mat.rows; + width = cpu_mat.cols; + channels = cpu_mat.channels(); + mat_type = ProcLib::OPENCV; + } + +#ifdef ENABLE_FLYCV + explicit Mat(const fcv::Mat &mat) { + fcv_mat = mat; + layout = Layout::HWC; + height = fcv_mat.height(); + width = fcv_mat.width(); + channels = fcv_mat.channels(); + mat_type = ProcLib::FLYCV; + } +#endif + + Mat(const Mat &mat) = default; + Mat &operator=(const Mat &mat) = default; + + // Move constructor + Mat(Mat &&other) = default; + + // Careful if you use this interface + // this only used if you don't want to write + // the original data, and write to a new cv::Mat + // then replace the old cv::Mat of this structure + void SetMat(const cv::Mat &mat) { + cpu_mat = mat; + mat_type = ProcLib::OPENCV; + } + + cv::Mat *GetOpenCVMat(); + +#ifdef ENABLE_FLYCV + void SetMat(const fcv::Mat &mat) { + fcv_mat = mat; + mat_type = ProcLib::FLYCV; + } + fcv::Mat *GetFlyCVMat(); +#endif + + void *Data(); + + // Get fd_tensor + FDTensor *Tensor(); + + // Set fd_tensor + void SetTensor(FDTensor *tensor); + + void SetTensor(std::shared_ptr &tensor); + +private: + int channels; + int height; + int width; + cv::Mat cpu_mat; +#ifdef ENABLE_FLYCV + fcv::Mat fcv_mat; +#endif +#ifdef WITH_GPU + cudaStream_t stream = nullptr; +#endif + // Currently, fd_tensor is only used by CUDA and CV-CUDA, + // OpenCV and FlyCV are not using it. + std::shared_ptr fd_tensor = std::make_shared(); + +public: + FDDataType Type(); + int Channels() const { return channels; } + int Width() const { return width; } + int Height() const { return height; } + void SetChannels(int s) { channels = s; } + void SetWidth(int w) { width = w; } + void SetHeight(int h) { height = h; } + + // When using CV-CUDA/CUDA, please set input/output cache, + // refer to manager.cc + FDTensor *input_cache = nullptr; + FDTensor *output_cache = nullptr; +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream; } + void SetStream(cudaStream_t s) { stream = s; } +#endif + + // Transfer the vision::Mat to FDTensor + void ShareWithTensor(FDTensor *tensor); + // Only support copy to cpu tensor now + bool CopyToTensor(FDTensor *tensor); + + // Debug functions + // TODO(jiangjiajun) Develop a right process pipeline with c++ + // is not a easy things, Will add more debug function here to + // help debug processed image. This function will print shape + // and mean of each channels of the Mat + void PrintInfo(const std::string &flag); + + ProcLib mat_type = ProcLib::OPENCV; + Layout layout = Layout::HWC; + Device device = Device::CPU; + ProcLib proc_lib = ProcLib::DEFAULT; + + // Create FD Mat from FD Tensor. This method only create a + // new FD Mat with zero copy and it's data pointer is reference + // to the original memory buffer of input FD Tensor. Carefully, + // any operation on this Mat may change memory that points to + // FDTensor. We assume that the memory Mat points to is mutable. + // This method will create a FD Mat according to current global + // default ProcLib (OPENCV,FLYCV,...). 
+ static Mat Create(const FDTensor &tensor); + static Mat Create(const FDTensor &tensor, ProcLib lib); + static Mat Create(int height, int width, int channels, FDDataType type, + void *data); + static Mat Create(int height, int width, int channels, FDDataType type, + void *data, ProcLib lib); +}; + +typedef Mat FDMat; +/* + * @brief Wrap a cv::Mat to FDMat, there's no memory copy, memory buffer is + * managed by user + */ +ULTRAINFER_DECL FDMat WrapMat(const cv::Mat &image); +/* + * Warp a vector to vector, there's no memory copy, memory + * buffer is managed by user + */ +ULTRAINFER_DECL std::vector WrapMat(const std::vector &images); + +bool CheckShapeConsistency(std::vector *mats); + +// Create an input tensor on GPU and save into input_cache. +// If the Mat is on GPU, return the mat->Tensor() directly. +// If the Mat is on CPU, then update the input cache tensor and copy the mat's +// CPU tensor to this new GPU input cache tensor. +FDTensor *CreateCachedGpuInputTensor(Mat *mat); +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc new file mode 100755 index 0000000000..88f4f53769 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
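A short sketch (illustrative, not part of the patch) of wrapping a cv::Mat into an FDMat and exposing it as a zero-copy FDTensor; the image path is only an example:

#include "opencv2/opencv.hpp"
#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input
  ultrainfer::vision::FDMat mat = ultrainfer::vision::WrapMat(image);  // zero copy
  mat.PrintInfo("input");  // prints dtype, channels, height, width and per-channel mean

  // Share the HWC buffer with an FDTensor; no copy is made, so `image` must
  // stay alive for as long as `tensor` is used.
  ultrainfer::FDTensor tensor;
  mat.ShareWithTensor(&tensor);
  return 0;
}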
+#include "ultrainfer/vision/common/processors/mat_batch.h" + +namespace ultrainfer { +namespace vision { + +#ifdef WITH_GPU +void FDMatBatch::SetStream(cudaStream_t s) { + stream = s; + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].SetStream(s); + } +} +#endif + +FDTensor *FDMatBatch::Tensor() { + if (has_batched_tensor) { + return fd_tensor.get(); + } + FDASSERT(mats != nullptr, "Failed to get batched tensor, Mats are empty."); + FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent."); + // Each mat has its own tensor, + // to get a batched tensor, we need copy these tensors to a batched tensor + FDTensor *src = (*mats)[0].Tensor(); + device = src->device; + auto new_shape = src->Shape(); + new_shape.insert(new_shape.begin(), mats->size()); + input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device); + for (size_t i = 0; i < mats->size(); ++i) { + FDASSERT(device == (*mats)[i].Tensor()->device, + "Mats and MatBatch are not on the same device"); + uint8_t *p = reinterpret_cast(input_cache->Data()); + int num_bytes = (*mats)[i].Tensor()->Nbytes(); + FDTensor::CopyBuffer(p + i * num_bytes, (*mats)[i].Tensor()->Data(), + num_bytes, device, false); + } + SetTensor(input_cache); + return fd_tensor.get(); +} + +void FDMatBatch::SetTensor(FDTensor *tensor) { + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + has_batched_tensor = true; +} + +FDTensor *CreateCachedGpuInputTensor(FDMatBatch *mat_batch) { +#ifdef WITH_GPU + // Get the batched tensor + FDTensor *src = mat_batch->Tensor(); + // Need to make sure the returned tensor is pointed to the input_cache. + if (src->Data() == mat_batch->output_cache->Data()) { + std::swap(mat_batch->input_cache, mat_batch->output_cache); + std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name); + } + if (src->device == Device::GPU) { + return src; + } else if (src->device == Device::CPU) { + // Batched tensor on CPU, we need copy it to GPU + mat_batch->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache", + Device::GPU); + FDASSERT(cudaMemcpyAsync(mat_batch->output_cache->Data(), src->Data(), + src->Nbytes(), cudaMemcpyHostToDevice, + mat_batch->Stream()) == 0, + "[ERROR] Error occurs while copy memory from CPU to GPU."); + std::swap(mat_batch->input_cache, mat_batch->output_cache); + std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name); + return mat_batch->input_cache; + } else { + FDASSERT(false, "FDMatBatch is on unsupported device: %d", src->device); + } +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + return nullptr; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h new file mode 100755 index 0000000000..0c11915786 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { +namespace vision { + +enum FDMatBatchLayout { NHWC, NCHW }; + +/*! @brief FDMatBatch contains batch data for preprocess + */ +struct ULTRAINFER_DECL FDMatBatch { + FDMatBatch() = default; + + // MatBatch is intialized with a list of mats, + // the data is stored in the mats separately. + // Call Tensor() function to get a batched 4-dimension tensor. + explicit FDMatBatch(std::vector *_mats) { + mats = _mats; + layout = FDMatBatchLayout::NHWC; + mat_type = ProcLib::OPENCV; + } + + // Get the batched 4-dimension tensor. + FDTensor *Tensor(); + + void SetTensor(FDTensor *tensor); + +private: +#ifdef WITH_GPU + cudaStream_t stream = nullptr; +#endif + std::shared_ptr fd_tensor = std::make_shared(); + +public: + // When using CV-CUDA/CUDA, please set input/output cache, + // refer to manager.cc + FDTensor *input_cache; + FDTensor *output_cache; +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream; } + void SetStream(cudaStream_t s); +#endif + + std::vector *mats = nullptr; + + // Used by pybind, since python cannot pass list as pointer or reference + std::vector mats_holder; + + ProcLib mat_type = ProcLib::OPENCV; + FDMatBatchLayout layout = FDMatBatchLayout::NHWC; + Device device = Device::CPU; + ProcLib proc_lib = ProcLib::DEFAULT; + + // False: the data is stored in the mats separately + // True: the data is stored in the fd_tensor continuously in 4 dimensions + bool has_batched_tensor = false; +}; + +// Create a batched input tensor on GPU and save into input_cache. +// If the MatBatch is on GPU, return the Tensor() directly. +// If the MatBatch is on CPU, then copy the CPU tensors to GPU and get a GPU +// batched input tensor. +FDTensor *CreateCachedGpuInputTensor(FDMatBatch *mat_batch); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc new file mode 100755 index 0000000000..447d07e83e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
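A sketch (illustrative, not part of the patch; the elided vector element types are assumed to be cv::Mat and FDMat) of building an FDMatBatch from several images and packing them into one batched tensor; Tensor() requires identically shaped inputs and an input cache to hold the packed result:

#include <vector>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/mat_batch.h"

int main() {
  // Illustrative inputs; all images must share the same height/width/channels.
  std::vector<cv::Mat> images = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::FDMat> mats = ultrainfer::vision::WrapMat(images);

  ultrainfer::vision::FDMatBatch batch(&mats);
  ultrainfer::FDTensor cache;
  batch.input_cache = &cache;  // holds the packed NHWC data
  ultrainfer::FDTensor *batched = batch.Tensor();  // shape {N, H, W, C}
  (void)batched;
  return 0;
}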
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFDMatBatch(pybind11::module &m) { + pybind11::class_(m, "FDMatBatch") + .def(pybind11::init<>(), "Default constructor") + .def_readwrite("input_cache", &vision::FDMatBatch::input_cache) + .def_readwrite("output_cache", &vision::FDMatBatch::output_cache) + .def_readwrite("mats", &vision::FDMatBatch::mats) + .def("from_mats", + [](vision::FDMatBatch &self, std::vector &_mats) { + self.mats_holder = _mats; + self.mats = &(self.mats_holder); + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc new file mode 100755 index 0000000000..2bf591e1c7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFDMat(pybind11::module &m) { + pybind11::class_(m, "FDMat") + .def(pybind11::init<>(), "Default constructor") + .def_readwrite("input_cache", &vision::FDMat::input_cache) + .def_readwrite("output_cache", &vision::FDMat::output_cache) + .def("from_numpy", + [](vision::FDMat &self, pybind11::array &pyarray) { + self = vision::WrapMat(PyArrayToCvMat(pyarray)); + }) + .def("print_info", &vision::FDMat::PrintInfo); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc new file mode 100755 index 0000000000..7f5cc49086 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/common/processors/normalize.h" + +namespace ultrainfer { +namespace vision { +Normalize::Normalize(const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, bool swap_rb) { + FDASSERT(mean.size() == std.size(), + "Normalize: requires the size of mean equal to the size of std."); + std::vector mean_(mean.begin(), mean.end()); + std::vector std_(std.begin(), std.end()); + std::vector min_(mean.size(), 0.0); + std::vector max_(mean.size(), 255.0); + if (min.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while min is defined, requires the size of min equal to " + "the size of mean."); + min_.assign(min.begin(), min.end()); + } + if (max.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while max is defined, requires the size of max equal to " + "the size of mean."); + max_.assign(max.begin(), max.end()); + } + for (auto c = 0; c < mean_.size(); ++c) { + double alpha = 1.0; + if (is_scale) { + alpha /= (max_[c] - min_[c]); + } + double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c]; + alpha /= std_[c]; + alpha_.push_back(alpha); + beta_.push_back(beta); + } + swap_rb_ = swap_rb; +} + +bool Normalize::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::merge(split_im, *im); + return true; +} + +#ifdef ENABLE_FLYCV +bool Normalize::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." << std::endl; + return false; + } + + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im(im->width(), im->height(), fcv::FCVImageType::PKG_BGR_F32); + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, true); + mat->SetMat(new_im); + return true; +} +#endif + +bool Normalize::Run(Mat *mat, const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, ProcLib lib, bool swap_rb) { + auto n = Normalize(mean, std, is_scale, min, max, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu new file mode 100755 index 0000000000..1f31689898 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/vision/common/processors/normalize.h" + +namespace ultrainfer { +namespace vision { + +__global__ void NormalizeKernel(const uint8_t *src, float *dst, + const float *alpha, const float *beta, + int num_channel, bool swap_rb, int batch_size, + int edge) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + if (idx >= edge) + return; + + int img_size = edge / batch_size; + int n = idx / img_size; // batch index + int p = idx - (n * img_size); // pixel index within the image + + for (int i = 0; i < num_channel; ++i) { + int j = i; + if (swap_rb) { + j = 2 - i; + } + dst[num_channel * idx + j] = + src[num_channel * idx + j] * alpha[i] + beta[i]; + } +} + +bool Normalize::ImplByCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "The input data must be NHWC format!" << std::endl; + return false; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + src->ExpandDim(0); + FDMatBatch mat_batch; + mat_batch.SetTensor(src); + mat_batch.mat_type = ProcLib::CUDA; + mat_batch.input_cache = mat->input_cache; + mat_batch.output_cache = mat->output_cache; + + bool ret = ImplByCuda(&mat_batch); + + FDTensor *dst = mat_batch.Tensor(); + dst->Squeeze(0); + mat->SetTensor(dst); + mat->mat_type = ProcLib::CUDA; + return true; +} + +bool Normalize::ImplByCuda(FDMatBatch *mat_batch) { + if (mat_batch->layout != FDMatBatchLayout::NHWC) { + FDERROR << "The input data must be NHWC format!" << std::endl; + return false; + } + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat_batch); + + // Prepare output tensor + mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, + "batch_output_cache", Device::GPU); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = + mat_batch->output_cache->Numel() / mat_batch->output_cache->shape[3]; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat_batch->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), + mat_batch->output_cache->shape[3], swap_rb_, + mat_batch->output_cache->shape[0], jobs); + + mat_batch->SetTensor(mat_batch->output_cache); + mat_batch->mat_type = ProcLib::CUDA; + return true; +} + +#ifdef ENABLE_CVCUDA +bool Normalize::ImplByCvCuda(FDMat *mat) { return ImplByCuda(mat); } + +bool Normalize::ImplByCvCuda(FDMatBatch *mat_batch) { + return ImplByCuda(mat_batch); +} +#endif + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h new file mode 100755 index 0000000000..7625ef35aa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h @@ -0,0 +1,90 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
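Both CUDA entry points above launch one thread per pixel position (jobs excludes the channel dimension) and size the grid with a ceiling division. A standalone restatement of that host-side arithmetic, with hypothetical shapes:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical NHWC batch: 8 x 224 x 224 x 3.
  const int n = 8, h = 224, w = 224, c = 3;
  const int numel = n * h * w * c;
  const int jobs = numel / c;  // one thread per (n, h, w) position
  const int threads = 256;
  const int blocks =
      static_cast<int>(std::ceil(jobs / static_cast<float>(threads)));
  std::printf("jobs=%d threads=%d blocks=%d\n", jobs, threads, blocks);
  return 0;
}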
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for Normalize images with given paramters. + */ +class ULTRAINFER_DECL Normalize : public Processor { +public: + Normalize(const std::vector &mean, const std::vector &std, + bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef WITH_GPU + bool ImplByCuda(FDMat *mat); + bool ImplByCuda(FDMatBatch *mat_batch); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "Normalize"; } + + // While use normalize, it is more recommend not use this function + // this function will need to compute result = ((mat / 255) - mean) / std + // if we use the following method + // ``` + // auto norm = Normalize(...) + // norm(mat) + // ``` + // There will be some precomputation in contruct function + // and the `norm(mat)` only need to compute result = mat * alpha + beta + // which will reduce lots of time + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] mean target mean vector of output images + * \param[in] std target std vector of output images + * \param[in] max max value vector to be in target image + * \param[in] min min value vector to be in target image + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \param[in] swap_rb to define whether to swap r and b channel order + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); + + std::vector GetAlpha() const { return alpha_; } + std::vector GetBeta() const { return beta_; } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + FDTensor gpu_alpha_; + FDTensor gpu_beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc new file mode 100755 index 0000000000..d724b38ac4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
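The comment block in normalize.h recommends constructing the processor once so the alpha/beta folding is paid a single time. A hedged usage sketch along those lines (assumes the ultrainfer vision headers and OpenCV are available, and that WrapMat lives in the processors' mat.h; the mean/std values are sample numbers, not from this patch):

// Not a standalone program; header path for WrapMat is an assumption.
#include "ultrainfer/vision/common/processors/mat.h"
#include "ultrainfer/vision/common/processors/normalize.h"
#include <opencv2/core.hpp>

void NormalizeOnce(cv::Mat &frame) {
  // Constructed once: alpha/beta are precomputed here.
  static ultrainfer::vision::Normalize norm({0.485f, 0.456f, 0.406f},
                                            {0.229f, 0.224f, 0.225f},
                                            /*is_scale=*/true);
  ultrainfer::vision::FDMat mat = ultrainfer::vision::WrapMat(frame);
  norm(&mat);  // per image: only result = mat * alpha + beta
}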
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" + +namespace ultrainfer { +namespace vision { + +NormalizeAndPermute::NormalizeAndPermute(const std::vector &mean, + const std::vector &std, + bool is_scale, + const std::vector &min, + const std::vector &max, + bool swap_rb) { + FDASSERT(mean.size() == std.size(), + "Normalize: requires the size of mean equal to the size of std."); + std::vector mean_(mean.begin(), mean.end()); + std::vector std_(std.begin(), std.end()); + std::vector min_(mean.size(), 0.0); + std::vector max_(mean.size(), 255.0); + if (min.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while min is defined, requires the size of min equal to " + "the size of mean."); + min_.assign(min.begin(), min.end()); + } + if (max.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while max is defined, requires the size of max equal to " + "the size of mean."); + max_.assign(max.begin(), max.end()); + } + for (auto c = 0; c < mean_.size(); ++c) { + double alpha = 1.0; + if (is_scale) { + alpha /= (max_[c] - min_[c]); + } + double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c]; + alpha /= std_[c]; + alpha_.push_back(alpha); + beta_.push_back(beta); + } + swap_rb_ = swap_rb; +} + +bool NormalizeAndPermute::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::Mat res(origin_h, origin_w, CV_32FC(im->channels())); + for (int i = 0; i < im->channels(); ++i) { + cv::extractChannel(split_im[i], + cv::Mat(origin_h, origin_w, CV_32FC1, + res.ptr() + i * origin_h * origin_w * 4), + 0); + } + mat->SetMat(res); + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool NormalizeAndPermute::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." 
<< std::endl; + return false; + } + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +bool NormalizeAndPermute::Run(FDMat *mat, const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, ProcLib lib, + bool swap_rb) { + auto n = NormalizeAndPermute(mean, std, is_scale, min, max, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu new file mode 100755 index 0000000000..177bb1c193 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu @@ -0,0 +1,134 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" + +namespace ultrainfer { +namespace vision { + +__global__ void NormalizeAndPermuteKernel(const uint8_t *src, float *dst, + const float *alpha, const float *beta, + int num_channel, bool swap_rb, + int batch_size, int edge) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + if (idx >= edge) + return; + + int img_size = edge / batch_size; + int n = idx / img_size; // batch index + int p = idx - (n * img_size); // pixel index within the image + + for (int i = 0; i < num_channel; ++i) { + int j = i; + if (swap_rb) { + j = 2 - i; + } + dst[n * img_size * num_channel + i * img_size + p] = + src[num_channel * idx + j] * alpha[i] + beta[i]; + } +} + +bool NormalizeAndPermute::ImplByCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." 
<< std::endl; + return false; + } + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + + // Prepare output tensor + mat->output_cache->Resize({src->shape[2], src->shape[0], src->shape[1]}, + FDDataType::FP32, "output_cache", Device::GPU); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = 1 * mat->Width() * mat->Height(); + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeAndPermuteKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), mat->Channels(), swap_rb_, 1, + jobs); + + mat->layout = Layout::CHW; + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CUDA; + return true; +} + +bool NormalizeAndPermute::ImplByCuda(FDMatBatch *mat_batch) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat_batch); + + // Prepare output tensor + mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, + "batch_output_cache", Device::GPU); + // NHWC -> NCHW + std::swap(mat_batch->output_cache->shape[1], + mat_batch->output_cache->shape[3]); + std::swap(mat_batch->output_cache->shape[2], + mat_batch->output_cache->shape[3]); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = + mat_batch->output_cache->Numel() / mat_batch->output_cache->shape[1]; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeAndPermuteKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat_batch->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), + mat_batch->output_cache->shape[1], swap_rb_, + mat_batch->output_cache->shape[0], jobs); + + mat_batch->SetTensor(mat_batch->output_cache); + mat_batch->layout = FDMatBatchLayout::NCHW; + mat_batch->mat_type = ProcLib::CUDA; + return true; +} + +#ifdef ENABLE_CVCUDA +bool NormalizeAndPermute::ImplByCvCuda(FDMat *mat) { return ImplByCuda(mat); } + +bool NormalizeAndPermute::ImplByCvCuda(FDMatBatch *mat_batch) { + return ImplByCuda(mat_batch); +} +#endif + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h new file mode 100755 index 0000000000..877749dcd0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
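The NormalizeAndPermuteKernel above maps an NHWC source element at (batch n, pixel p, channel c) to the NCHW offset n*HW*C + c*HW + p while applying alpha/beta. A standalone CPU restatement of just the index mapping (the scaling is dropped to isolate the layout change), on a tiny hypothetical tensor:

#include <cstdio>
#include <vector>

int main() {
  const int n = 2, h = 2, w = 2, c = 3;  // hypothetical sizes
  const int img_size = h * w, edge = n * img_size;
  std::vector<float> src(edge * c), dst(edge * c);
  for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
  for (int idx = 0; idx < edge; ++idx) {  // one "thread" per pixel position
    int b = idx / img_size, p = idx - b * img_size;
    for (int ch = 0; ch < c; ++ch)
      dst[b * img_size * c + ch * img_size + p] = src[c * idx + ch];
  }
  // Channel-0 plane of batch 0 now holds src[0], src[3], src[6], src[9].
  std::printf("dst[0..5] = %g %g %g %g %g %g\n", dst[0], dst[1], dst[2],
              dst[3], dst[4], dst[5]);
  return 0;
}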
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for Normalize and Permute images from HWC to CHW. + */ +class ULTRAINFER_DECL NormalizeAndPermute : public Processor { +public: + NormalizeAndPermute(const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef WITH_GPU + bool ImplByCuda(FDMat *mat); + bool ImplByCuda(FDMatBatch *mat_batch); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "NormalizeAndPermute"; } + + // While use normalize, it is more recommend not use this function + // this function will need to compute result = ((mat / 255) - mean) / std + // if we use the following method + // ``` + // auto norm = Normalize(...) + // norm(mat) + // ``` + // There will be some precomputation in contruct function + // and the `norm(mat)` only need to compute result = mat * alpha + beta + // which will reduce lots of time + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] mean target mean vector of output images + * \param[in] std target std vector of output images + * \param[in] max max value vector to be in target image + * \param[in] min min value vector to be in target image + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \param[in] swap_rb to define whether to swap r and b channel order + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); + + /** \brief Process the input images + * + * \param[in] alpha set the value of the alpha parameter + */ + void SetAlpha(const std::vector &alpha) { + alpha_.clear(); + std::vector().swap(alpha_); + alpha_.assign(alpha.begin(), alpha.end()); + } + + /** \brief Process the input images + * + * \param[in] beta set the value of the beta parameter + */ + void SetBeta(const std::vector &beta) { + beta_.clear(); + std::vector().swap(beta_); + beta_.assign(beta.begin(), beta.end()); + } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + FDTensor gpu_alpha_; + FDTensor gpu_beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc new file mode 100755 index 0000000000..b8e4b8f503 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindNormalizeAndPermute(pybind11::module &m) { + pybind11::class_( + m, "NormalizeAndPermute") + .def(pybind11::init, std::vector, bool, + std::vector, std::vector, bool>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc new file mode 100755 index 0000000000..489ba7322e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc @@ -0,0 +1,24 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindNormalize(pybind11::module &m) { + pybind11::class_(m, "Normalize") + .def(pybind11::init, std::vector, bool, + std::vector, std::vector, bool>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc new file mode 100755 index 0000000000..4ca38c4382 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/pad.h" + +namespace ultrainfer { +namespace vision { + +bool Pad::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." + << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar value; + if (value_.size() == 1) { + value = cv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = cv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = cv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_, + cv::BORDER_CONSTANT, value); + mat->SetHeight(im->rows); + mat->SetWidth(im->cols); + return true; +} + +#ifdef ENABLE_FLYCV +bool Pad::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." 
+ << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar value; + if (value_.size() == 1) { + value = fcv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = fcv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = fcv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = fcv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + fcv::Mat new_im; + fcv::copy_make_border(*im, new_im, top_, bottom_, left_, right_, + fcv::BorderType::BORDER_CONSTANT, value); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Pad::ImplByCvCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." + << std::endl; + return false; + } + + float4 value; + if (value_.size() == 1) { + value = make_float4(value_[0], 0.0f, 0.0f, 0.0f); + } else if (value_.size() == 2) { + value = make_float4(value_[0], value_[1], 0.0f, 0.0f); + } else if (value_.size() == 3) { + value = make_float4(value_[0], value_[1], value_[2], 0.0f); + } else { + value = make_float4(value_[0], value_[1], value_[2], value_[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + int height = mat->Height() + top_ + bottom_; + int width = mat->Width() + left_ + right_; + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, top_, left_, + NVCV_BORDER_CONSTANT, value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Pad::Run(Mat *mat, const int &top, const int &bottom, const int &left, + const int &right, const std::vector &value, ProcLib lib) { + auto p = Pad(top, bottom, left, right, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h new file mode 100755 index 0000000000..5984a58858 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images. + */ +class ULTRAINFER_DECL Pad : public Processor { +public: + Pad(int top, int bottom, int left, int right, + const std::vector &value) { + top_ = top; + bottom_ = bottom; + left_ = left; + right_ = right; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Pad"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] top top pad size of the output image. + * \param[in] bottom bottom pad size of the output image. + * \param[in] left left pad size of the output image. + * \param[in] right right pad size of the output image. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const int &top, const int &bottom, const int &left, + const int &right, const std::vector &value, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] top set the value of the top parameter + * \param[in] bottom set the value of the bottom parameter + * \param[in] left set the value of the left parameter + * \param[in] right set the value of the right parameter + */ + bool SetPaddingSize(int top, int bottom, int left, int right) { + top_ = top; + bottom_ = bottom; + left_ = left; + right_ = right; + return true; + } + +private: + int top_; + int bottom_; + int left_; + int right_; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc new file mode 100755 index 0000000000..a9886872bb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPad(pybind11::module &m) { + pybind11::class_(m, "Pad").def( + pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc new file mode 100755 index 0000000000..25c36b55a6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
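Pad::ImplByOpenCV above delegates to cv::copyMakeBorder with a constant border whose scalar must supply one component per channel. A standalone OpenCV sketch with hypothetical sizes, showing the resulting dimensions:

#include <cstdio>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main() {
  cv::Mat im(4, 6, CV_8UC3, cv::Scalar(10, 20, 30));  // hypothetical 6x4 image
  cv::Mat padded;
  cv::copyMakeBorder(im, padded, /*top=*/2, /*bottom=*/2, /*left=*/1,
                     /*right=*/1, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
  std::printf("padded: %dx%d\n", padded.cols, padded.rows);  // 8x8
  return 0;
}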
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/pad_to_size.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +static bool PadHWCByOpenCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar scalar; + if (value.size() == 1) { + scalar = cv::Scalar(value[0]); + } else if (value.size() == 2) { + scalar = cv::Scalar(value[0], value[1]); + } else if (value.size() == 3) { + scalar = cv::Scalar(value[0], value[1], value[2]); + } else { + scalar = cv::Scalar(value[0], value[1], value[2], value[3]); + } + // top, bottom, left, right + cv::copyMakeBorder(*im, *im, 0, height - origin_h, 0, width - origin_w, + cv::BORDER_CONSTANT, scalar); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +static bool PadCHWByOpenCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im(height, width, + CreateOpenCVDataType(mat->Type(), mat->Channels())); + + for (int i = 0; i < mat->Channels(); ++i) { + uint8_t *src_data = + im->ptr() + i * origin_w * origin_h * FDDataTypeSize(mat->Type()); + cv::Mat src(origin_h, origin_w, CreateOpenCVDataType(mat->Type(), 1), + src_data); + + uint8_t *dst_data = + new_im.ptr() + i * width * height * FDDataTypeSize(mat->Type()); + cv::Mat dst(height, width, CreateOpenCVDataType(mat->Type(), 1), dst_data); + + cv::copyMakeBorder(src, dst, 0, height - origin_h, 0, width - origin_w, + cv::BORDER_CONSTANT, cv::Scalar(value[i])); + } + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +bool PadToSize::CheckArgs(FDMat *mat) { + if (mat->Channels() > 4) { + FDERROR << "PadToSize: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "PadToSize: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + if (mat->Width() > width_) { + FDERROR << "PadToSize: the input width:" << mat->Width() + << " is greater than the target width: " << width_ << "." + << std::endl; + return false; + } + if (mat->Height() > height_) { + FDERROR << "PadToSize: the input height:" << mat->Height() + << " is greater than the target height: " << height_ << "." 
+ << std::endl; + return false; + } + return true; +} + +bool PadToSize::ImplByOpenCV(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByOpenCV(mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByOpenCV(mat, width_, height_, value_); + } + return false; +} + +#ifdef ENABLE_FLYCV +static bool PadHWCByFlyCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar scalar; + if (value.size() == 1) { + scalar = fcv::Scalar(value[0]); + } else if (value.size() == 2) { + scalar = fcv::Scalar(value[0], value[1]); + } else if (value.size() == 3) { + scalar = fcv::Scalar(value[0], value[1], value[2]); + } else { + scalar = fcv::Scalar(value[0], value[1], value[2], value[3]); + } + fcv::Mat new_im; + // top, bottom, left, right + fcv::copy_make_border(*im, new_im, 0, height - origin_h, 0, width - origin_w, + fcv::BorderType::BORDER_CONSTANT, scalar); + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +static bool PadCHWByFlyCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + fcv::Mat new_im(height, width, + CreateFlyCVDataType(mat->Type(), mat->Channels())); + for (int i = 0; i < mat->Channels(); ++i) { + uint8_t *src_data = reinterpret_cast(mat->Data()) + + i * origin_w * origin_h * FDDataTypeSize(mat->Type()); + fcv::Mat src(origin_h, origin_w, CreateFlyCVDataType(mat->Type(), 1), + src_data); + + uint8_t *dst_data = reinterpret_cast(new_im.data()) + + i * width * height * FDDataTypeSize(mat->Type()); + fcv::Mat dst(height, width, CreateFlyCVDataType(mat->Type(), 1), dst_data); + + fcv::copy_make_border(src, dst, 0, height - origin_h, 0, width - origin_w, + fcv::BorderType::BORDER_CONSTANT, + fcv::Scalar(value[i])); + } + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +bool PadToSize::ImplByFlyCV(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByFlyCV(mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByFlyCV(mat, width_, height_, value_); + } + return false; +} +#endif + +#ifdef ENABLE_CVCUDA +static bool PadHWCByCvCuda(cvcuda::CopyMakeBorder &pad_op, FDMat *mat, + int width, int height, + const std::vector &value) { + float4 border_value; + if (value.size() == 1) { + border_value = make_float4(value[0], 0.0f, 0.0f, 0.0f); + } else if (value.size() == 2) { + border_value = make_float4(value[0], value[1], 0.0f, 0.0f); + } else if (value.size() == 3) { + border_value = make_float4(value[0], value[1], value[2], 0.0f); + } else { + border_value = make_float4(value[0], value[1], value[2], value[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + pad_op(mat->Stream(), *src_tensor, 
*dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, + border_value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +static bool PadCHWByCvCuda(cvcuda::CopyMakeBorder &pad_op, FDMat *mat, + int width, int height, + const std::vector &value) { + float4 border_value = make_float4(value[0], 0.0f, 0.0f, 0.0f); + FDTensor *input = CreateCachedGpuInputTensor(mat); + int channels = input->shape[0]; + mat->output_cache->Resize({channels, height, width}, mat->Type(), + "output_cache", Device::GPU); + for (int i = 0; i < channels; ++i) { + uint8_t *src_data = + reinterpret_cast(input->Data()) + + i * mat->Width() * mat->Height() * FDDataTypeSize(mat->Type()); + FDTensor src; + src.SetExternalData({mat->Height(), mat->Width(), 1}, input->Dtype(), + src_data, input->device, input->device_id); + auto src_tensor = CreateCvCudaTensorWrapData(src); + + uint8_t *dst_data = reinterpret_cast(mat->output_cache->Data()) + + i * width * height * FDDataTypeSize(mat->Type()); + FDTensor dst; + dst.SetExternalData({height, width, 1}, input->Dtype(), dst_data, + input->device, input->device_id); + auto dst_tensor = CreateCvCudaTensorWrapData(dst); + + pad_op(mat->Stream(), (*src_tensor), (*dst_tensor), 0, 0, + NVCV_BORDER_CONSTANT, border_value); + } + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +bool PadToSize::ImplByCvCuda(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByCvCuda(cvcuda_pad_op_, mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByCvCuda(cvcuda_pad_op_, mat, width_, height_, value_); + } + return false; +} +#endif + +bool PadToSize::Run(Mat *mat, int width, int height, + const std::vector &value, ProcLib lib) { + auto p = PadToSize(width, height, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h new file mode 100755 index 0000000000..1c7e49627a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images to given size. 
+ */ +class ULTRAINFER_DECL PadToSize : public Processor { +public: + // only support pad with right-bottom padding mode + PadToSize(int width, int height, const std::vector &value) { + width_ = width; + height_ = height; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "PadToSize"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] width width of the output image. + * \param[in] height height of the output image. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int width, int height, + const std::vector &value, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] width set the value of the width parameter + * \param[in] height set the value of the height parameter + */ + void SetWidthHeight(int width, int height) { + width_ = width; + height_ = height; + } + +private: + bool CheckArgs(FDMat *mat); + int width_; + int height_; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc new file mode 100755 index 0000000000..3ea2694434 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPadToSize(pybind11::module &m) { + pybind11::class_(m, "PadToSize") + .def(pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc new file mode 100755 index 0000000000..017b2dd751 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
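PadToSize only supports the right-bottom padding mode noted in its class comment, so the implied border widths are simply target minus origin on the bottom and right, with zero padding on the top and left. A standalone restatement with hypothetical sizes:

#include <cstdio>

int main() {
  // Hypothetical: a 640x480 (w x h) image padded to a 704x512 target.
  const int origin_w = 640, origin_h = 480;
  const int target_w = 704, target_h = 512;
  const int top = 0, left = 0;             // right-bottom mode only
  const int bottom = target_h - origin_h;  // 32
  const int right = target_w - origin_w;   // 64
  std::printf("pad top=%d bottom=%d left=%d right=%d\n", top, bottom, left,
              right);
  return 0;
}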
+ +#include "ultrainfer/vision/common/processors/proc_lib.h" + +namespace ultrainfer { +namespace vision { + +ProcLib DefaultProcLib::default_lib = ProcLib::DEFAULT; + +std::ostream &operator<<(std::ostream &out, const ProcLib &p) { + switch (p) { + case ProcLib::DEFAULT: + out << "ProcLib::DEFAULT"; + break; + case ProcLib::OPENCV: + out << "ProcLib::OPENCV"; + break; + case ProcLib::FLYCV: + out << "ProcLib::FLYCV"; + break; + case ProcLib::CUDA: + out << "ProcLib::CUDA"; + break; + case ProcLib::CVCUDA: + out << "ProcLib::CVCUDA"; + break; + default: + FDASSERT(false, "Unknow type of ProcLib."); + } + return out; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h new file mode 100755 index 0000000000..5ec49687d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace vision { + +enum class ULTRAINFER_DECL ProcLib { DEFAULT, OPENCV, FLYCV, CUDA, CVCUDA }; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &out, const ProcLib &p); + +struct ULTRAINFER_DECL DefaultProcLib { + // default_lib has the highest priority + // all the function in `processor` will force to use + // default_lib if this flag is set. + // DEFAULT means this flag is not set + static ProcLib default_lib; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc new file mode 100755 index 0000000000..28f1e7570d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindProcessorManager(pybind11::module &m); +void BindNormalizeAndPermute(pybind11::module &m); +void BindProcessor(pybind11::module &m); +void BindResizeByShort(pybind11::module &m); +void BindCenterCrop(pybind11::module &m); +void BindPad(pybind11::module &m); +void BindCast(pybind11::module &m); +void BindHWC2CHW(pybind11::module &m); +void BindNormalize(pybind11::module &m); +void BindPadToSize(pybind11::module &m); +void BindResize(pybind11::module &m); +void BindStridePad(pybind11::module &m); + +void BindProcessors(pybind11::module &m) { + auto processors_m = + m.def_submodule("processors", "Module to deploy Processors models"); + BindProcessorManager(processors_m); + BindProcessor(processors_m); + BindNormalizeAndPermute(processors_m); + BindResizeByShort(processors_m); + BindCenterCrop(processors_m); + BindPad(processors_m); + BindCast(processors_m); + BindHWC2CHW(processors_m); + BindNormalize(processors_m); + BindPadToSize(processors_m); + BindResize(processors_m); + BindStridePad(processors_m); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc new file mode 100755 index 0000000000..c4605ab421 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/resize.h" + +namespace ultrainfer { +namespace vision { + +bool Resize::ImplByOpenCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + + if (width_ == origin_w && height_ == origin_h) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + if (width_ > 0 && height_ > 0) { + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + cv::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); + } else { + cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); + } + } else if (scale_w_ > 0 && scale_h_ > 0) { + cv::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} + +#ifdef ENABLE_FLYCV +bool Resize::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." 
<< std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + + if (width_ == origin_w && height_ == origin_h) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR << "Resize: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." << std::endl; + return false; + } + + if (width_ > 0 && height_ > 0) { + fcv::Mat new_im; + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + fcv::resize(*im, new_im, fcv::Size(), scale_w, scale_h, interp_method); + } else { + fcv::resize(*im, new_im, fcv::Size(width_, height_), 0, 0, interp_method); + } + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } else if (scale_w_ > 0 && scale_h_ > 0) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(0, 0), scale_w_, scale_h_, + interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Resize::ImplByCvCuda(FDMat *mat) { + if (width_ == mat->Width() && height_ == mat->Height()) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + if (width_ > 0 && height_ > 0) { + } else if (scale_w_ > 0 && scale_h_ > 0) { + width_ = std::round(scale_w_ * mat->Width()); + height_ = std::round(scale_h_ * mat->Height()); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." 
+ << std::endl; + return false; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height_, width_, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + // CV-CUDA Interp value is compatible with OpenCV + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, + CreateCvCudaInterp(interp_)); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width_); + mat->SetHeight(height_); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Resize::Run(FDMat *mat, int width, int height, float scale_w, + float scale_h, int interp, bool use_scale, ProcLib lib) { + if (mat->Height() == height && mat->Width() == width) { + return true; + } + auto r = Resize(width, height, scale_w, scale_h, interp, use_scale); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h new file mode 100755 index 0000000000..ee3e0dc31d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for Resize images. + */ +class ULTRAINFER_DECL Resize : public Processor { +public: + Resize(int width, int height, float scale_w = -1.0, float scale_h = -1.0, + int interp = 1, bool use_scale = false) { + width_ = width; + height_ = height; + scale_w_ = scale_w; + scale_h_ = scale_h; + interp_ = interp; + use_scale_ = use_scale; + } + + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Resize"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] width width of the output image. + * \param[in] height height of the output image. + * \param[in] scale_w scale of width, deafult is -1.0. + * \param[in] scale_h scale of height, deafult is -1.0. + * \param[in] interp interpolation method, deafult is 1. + * \param[in] use_scale to define wheather to scale the image, deafult is + * true. \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, int width, int height, float scale_w = -1.0, + float scale_h = -1.0, int interp = 1, bool use_scale = false, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] width set the value of the width parameter + * \param[in] height set the value of the height parameter + */ + bool SetWidthAndHeight(int width, int height) { + width_ = width; + height_ = height; + return true; + } + + std::tuple GetWidthAndHeight() { + return std::make_tuple(width_, height_); + } + +private: + int width_; + int height_; + float scale_w_ = -1.0; + float scale_h_ = -1.0; + int interp_ = 1; + bool use_scale_ = false; +#ifdef ENABLE_CVCUDA + cvcuda::Resize cvcuda_resize_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc new file mode 100755 index 0000000000..91b568ae96 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc @@ -0,0 +1,188 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/resize_by_short.h" + +namespace ultrainfer { +namespace vision { + +bool ResizeByShort::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + double scale = GenerateScale(origin_w, origin_h); + if (use_scale_ && fabs(scale - 1.0) >= 1e-06) { + cv::resize(*im, *im, cv::Size(), scale, scale, interp_); + } else { + int width = static_cast(round(scale * im->cols)); + int height = static_cast(round(scale * im->rows)); + if (width != origin_w || height != origin_h) { + cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_); + } + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} + +#ifdef ENABLE_FLYCV +bool ResizeByShort::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + double scale = GenerateScale(origin_w, origin_h); + + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR << "LimitByShort: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." 
<< std::endl; + return false; + } + + if (use_scale_ && fabs(scale - 1.0) >= 1e-06) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(), scale, scale, interp_method); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + } else { + int width = static_cast(round(scale * im->width())); + int height = static_cast(round(scale * im->height())); + if (width != origin_w || height != origin_h) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(width, height), 0, 0, interp_method); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + } + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool ResizeByShort::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + double scale = GenerateScale(mat->Width(), mat->Height()); + int width = static_cast(round(scale * mat->Width())); + int height = static_cast(round(scale * mat->Height())); + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, + CreateCvCudaInterp(interp_)); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width); + mat->SetHeight(height); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +bool ResizeByShort::ImplByCvCuda(FDMatBatch *mat_batch) { + // TODO(wangxinyu): to support batched tensor as input + FDASSERT(mat_batch->has_batched_tensor == false, + "ResizeByShort doesn't support batched tensor as input for now."); + // Prepare input batch + std::string tensor_name = Name() + "_cvcuda_src"; + std::vector src_tensors; + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDTensor *src = CreateCachedGpuInputTensor(&(*(mat_batch->mats))[i]); + src_tensors.push_back(src); + } + nvcv::ImageBatchVarShape src_batch(mat_batch->mats->size()); + CreateCvCudaImageBatchVarShape(src_tensors, src_batch); + + // Prepare output batch + tensor_name = Name() + "_cvcuda_dst"; + std::vector dst_tensors; + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDMat *mat = &(*(mat_batch->mats))[i]; + double scale = GenerateScale(mat->Width(), mat->Height()); + int width = static_cast(round(scale * mat->Width())); + int height = static_cast(round(scale * mat->Height())); + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + dst_tensors.push_back(mat->output_cache); + } + nvcv::ImageBatchVarShape dst_batch(mat_batch->mats->size()); + CreateCvCudaImageBatchVarShape(dst_tensors, dst_batch); + + // CV-CUDA Interp value is compatible with OpenCV + cvcuda_resize_op_(mat_batch->Stream(), src_batch, dst_batch, + CreateCvCudaInterp(interp_)); + + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDMat *mat = &(*(mat_batch->mats))[i]; + mat->SetTensor(dst_tensors[i]); + mat->SetWidth(dst_tensors[i]->Shape()[1]); + mat->SetHeight(dst_tensors[i]->Shape()[0]); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + } + mat_batch->device = Device::GPU; + mat_batch->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) { + int im_size_max = std::max(origin_w, origin_h); + int im_size_min = std::min(origin_w, origin_h); + double scale = + static_cast(target_size_) / 
static_cast<double>(im_size_min);
+
+  if (max_hw_.size() > 0) {
+    FDASSERT(max_hw_.size() == 2,
+             "Require size of max_hw_ be 2, but now it's %zu.", max_hw_.size());
+    FDASSERT(
+        max_hw_[0] > 0 && max_hw_[1] > 0,
+        "Require elements in max_hw_ greater than 0, but now it's [%d, %d].",
+        max_hw_[0], max_hw_[1]);
+
+    double scale_h =
+        static_cast<double>(max_hw_[0]) / static_cast<double>(origin_h);
+    double scale_w =
+        static_cast<double>(max_hw_[1]) / static_cast<double>(origin_w);
+    double min_scale = std::min(scale_h, scale_w);
+    if (min_scale < scale) {
+      scale = min_scale;
+    }
+  }
+  return scale;
+}
+
+bool ResizeByShort::Run(FDMat *mat, int target_size, int interp, bool use_scale,
+                        const std::vector<int> &max_hw, ProcLib lib) {
+  auto r = ResizeByShort(target_size, interp, use_scale, max_hw);
+  return r(mat, lib);
+}
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h
new file mode 100755
index 0000000000..b4d0f14251
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpResize.hpp>
+
+#include "ultrainfer/vision/common/processors/cvcuda_utils.h"
+#endif
+
+namespace ultrainfer {
+namespace vision {
+
+/*! @brief Processor for resizing images by their short edge.
+ */
+class ULTRAINFER_DECL ResizeByShort : public Processor {
+public:
+  ResizeByShort(int target_size, int interp = 1, bool use_scale = true,
+                const std::vector<int> &max_hw = std::vector<int>()) {
+    target_size_ = target_size;
+    max_hw_ = max_hw;
+    interp_ = interp;
+    use_scale_ = use_scale;
+  }
+  bool ImplByOpenCV(FDMat *mat);
+#ifdef ENABLE_FLYCV
+  bool ImplByFlyCV(FDMat *mat);
+#endif
+#ifdef ENABLE_CVCUDA
+  bool ImplByCvCuda(FDMat *mat);
+  bool ImplByCvCuda(FDMatBatch *mat_batch);
+#endif
+  std::string Name() { return "ResizeByShort"; }
+
+  /** \brief Process the input images
+   *
+   * \param[in] mat The input image data.
+   * \param[in] target_size target size of the output image.
+   * \param[in] interp interpolation method, default is 1.
+   * \param[in] use_scale to define whether to scale the image, default is
+   * true. \param[in] max_hw max HW of output image. \param[in] lib to define
+   * OpenCV or FlyCV or CVCUDA will be used.
\return true if the process + * successed, otherwise false + */ + static bool Run(FDMat *mat, int target_size, int interp = 1, + bool use_scale = true, + const std::vector &max_hw = std::vector(), + ProcLib lib = ProcLib::DEFAULT); + +private: + double GenerateScale(const int origin_w, const int origin_h); + int target_size_; + std::vector max_hw_; + int interp_; + bool use_scale_; +#ifdef ENABLE_CVCUDA + cvcuda::Resize cvcuda_resize_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc new file mode 100755 index 0000000000..79ad41037d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindResizeByShort(pybind11::module &m) { + pybind11::class_(m, "ResizeByShort") + .def(pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc new file mode 100755 index 0000000000..0f8a1f4ceb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindResize(pybind11::module &m) { + pybind11::class_(m, "Resize") + .def(pybind11::init(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc new file mode 100755 index 0000000000..9d0f292e1c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc @@ -0,0 +1,186 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/stride_pad.h" + +namespace ultrainfer { +namespace vision { + +bool StridePad::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar value; + if (value_.size() == 1) { + value = cv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = cv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = cv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + // top, bottom, left, right + cv::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, value); + mat->SetHeight(origin_h + pad_h); + mat->SetWidth(origin_w + pad_w); + return true; +} + +#ifdef ENABLE_FLYCV +bool StridePad::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." 
<< std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar value; + if (value_.size() == 1) { + value = fcv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = fcv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = fcv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = fcv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + fcv::Mat new_im; + // top, bottom, left, right + fcv::copy_make_border(*im, new_im, 0, pad_h, 0, pad_w, + fcv::BorderType::BORDER_CONSTANT, value); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool StridePad::ImplByCvCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + + float4 value; + if (value_.size() == 1) { + value = make_float4(value_[0], 0.0f, 0.0f, 0.0f); + } else if (value_.size() == 2) { + value = make_float4(value_[0], value_[1], 0.0f, 0.0f); + } else if (value_.size() == 3) { + value = make_float4(value_[0], value_[1], value_[2], 0.0f); + } else { + value = make_float4(value_[0], value_[1], value_[2], value_[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + int height = mat->Height() + pad_h; + int width = mat->Width() + pad_w; + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, 0, 0, + NVCV_BORDER_CONSTANT, value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool StridePad::Run(Mat *mat, int stride, const std::vector &value, + ProcLib lib) { + auto p = StridePad(stride, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h new file mode 100755 index 0000000000..5e873c4b4f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images with stride. + */ +class ULTRAINFER_DECL StridePad : public Processor { +public: + // only support pad with left-top padding mode + StridePad(int stride, const std::vector &value) { + stride_ = stride; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "StridePad"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] stride stride of the padding. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int stride, + const std::vector &value = std::vector(), + ProcLib lib = ProcLib::DEFAULT); + +private: + int stride_ = 32; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc new file mode 100755 index 0000000000..ddc579bdba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindStridePad(pybind11::module &m) { + pybind11::class_(m, "StridePad") + .def(pybind11::init>(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc new file mode 100755 index 0000000000..7bc2818945 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +void FuseNormalizeCast(std::vector> *processors) { + // Fuse Normalize and Cast + int cast_index = -1; + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "Cast") { + if (i == 0) { + continue; + } + if ((*processors)[i - 1]->Name() != "Normalize" && + (*processors)[i - 1]->Name() != "NormalizeAndPermute") { + continue; + } + cast_index = i; + } + } + if (cast_index < 0) { + return; + } + + if (dynamic_cast((*processors)[cast_index].get())->GetDtype() != + "float") { + return; + } + processors->erase(processors->begin() + cast_index); + FDINFO << (*processors)[cast_index - 1]->Name() << " and Cast are fused to " + << (*processors)[cast_index - 1]->Name() + << " in preprocessing pipeline." << std::endl; +} + +void FuseNormalizeHWC2CHW(std::vector> *processors) { + // Fuse Normalize and HWC2CHW to NormalizeAndPermute + int hwc2chw_index = -1; + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "HWC2CHW") { + if (i == 0) { + continue; + } + if ((*processors)[i - 1]->Name() != "Normalize") { + continue; + } + hwc2chw_index = i; + } + } + + if (hwc2chw_index < 0) { + return; + } + + // Get alpha and beta of Normalize + std::vector alpha = + dynamic_cast((*processors)[hwc2chw_index - 1].get()) + ->GetAlpha(); + std::vector beta = + dynamic_cast((*processors)[hwc2chw_index - 1].get()) + ->GetBeta(); + + // Delete Normalize and HWC2CHW + processors->erase(processors->begin() + hwc2chw_index); + processors->erase(processors->begin() + hwc2chw_index - 1); + + // Add NormalizeAndPermute + std::vector mean({0.0, 0.0, 0.0}); + std::vector std({1.0, 1.0, 1.0}); + processors->push_back(std::make_shared(mean, std)); + + // Set alpha and beta + auto processor = dynamic_cast( + (*processors)[hwc2chw_index - 1].get()); + + processor->SetAlpha(alpha); + processor->SetBeta(beta); + FDINFO << "Normalize and HWC2CHW are fused to NormalizeAndPermute " + " in preprocessing pipeline." 
+ << std::endl; +} + +void FuseNormalizeColorConvert( + std::vector> *processors) { + // Fuse Normalize and BGR2RGB/RGB2BGR + int normalize_index = -1; + int color_convert_index = -1; + // If these middle processors are after BGR2RGB/RGB2BGR and before Normalize, + // we can still fuse Normalize and BGR2RGB/RGB2BGR + static std::unordered_set middle_processors( + {"Resize", "ResizeByShort", "ResizeByLong", "Crop", "CenterCrop", + "LimitByStride", "LimitShort", "Pad", "PadToSize", "StridePad", + "WarpAffine"}); + + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "BGR2RGB" || + (*processors)[i]->Name() == "RGB2BGR") { + color_convert_index = i; + for (size_t j = color_convert_index + 1; j < processors->size(); ++j) { + if ((*processors)[j]->Name() == "Normalize" || + (*processors)[j]->Name() == "NormalizeAndPermute") { + normalize_index = j; + break; + } + } + if (normalize_index < 0) { + return; + } + for (size_t j = color_convert_index + 1; j < normalize_index; ++j) { + if (middle_processors.count((*processors)[j]->Name())) { + continue; + } + return; + } + } + } + + if (color_convert_index < 0) { + return; + } + + // Delete Color Space Convert + std::string color_processor_name = (*processors)[color_convert_index]->Name(); + processors->erase(processors->begin() + color_convert_index); + + // Toggle the swap_rb option of the Normalize processor + std::string normalize_processor_name = + (*processors)[normalize_index - 1]->Name(); + bool swap_rb; + if (normalize_processor_name == "Normalize") { + auto processor = + dynamic_cast((*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else if (normalize_processor_name == "NormalizeAndPermute") { + auto processor = dynamic_cast( + (*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else { + FDASSERT(false, "Something wrong in FuseNormalizeColorConvert()."); + } + + FDINFO << color_processor_name << " and " << normalize_processor_name + << " are fused to " << normalize_processor_name + << " with swap_rb=" << !swap_rb << std::endl; +} + +void FuseTransforms(std::vector> *processors) { + FuseNormalizeCast(processors); + FuseNormalizeHWC2CHW(processors); + FuseNormalizeColorConvert(processors); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h new file mode 100755 index 0000000000..dc3da75361 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
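A minimal usage sketch of the fusion passes implemented above (illustrative only, not part of the patch; the Normalize, HWC2CHW, and BGR2RGB constructors are assumed from the rest of this library):

#include <memory>
#include <vector>

#include "ultrainfer/vision/common/processors/transform.h"

// Build a typical preprocessing pipeline and let FuseTransforms rewrite it
// in place.
std::vector<std::shared_ptr<ultrainfer::vision::Processor>>
BuildFusedPipeline() {
  using namespace ultrainfer::vision;
  std::vector<std::shared_ptr<Processor>> ops;
  ops.push_back(std::make_shared<BGR2RGB>());
  ops.push_back(std::make_shared<Resize>(224, 224));
  ops.push_back(std::make_shared<Normalize>(
      std::vector<float>{0.485f, 0.456f, 0.406f},
      std::vector<float>{0.229f, 0.224f, 0.225f}));
  ops.push_back(std::make_shared<HWC2CHW>());
  // Expected result: Normalize + HWC2CHW collapse into NormalizeAndPermute,
  // and BGR2RGB is folded into it by toggling swap_rb, so the pipeline ends
  // up as Resize + NormalizeAndPermute.
  FuseTransforms(&ops);
  return ops;
}

The motivation is to avoid a separate pass over the image for operations that can be applied in a single step.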
+ +#pragma once + +#include "ultrainfer/vision/common/processors/cast.h" +#include "ultrainfer/vision/common/processors/center_crop.h" +#include "ultrainfer/vision/common/processors/color_space_convert.h" +#include "ultrainfer/vision/common/processors/convert.h" +#include "ultrainfer/vision/common/processors/convert_and_permute.h" +#include "ultrainfer/vision/common/processors/crop.h" +#include "ultrainfer/vision/common/processors/hwc2chw.h" +#include "ultrainfer/vision/common/processors/limit_by_stride.h" +#include "ultrainfer/vision/common/processors/limit_short.h" +#include "ultrainfer/vision/common/processors/normalize.h" +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" +#include "ultrainfer/vision/common/processors/pad.h" +#include "ultrainfer/vision/common/processors/pad_to_size.h" +#include "ultrainfer/vision/common/processors/resize.h" +#include "ultrainfer/vision/common/processors/resize_by_short.h" +#include "ultrainfer/vision/common/processors/stride_pad.h" +#include "ultrainfer/vision/common/processors/warp_affine.h" +#include + +namespace ultrainfer { +namespace vision { + +void FuseTransforms(std::vector> *processors); +// Fuse Normalize + Cast(Float) to Normalize +void FuseNormalizeCast(std::vector> *processors); +// Fuse Normalize + HWC2CHW to NormalizeAndPermute +void FuseNormalizeHWC2CHW(std::vector> *processors); +// Fuse Normalize + Color Convert +void FuseNormalizeColorConvert( + std::vector> *processors); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc new file mode 100755 index 0000000000..0ca94fd063 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc @@ -0,0 +1,279 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
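A complementary sketch for the Normalize + Cast fusion declared in this header (hypothetical; the Cast constructor taking a dtype string, and the Normalize constructor, are assumptions based on how they are used in transform.cc above):

#include <memory>
#include <vector>

#include "ultrainfer/vision/common/processors/transform.h"

// Normalize already produces float output, so a trailing Cast("float") is
// redundant; FuseNormalizeCast is expected to drop it.
void FuseCastExample(
    std::vector<std::shared_ptr<ultrainfer::vision::Processor>> *ops) {
  using namespace ultrainfer::vision;
  ops->push_back(std::make_shared<Normalize>(
      std::vector<float>{0.5f, 0.5f, 0.5f},
      std::vector<float>{0.5f, 0.5f, 0.5f}));
  ops->push_back(std::make_shared<Cast>("float"));
  FuseNormalizeCast(ops);  // erases the Cast, keeps Normalize
}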
+ +#include "ultrainfer/utils/utils.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +FDDataType OpenCVDataTypeToFD(int type) { + type = type % 8; + if (type == 0) { + return FDDataType::UINT8; + } else if (type == 1) { + return FDDataType::INT8; + } else if (type == 2) { + FDASSERT(false, + "While calling OpenCVDataTypeToFD(), get UINT16 type which is not " + "supported now."); + } else if (type == 3) { + return FDDataType::INT16; + } else if (type == 4) { + return FDDataType::INT32; + } else if (type == 5) { + return FDDataType::FP32; + } else if (type == 6) { + return FDDataType::FP64; + } else { + FDASSERT(false, + "While calling OpenCVDataTypeToFD(), get type = %d, which is not " + "expected.", + type); + } +} + +int CreateOpenCVDataType(FDDataType type, int channel) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in OpenCV."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return CV_8UC1; + } else if (channel == 3) { + return CV_8UC3; + } else { + return CV_8UC4; + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return CV_32FC1; + } else if (channel == 3) { + return CV_32FC3; + } else { + return CV_32FC4; + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return CV_32FC3; +} + +#ifdef ENABLE_FLYCV +FDDataType FlyCVDataTypeToFD(fcv::FCVImageType type) { + if (type == fcv::FCVImageType::GRAY_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_BGRA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR565_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB565_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::GRAY_S32) { + return FDDataType::INT32; + } else if (type == fcv::FCVImageType::GRAY_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_F32) { + 
return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::GRAY_F64) { + return FDDataType::FP64; + } + FDASSERT(false, "While calling FlyCVDataTypeToFD(), get unexpected type:%d.", + int(type)); + return FDDataType::UNKNOWN1; +} + +fcv::FCVImageType CreateFlyCVDataType(FDDataType type, int channel) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in FlyCV."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return fcv::FCVImageType::GRAY_U8; + } else if (channel == 3) { + return fcv::FCVImageType::PKG_BGR_U8; + } else { + return fcv::FCVImageType::PKG_BGRA_U8; + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return fcv::FCVImageType::GRAY_F32; + } else if (channel == 3) { + return fcv::FCVImageType::PKG_BGR_F32; + } else { + return fcv::FCVImageType::PKG_BGRA_F32; + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return fcv::FCVImageType::PKG_BGR_F32; +} + +fcv::Mat ConvertOpenCVMatToFlyCV(cv::Mat &im) { + int type = im.type() % 8; + // 0: uint8; 5: float32; 6: float64 + if (type != 0 && type != 5 && type != 6) { + FDASSERT(false, "Only support type of uint8/float/double, but now it's %d.", + im.type()); + } + auto fcv_type = + CreateFlyCVDataType(OpenCVDataTypeToFD(im.type()), im.channels()); + return fcv::Mat(im.cols, im.rows, fcv_type, im.ptr()); // reference only +} + +cv::Mat ConvertFlyCVMatToOpenCV(fcv::Mat &fim) { + auto fd_dtype = FlyCVDataTypeToFD(fim.type()); + if (fd_dtype != FDDataType::UINT8 && fd_dtype != FDDataType::FP32 && + fd_dtype != FDDataType::FP64) { + FDASSERT(false, "Only support type of uint8/float/double, but now it's %s.", + Str(fd_dtype).c_str()); + } + auto ocv_type = CreateOpenCVDataType(fd_dtype, fim.channels()); + return cv::Mat(fim.height(), fim.width(), ocv_type, + fim.data()); // reference only +} +#endif + +cv::Mat CreateZeroCopyOpenCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data) { + cv::Mat ocv_mat; + switch (type) { + case FDDataType::UINT8: + ocv_mat = cv::Mat(height, width, CV_8UC(channels), data); + break; + case FDDataType::INT8: + ocv_mat = cv::Mat(height, width, CV_8SC(channels), data); + break; + case FDDataType::INT16: + ocv_mat = cv::Mat(height, width, CV_16SC(channels), data); + break; + case FDDataType::INT32: + ocv_mat = cv::Mat(height, width, CV_32SC(channels), data); + break; + case FDDataType::FP32: + ocv_mat = cv::Mat(height, width, CV_32FC(channels), data); + break; + case FDDataType::FP64: + ocv_mat = cv::Mat(height, width, CV_64FC(channels), data); + break; + default: + FDASSERT(false, + "Tensor type %d is not supported While calling " + "CreateZeroCopyOpenCVMat.", + type); + break; + } + return ocv_mat; +} + +cv::Mat CreateZeroCopyOpenCVMatFromTensor(const FDTensor &tensor, + Layout layout) { + FDASSERT(tensor.shape.size() == 3, "When create OepnCV Mat from tensor," + "tensor shape should be 3-Dim"); + FDDataType type = tensor.dtype; + int height = static_cast(tensor.shape[0]); + int width = static_cast(tensor.shape[1]); + int channels = static_cast(tensor.shape[2]); + if (layout == Layout::CHW) { + channels = static_cast(tensor.shape[0]); + height = static_cast(tensor.shape[1]); + width = static_cast(tensor.shape[2]); + } + return 
CreateZeroCopyOpenCVMatFromBuffer( + height, width, channels, type, const_cast(tensor.CpuData())); +} + +#ifdef ENABLE_FLYCV +fcv::Mat CreateZeroCopyFlyCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data) { + fcv::Mat fcv_mat; + auto fcv_type = CreateFlyCVDataType(type, channels); + switch (type) { + case FDDataType::UINT8: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + case FDDataType::FP32: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + case FDDataType::FP64: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + default: + FDASSERT(false, + "Tensor type %d is not supported While calling " + "CreateZeroCopyFlyCVMat.", + type); + break; + } + return fcv_mat; +} + +fcv::Mat CreateZeroCopyFlyCVMatFromTensor(const FDTensor &tensor) { + // TODO(qiuyanjun): Should add a Layout checking. Now, we + // assume that the input tensor is already in Layout::HWC. + FDASSERT(tensor.shape.size() == 3, + "When create FlyCV Mat from tensor," + "tensor shape should be 3-Dim, HWC layout"); + FDDataType type = tensor.dtype; + int height = static_cast(tensor.shape[0]); + int width = static_cast(tensor.shape[1]); + int channels = static_cast(tensor.shape[2]); + return CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, + const_cast(tensor.Data())); +} +#endif + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h new file mode 100755 index 0000000000..0bf48e6d52 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h @@ -0,0 +1,55 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
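A short sketch of the zero-copy helpers defined above (illustrative; FDTensor::Resize is assumed to allocate a CPU buffer when called with default device arguments, as it is used for the output caches elsewhere in this patch):

#include "opencv2/core/core.hpp"
#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/vision/common/processors/utils.h"

// Wrap an HWC float tensor as a cv::Mat without copying. The Mat only
// references the tensor's buffer, so the tensor must outlive the Mat.
cv::Mat WrapTensorAsMat(ultrainfer::FDTensor &tensor) {
  tensor.Resize({224, 224, 3}, ultrainfer::FDDataType::FP32, "demo_input");
  return ultrainfer::vision::CreateZeroCopyOpenCVMatFromTensor(
      tensor, ultrainfer::vision::Layout::HWC);
}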
+ +#pragma once + +#include "opencv2/core/core.hpp" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef ENABLE_FLYCV +#include "flycv.h" // NOLINT +#endif + +namespace ultrainfer { +namespace vision { + +// Convert data type of opencv to FDDataType +FDDataType OpenCVDataTypeToFD(int type); +// Create data type of opencv by FDDataType +int CreateOpenCVDataType(FDDataType type, int channel = 1); +#ifdef ENABLE_FLYCV +// Convert data type of flycv to FDDataType +FDDataType FlyCVDataTypeToFD(fcv::FCVImageType type); +// Create data type of flycv by FDDataType +fcv::FCVImageType CreateFlyCVDataType(FDDataType type, int channel = 1); +// Convert cv::Mat to fcv::Mat +fcv::Mat ConvertOpenCVMatToFlyCV(cv::Mat &im); +// Convert fcv::Mat to fcv::mat +cv::Mat ConvertFlyCVMatToOpenCV(fcv::Mat &fim); +#endif + +// Create zero copy OpenCV/FlyCV Mat from FD Tensor / Buffer +cv::Mat CreateZeroCopyOpenCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data); +cv::Mat CreateZeroCopyOpenCVMatFromTensor(const FDTensor &tensor, + Layout layout = Layout::HWC); +#ifdef ENABLE_FLYCV +fcv::Mat CreateZeroCopyFlyCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data); +fcv::Mat CreateZeroCopyFlyCVMatFromTensor(const FDTensor &tensor); +#endif +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc new file mode 100755 index 0000000000..e1707b05e4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/warp_affine.h" + +namespace ultrainfer { +namespace vision { + +bool WarpAffine::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "WarpAffine: The format of input is not HWC." << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + if (width_ > 0 && height_ > 0) { + cv::warpAffine(*im, *im, trans_matrix_, cv::Size(width_, height_), interp_, + border_mode_, borderValue_); + } else { + FDERROR + << "WarpAffine: the parameters must satisfy (width > 0 && height > 0) ." 
+ << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + + return true; +} + +bool WarpAffine::Run(Mat *mat, const cv::Mat &trans_matrix, int width, + int height, int interp, int border_mode, + const cv::Scalar &borderValue, ProcLib lib) { + auto r = + WarpAffine(trans_matrix, width, height, interp, border_mode, borderValue); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h new file mode 100755 index 0000000000..9994acb6c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +class WarpAffine : public Processor { +public: + WarpAffine(const cv::Mat &trans_matrix, int width, int height, int interp = 1, + int border_mode = 0, + const cv::Scalar &borderValue = cv::Scalar()) { + trans_matrix_ = trans_matrix; + width_ = width; + height_ = height; + interp_ = interp; + border_mode_ = border_mode; + borderValue_ = borderValue; + } + + bool ImplByOpenCV(Mat *mat); + std::string Name() { return "WarpAffine"; } + + bool SetTransformMatrix(const cv::Mat &trans_matrix) { + trans_matrix_ = trans_matrix; + return true; + } + + std::tuple GetWidthAndHeight() { + return std::make_tuple(width_, height_); + } + + static bool Run(Mat *mat, const cv::Mat &trans_matrix, int width, int height, + int interp = 1, int border_mode = 0, + const cv::Scalar &borderValue = cv::Scalar(), + ProcLib lib = ProcLib::DEFAULT); + +private: + cv::Mat trans_matrix_; + int width_; + int height_; + int interp_; + int border_mode_; + cv::Scalar borderValue_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/result.cc b/libs/ultrainfer/ultrainfer/vision/common/result.cc new file mode 100755 index 0000000000..461da828d1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/result.cc @@ -0,0 +1,944 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
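A small usage sketch for the WarpAffine processor added above (illustrative; constructing an ultrainfer::vision::Mat directly from a cv::Mat is an assumption based on how processors are driven elsewhere in this library):

#include "opencv2/imgproc.hpp"
#include "ultrainfer/vision/common/processors/warp_affine.h"

// Rotate an image by 30 degrees around its center while keeping the
// original canvas size; pixels outside the source get the default border
// value (constant zero).
bool RotateBy30(cv::Mat &im) {
  cv::Point2f center(im.cols / 2.0f, im.rows / 2.0f);
  cv::Mat trans = cv::getRotationMatrix2D(center, /*angle=*/30.0, /*scale=*/1.0);
  ultrainfer::vision::Mat mat(im);
  return ultrainfer::vision::WarpAffine::Run(&mat, trans, im.cols, im.rows,
                                             /*interp=*/1, /*border_mode=*/0);
}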
+#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +void ClassifyResult::Free() { + std::vector().swap(label_ids); + std::vector().swap(scores); + std::vector().swap(feature); +} + +void ClassifyResult::Clear() { + label_ids.clear(); + scores.clear(); + feature.clear(); +} + +void ClassifyResult::Resize(int size) { + label_ids.resize(size); + scores.resize(size); + // TODO(qiuyanjun): feature not perform resize now. + // may need the code below for future. + // feature.resize(size); +} + +std::string ClassifyResult::Str() { + std::string out; + out = "ClassifyResult(\nlabel_ids: "; + for (size_t i = 0; i < label_ids.size(); ++i) { + out = out + std::to_string(label_ids[i]) + ", "; + } + out += "\nscores: "; + for (size_t i = 0; i < scores.size(); ++i) { + out = out + std::to_string(scores[i]) + ", "; + } + if (!feature.empty()) { + out += "\nfeature: size ("; + out += std::to_string(feature.size()) + "), only show first 100 values.\n"; + for (size_t i = 0; i < feature.size(); ++i) { + // only show first 100 values. + if ((i + 1) <= 100) { + out = out + std::to_string(feature[i]) + ", "; + if ((i + 1) % 10 == 0 && (i + 1) < 100) { + out += "\n"; + } + if ((i + 1) == 100) { + out += "\n......"; + } + } + } + } + out += "\n)"; + return out; +} + +ClassifyResult &ClassifyResult::operator=(ClassifyResult &&other) { + if (&other != this) { + label_ids = std::move(other.label_ids); + scores = std::move(other.scores); + feature = std::move(other.feature); + } + return *this; +} + +void Mask::Reserve(int size) { data.reserve(size); } + +void Mask::Resize(int size) { data.resize(size); } + +void Mask::Free() { + std::vector().swap(data); + std::vector().swap(shape); +} + +void Mask::Clear() { + data.clear(); + shape.clear(); +} + +std::string Mask::Str() { + std::string out = "Mask("; + size_t ndim = shape.size(); + for (size_t i = 0; i < ndim; ++i) { + if (i < ndim - 1) { + out += std::to_string(shape[i]) + ","; + } else { + out += std::to_string(shape[i]); + } + } + out += ")\n"; + return out; +} + +DetectionResult::DetectionResult(const DetectionResult &res) { + boxes.assign(res.boxes.begin(), res.boxes.end()); + rotated_boxes.assign(res.rotated_boxes.begin(), res.rotated_boxes.end()); + scores.assign(res.scores.begin(), res.scores.end()); + label_ids.assign(res.label_ids.begin(), res.label_ids.end()); + contain_masks = res.contain_masks; + if (contain_masks) { + masks.clear(); + size_t mask_size = res.masks.size(); + for (size_t i = 0; i < mask_size; ++i) { + masks.emplace_back(res.masks[i]); + } + } +} + +DetectionResult &DetectionResult::operator=(DetectionResult &&other) { + if (&other != this) { + boxes = std::move(other.boxes); + rotated_boxes = std::move(other.rotated_boxes); + scores = std::move(other.scores); + label_ids = std::move(other.label_ids); + contain_masks = std::move(other.contain_masks); + if (contain_masks) { + masks.clear(); + masks = std::move(other.masks); + } + } + return *this; +} + +void DetectionResult::Free() { + std::vector>().swap(boxes); + std::vector>().swap(rotated_boxes); + std::vector().swap(scores); + std::vector().swap(label_ids); + std::vector().swap(masks); + contain_masks = false; +} + +void DetectionResult::Clear() { + boxes.clear(); + rotated_boxes.clear(); + scores.clear(); + label_ids.clear(); + masks.clear(); + contain_masks = false; +} + +void DetectionResult::Reserve(int size) { + boxes.reserve(size); + rotated_boxes.reserve(size); + scores.reserve(size); + label_ids.reserve(size); + if (contain_masks) { + 
masks.reserve(size); + } +} + +void DetectionResult::Resize(int size) { + boxes.resize(size); + rotated_boxes.resize(size); + scores.resize(size); + label_ids.resize(size); + if (contain_masks) { + masks.resize(size); + } +} + +std::string DetectionResult::Str() { + std::string out; + if (!contain_masks) { + out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n"; + if (!rotated_boxes.empty()) { + out = "DetectionResult: [x1, y1, x2, y2, x3, y3, x4, y4, score, " + "label_id]\n"; + } + } else { + out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id, " + "mask_shape]\n"; + if (!rotated_boxes.empty()) { + out = + "DetectionResult: [x1, y1, x2, y2, x3, y3, x4, y4, score, label_id, " + "mask_shape]\n"; + } + } + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]); + if (!contain_masks) { + out += "\n"; + } else { + out += ", " + masks[i].Str(); + } + } + + for (size_t i = 0; i < rotated_boxes.size(); ++i) { + out = out + std::to_string(rotated_boxes[i][0]) + "," + + std::to_string(rotated_boxes[i][1]) + ", " + + std::to_string(rotated_boxes[i][2]) + ", " + + std::to_string(rotated_boxes[i][3]) + ", " + + std::to_string(rotated_boxes[i][4]) + "," + + std::to_string(rotated_boxes[i][5]) + ", " + + std::to_string(rotated_boxes[i][6]) + ", " + + std::to_string(rotated_boxes[i][7]) + ", " + + std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]); + out += "\n"; + } + return out; +} + +// PerceptionResult ----------------------------------------------------- +PerceptionResult::PerceptionResult(const PerceptionResult &res) { + scores.assign(res.scores.begin(), res.scores.end()); + label_ids.assign(res.label_ids.begin(), res.label_ids.end()); + boxes.assign(res.boxes.begin(), res.boxes.end()); + center.assign(res.center.begin(), res.center.end()); + observation_angle.assign(res.observation_angle.begin(), + res.observation_angle.end()); + yaw_angle.assign(res.yaw_angle.begin(), res.yaw_angle.end()); + velocity.assign(res.velocity.begin(), res.velocity.end()); + valid.assign(res.valid.begin(), res.valid.end()); +} + +PerceptionResult &PerceptionResult::operator=(PerceptionResult &&other) { + if (&other != this) { + scores = std::move(other.scores); + label_ids = std::move(other.label_ids); + boxes = std::move(other.boxes); + center = std::move(other.center); + observation_angle = std::move(other.observation_angle); + yaw_angle = std::move(other.yaw_angle); + velocity = std::move(other.velocity); + valid = std::move(other.valid); + } + return *this; +} + +void PerceptionResult::Free() { + std::vector().swap(scores); + std::vector().swap(label_ids); + std::vector>().swap(boxes); + std::vector>().swap(center); + std::vector().swap(observation_angle); + std::vector().swap(yaw_angle); + std::vector>().swap(velocity); + std::vector().swap(valid); +} + +void PerceptionResult::Clear() { + scores.clear(); + label_ids.clear(); + boxes.clear(); + center.clear(); + observation_angle.clear(); + yaw_angle.clear(); + velocity.clear(); + valid.clear(); +} + +void PerceptionResult::Reserve(int size) { + scores.reserve(size); + label_ids.reserve(size); + boxes.reserve(size); + center.reserve(size); + observation_angle.reserve(size); + yaw_angle.reserve(size); + velocity.reserve(size); +} + +void PerceptionResult::Resize(int size) { + scores.resize(size); + 
label_ids.resize(size); + boxes.resize(size); + center.resize(size); + observation_angle.resize(size); + yaw_angle.resize(size); + velocity.resize(size); +} + +std::string PerceptionResult::Str() { + std::string out; + out = "PerceptionResult: ["; + if (valid[2]) { + out += "xmin, ymin, xmax, ymax, w, h, l,"; + } + if (valid[3]) { + out += " cx, cy, cz,"; + } + if (valid[5]) { + out += " yaw_angle,"; + } + if (valid[4]) { + out += " ob_angle,"; + } + if (valid[0]) { + out += " score,"; + } + if (valid[1]) { + out += " label_id,"; + } + out += "]\n"; + + for (size_t i = 0; i < boxes.size(); ++i) { + if (valid[2]) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(boxes[i][4]) + ", " + std::to_string(boxes[i][5]) + + ", " + std::to_string(boxes[i][6]) + ", "; + } + if (valid[3]) { + out = out + std::to_string(center[i][0]) + ", " + + std::to_string(center[i][1]) + ", " + std::to_string(center[i][2]) + + ", "; + } + if (valid[5]) { + out = out + std::to_string(yaw_angle[i]) + ", "; + } + if (valid[4]) { + out = out + std::to_string(observation_angle[i]) + ", "; + } + if (valid[0]) { + out = out + std::to_string(scores[i]) + ", "; + } + if (valid[1]) { + out = out + std::to_string(label_ids[i]); + } + out += "\n"; + } + return out; +} + +// PerceptionResult finished + +void KeyPointDetectionResult::Free() { + std::vector>().swap(keypoints); + std::vector().swap(scores); + num_joints = -1; +} + +void KeyPointDetectionResult::Clear() { + keypoints.clear(); + scores.clear(); + num_joints = -1; +} + +void KeyPointDetectionResult::Reserve(int size) { keypoints.reserve(size); } + +void KeyPointDetectionResult::Resize(int size) { keypoints.resize(size); } + +std::string KeyPointDetectionResult::Str() { + std::string out; + + out = "KeyPointDetectionResult: [x, y, conf]\n"; + for (size_t i = 0; i < keypoints.size(); ++i) { + out = out + std::to_string(keypoints[i][0]) + "," + + std::to_string(keypoints[i][1]) + ", " + std::to_string(scores[i]) + + "\n"; + } + out += "num_joints:" + std::to_string(num_joints) + "\n"; + return out; +} + +void OCRResult::Clear() { + boxes.clear(); + text.clear(); + rec_scores.clear(); + cls_scores.clear(); + cls_labels.clear(); +} + +void OCRCURVEResult::Clear() { + boxes.clear(); + text.clear(); + rec_scores.clear(); + cls_scores.clear(); + cls_labels.clear(); +} + +void MOTResult::Clear() { + boxes.clear(); + ids.clear(); + scores.clear(); + class_ids.clear(); +} + +std::string MOTResult::Str() { + std::string out; + out = "MOTResult:\nall boxes counts: " + std::to_string(boxes.size()) + "\n"; + out += "[xmin\tymin\txmax\tymax\tid\tscore]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + "[" + std::to_string(boxes[i][0]) + "\t" + + std::to_string(boxes[i][1]) + "\t" + std::to_string(boxes[i][2]) + + "\t" + std::to_string(boxes[i][3]) + "\t" + std::to_string(ids[i]) + + "\t" + std::to_string(scores[i]) + "]\n"; + } + return out; +} + +FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult &res) { + boxes.assign(res.boxes.begin(), res.boxes.end()); + landmarks.assign(res.landmarks.begin(), res.landmarks.end()); + scores.assign(res.scores.begin(), res.scores.end()); + landmarks_per_face = res.landmarks_per_face; +} + +void FaceDetectionResult::Free() { + std::vector>().swap(boxes); + std::vector().swap(scores); + std::vector>().swap(landmarks); + landmarks_per_face = 0; +} + +void FaceDetectionResult::Clear() { + 
boxes.clear(); + scores.clear(); + landmarks.clear(); + landmarks_per_face = 0; +} + +void FaceDetectionResult::Reserve(int size) { + boxes.reserve(size); + scores.reserve(size); + if (landmarks_per_face > 0) { + landmarks.reserve(size * landmarks_per_face); + } +} + +void FaceDetectionResult::Resize(int size) { + boxes.resize(size); + scores.resize(size); + if (landmarks_per_face > 0) { + landmarks.resize(size * landmarks_per_face); + } +} + +std::string FaceDetectionResult::Str() { + std::string out; + // format without landmarks + if (landmarks_per_face <= 0) { + out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + "\n"; + } + return out; + } + // format with landmarks + FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face), + "The size of landmarks != boxes.size * landmarks_per_face."); + out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " + + std::to_string(landmarks_per_face) + "]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + ", "; + for (size_t j = 0; j < landmarks_per_face; ++j) { + out = out + "(" + + std::to_string(landmarks[i * landmarks_per_face + j][0]) + "," + + std::to_string(landmarks[i * landmarks_per_face + j][1]); + if (j < landmarks_per_face - 1) { + out = out + "), "; + } else { + out = out + ")\n"; + } + } + } + return out; +} + +void FaceAlignmentResult::Free() { + std::vector>().swap(landmarks); +} + +void FaceAlignmentResult::Clear() { landmarks.clear(); } + +void FaceAlignmentResult::Reserve(int size) { landmarks.resize(size); } + +void FaceAlignmentResult::Resize(int size) { landmarks.resize(size); } + +std::string FaceAlignmentResult::Str() { + std::string out; + + out = "FaceAlignmentResult: [x, y]\n"; + out = out + "There are " + std::to_string(landmarks.size()) + + " landmarks, the top 10 are listed as below:\n"; + int landmarks_size = landmarks.size(); + size_t result_length = std::min(10, landmarks_size); + for (size_t i = 0; i < result_length; ++i) { + out = out + std::to_string(landmarks[i][0]) + "," + + std::to_string(landmarks[i][1]) + "\n"; + } + out += "num_landmarks:" + std::to_string(landmarks.size()) + "\n"; + return out; +} + +void SegmentationResult::Clear() { + label_map.clear(); + score_map.clear(); + shape.clear(); + contain_score_map = false; +} + +void SegmentationResult::Free() { + std::vector().swap(label_map); + std::vector().swap(score_map); + std::vector().swap(shape); + contain_score_map = false; +} + +void SegmentationResult::Reserve(int size) { + label_map.reserve(size); + if (contain_score_map) { + score_map.reserve(size); + } +} + +void SegmentationResult::Resize(int size) { + label_map.resize(size); + if (contain_score_map) { + score_map.resize(size); + } +} + +std::string SegmentationResult::Str() { + std::string out; + out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { + out = out + std::to_string(label_map[i * 10 + j]) + ", "; + } + out += ".....]\n"; + } + out += "...........\n"; + if (contain_score_map) { + out += "SegmentationResult Score map 10 rows x 10 
cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { + out = out + std::to_string(score_map[i * 10 + j]) + ", "; + } + out += ".....]\n"; + } + out += "...........\n"; + } + out += "result shape is: [" + std::to_string(shape[0]) + " " + + std::to_string(shape[1]) + "]"; + return out; +} + +SegmentationResult &SegmentationResult::operator=(SegmentationResult &&other) { + if (&other != this) { + label_map = std::move(other.label_map); + shape = std::move(other.shape); + contain_score_map = std::move(other.contain_score_map); + if (contain_score_map) { + score_map.clear(); + score_map = std::move(other.score_map); + } + } + return *this; +} +FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult &res) { + embedding.assign(res.embedding.begin(), res.embedding.end()); +} + +void FaceRecognitionResult::Free() { std::vector().swap(embedding); } + +void FaceRecognitionResult::Clear() { embedding.clear(); } + +void FaceRecognitionResult::Reserve(int size) { embedding.reserve(size); } + +void FaceRecognitionResult::Resize(int size) { embedding.resize(size); } + +std::string FaceRecognitionResult::Str() { + std::string out; + out = "FaceRecognitionResult: ["; + size_t numel = embedding.size(); + if (numel <= 0) { + return out + "Empty Result]"; + } + // max, min, mean + float min_val = embedding.at(0); + float max_val = embedding.at(0); + float total_val = embedding.at(0); + for (size_t i = 1; i < numel; ++i) { + float val = embedding.at(i); + total_val += val; + if (val < min_val) { + min_val = val; + } + if (val > max_val) { + max_val = val; + } + } + float mean_val = total_val / static_cast(numel); + out = out + "Dim(" + std::to_string(numel) + "), " + "Min(" + + std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + + "), " + "Mean(" + std::to_string(mean_val) + ")]\n"; + return out; +} + +MattingResult::MattingResult(const MattingResult &res) { + alpha.assign(res.alpha.begin(), res.alpha.end()); + foreground.assign(res.foreground.begin(), res.foreground.end()); + shape.assign(res.shape.begin(), res.shape.end()); + contain_foreground = res.contain_foreground; +} + +void MattingResult::Clear() { + alpha.clear(); + foreground.clear(); + shape.clear(); + contain_foreground = false; +} + +void MattingResult::Free() { + std::vector().swap(alpha); + std::vector().swap(foreground); + std::vector().swap(shape); + contain_foreground = false; +} + +void MattingResult::Reserve(int size) { + alpha.reserve(size); + if (contain_foreground) { + FDASSERT((shape.size() == 3), + "Please initial shape (h,w,c) before call Reserve."); + int c = static_cast(shape[2]); + foreground.reserve(size * c); + } +} + +void MattingResult::Resize(int size) { + alpha.resize(size); + if (contain_foreground) { + FDASSERT((shape.size() == 3), + "Please initial shape (h,w,c) before call Resize."); + int c = static_cast(shape[2]); + foreground.resize(size * c); + } +} + +std::string MattingResult::Str() { + std::string out; + out = "MattingResult["; + if (contain_foreground) { + out += "Foreground(true)"; + } else { + out += "Foreground(false)"; + } + out += ", Alpha("; + size_t numel = alpha.size(); + if (numel <= 0) { + return out + "[Empty Result]"; + } + // max, min, mean + float min_val = alpha.at(0); + float max_val = alpha.at(0); + float total_val = alpha.at(0); + for (size_t i = 1; i < numel; ++i) { + float val = alpha.at(i); + total_val += val; + if (val < min_val) { + min_val = val; + } + if (val > max_val) { + max_val = val; + } + } + float mean_val 
= total_val / static_cast(numel); + // shape + std::string shape_str = "Shape("; + for (size_t i = 0; i < shape.size(); ++i) { + if ((i + 1) != shape.size()) { + shape_str += std::to_string(shape[i]) + ","; + } else { + shape_str += std::to_string(shape[i]) + ")"; + } + } + out = out + "Numel(" + std::to_string(numel) + "), " + shape_str + ", Min(" + + std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + + "), " + "Mean(" + std::to_string(mean_val) + "))]\n"; + return out; +} + +std::string OCRResult::Str() { + std::string no_result; + if (boxes.size() > 0) { + std::string out; + for (int n = 0; n < boxes.size(); n++) { + out = out + "det boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(boxes[n][i * 2]) + "," + + std::to_string(boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]"; + + if (rec_scores.size() > 0) { + out = out + "rec text: " + text[n] + + " rec score:" + std::to_string(rec_scores[n]) + " "; + } + if (cls_labels.size() > 0) { + out = out + "cls label: " + std::to_string(cls_labels[n]) + + " cls score: " + std::to_string(cls_scores[n]); + } + out = out + "\n"; + } + + if (table_boxes.size() > 0 && table_structure.size() > 0) { + for (int n = 0; n < boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + } + std::vector> table_boxes; + std::vector table_structure; + return out; + + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() == 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < cls_scores.size(); i++) { + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() == 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && table_boxes.size() > 0 && + table_structure.size() > 0) { + std::string out; + for (int n = 0; n < table_boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + return out; + } + + no_result = no_result + "No Results!"; + return no_result; +} + +std::string 
OCRCURVEResult::Str() { + std::string no_result; + if (boxes.size() > 0) { + std::string out; + for (int n = 0; n < boxes.size(); n++) { + out = out + "det boxes: ["; + for (int i = 0; i < boxes[n].size() / 2; i++) { + out = out + "[" + std::to_string(boxes[n][i * 2]) + "," + + std::to_string(boxes[n][i * 2 + 1]) + "]"; + + if (i != boxes[n].size() / 2 - 1) { + out = out + ","; + } + } + out = out + "]"; + + if (rec_scores.size() > 0) { + out = out + "rec text: " + text[n] + + " rec score:" + std::to_string(rec_scores[n]) + " "; + } + if (cls_labels.size() > 0) { + out = out + "cls label: " + std::to_string(cls_labels[n]) + + " cls score: " + std::to_string(cls_scores[n]); + } + out = out + "\n"; + } + + if (table_boxes.size() > 0 && table_structure.size() > 0) { + for (int n = 0; n < boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + } + std::vector> table_boxes; + std::vector table_structure; + return out; + + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() == 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < cls_scores.size(); i++) { + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() == 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && table_boxes.size() > 0 && + table_structure.size() > 0) { + std::string out; + for (int n = 0; n < table_boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + return out; + } + + no_result = no_result + "No Results!"; + return no_result; +} +void HeadPoseResult::Free() { std::vector().swap(euler_angles); } + +void HeadPoseResult::Clear() { euler_angles.clear(); } + +void HeadPoseResult::Reserve(int size) { euler_angles.resize(size); } + +void HeadPoseResult::Resize(int size) { euler_angles.resize(size); } + +std::string HeadPoseResult::Str() { + std::string out; + + out = "HeadPoseResult: [yaw, pitch, roll]\n"; + out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + + "pitch: " + std::to_string(euler_angles[1]) + "\n" + 
+ "roll: " + std::to_string(euler_angles[2]) + "\n"; + return out; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/result.h b/libs/ultrainfer/ultrainfer/vision/common/result.h new file mode 100755 index 0000000000..456b894205 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/result.h @@ -0,0 +1,494 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "opencv2/core/core.hpp" +#include "ultrainfer/ultrainfer_model.h" +#include + +namespace ultrainfer { +/** \brief All C++ UltraInfer Vision Models APIs are defined inside this + * namespace + * + */ +namespace vision { +enum ULTRAINFER_DECL ResultType { + UNKNOWN_RESULT, + CLASSIFY, + DETECTION, + SEGMENTATION, + OCR, + MOT, + FACE_DETECTION, + FACE_ALIGNMENT, + FACE_RECOGNITION, + MATTING, + MASK, + KEYPOINT_DETECTION, + HEADPOSE, + PERCEPTION, +}; + +struct ULTRAINFER_DECL BaseResult { + ResultType type = ResultType::UNKNOWN_RESULT; +}; + +/*! @brief Classify result structure for all the image classify models + */ +struct ULTRAINFER_DECL ClassifyResult : public BaseResult { + ClassifyResult() = default; + /// Classify result for an image + std::vector label_ids; + /// The confidence for each classify result + std::vector scores; + /// The feature vector of recognizer, e.g, PP-ShiTuV2 Recognizer + std::vector feature; + ResultType type = ResultType::CLASSIFY; + + /// Resize ClassifyResult data buffer + void Resize(int size); + + /// Clear ClassifyResult + void Clear(); + + /// Clear ClassifyResult and free the memory + void Free(); + + /// Copy constructor + ClassifyResult(const ClassifyResult &other) = default; + /// Move assignment + ClassifyResult &operator=(ClassifyResult &&other); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! Mask structure, used in DetectionResult for instance segmentation models + */ +struct ULTRAINFER_DECL Mask : public BaseResult { + /// Mask data buffer + std::vector data; + /// Shape of mask + std::vector shape; // (H,W) ... + ResultType type = ResultType::MASK; + + /// clear Mask result + void Clear(); + + /// Clear Mask result and free the memory + void Free(); + + /// Return a mutable pointer of the mask data buffer + void *Data() { return data.data(); } + + /// Return a pointer of the mask data buffer for read only + const void *Data() const { return data.data(); } + + /// Reserve size for mask data buffer + void Reserve(int size); + + /// Resize the mask data buffer + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! 
@brief Detection result structure for all the object detection models and + * instance segmentation models + */ +struct ULTRAINFER_DECL DetectionResult : public BaseResult { + DetectionResult() = default; + /** \brief All the detected object boxes for an input image, the size of + * `boxes` is the number of detected objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief All the detected rotated object boxes for an input image, the size + * of `boxes` is the number of detected objects, and the element of + * `rotated_boxes` is an array of 8 float values, means [x1, y1, x2, y2, x3, + * y3, x4, y4] + */ + std::vector> rotated_boxes; + /** \brief The confidence for all the detected objects + */ + std::vector scores; + /// The classify label for all the detected objects + std::vector label_ids; + /** \brief For instance segmentation model, `masks` is the predict mask for + * all the deteced objects + */ + std::vector masks; + /// Shows if the DetectionResult has mask + bool contain_masks = false; + + ResultType type = ResultType::DETECTION; + + /// Copy constructor + DetectionResult(const DetectionResult &res); + /// Move assignment + DetectionResult &operator=(DetectionResult &&other); + + /// Clear DetectionResult + void Clear(); + + /// Clear DetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Detection result structure for all the object detection models and + * instance segmentation models + */ +struct ULTRAINFER_DECL PerceptionResult : public BaseResult { + PerceptionResult() = default; + + std::vector scores; + + std::vector label_ids; + // xmin, ymin, xmax, ymax, h, w, l + std::vector> boxes; + // cx, cy, cz + std::vector> center; + + std::vector observation_angle; + + std::vector yaw_angle; + // vx, vy, vz + std::vector> velocity; + + // valid results for func Str(): True for printing + // 0 scores + // 1 label_ids + // 2 boxes + // 3 center + // 4 observation_angle + // 5 yaw_angle + // 6 velocity + std::vector valid; + + /// Copy constructor + PerceptionResult(const PerceptionResult &res); + /// Move assignment + PerceptionResult &operator=(PerceptionResult &&other); + + /// Clear PerceptionResult + void Clear(); + + /// Clear PerceptionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! 
@brief KeyPoint Detection result structure for all the keypoint detection + * models + */ +struct ULTRAINFER_DECL KeyPointDetectionResult : public BaseResult { + /** \brief All the coordinates of detected keypoints for an input image, the + * size of `keypoints` is num_detected_objects * num_joints, and the element + * of `keypoint` is a array of 2 float values, means [x, y] + */ + std::vector> keypoints; + //// The confidence for all the detected points + std::vector scores; + //// Number of joints for a detected object + int num_joints = -1; + + ResultType type = ResultType::KEYPOINT_DETECTION; + /// Clear KeyPointDetectionResult + void Clear(); + + /// Clear KeyPointDetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +struct ULTRAINFER_DECL OCRResult : public BaseResult { + std::vector> boxes; + + std::vector text; + std::vector rec_scores; + + std::vector cls_scores; + std::vector cls_labels; + + std::vector> table_boxes; + std::vector table_structure; + std::string table_html; + + ResultType type = ResultType::OCR; + + void Clear(); + + std::string Str(); +}; + +struct ULTRAINFER_DECL OCRCURVEResult : public BaseResult { + std::vector> boxes; + std::vector text; + std::vector rec_scores; + + std::vector cls_scores; + std::vector cls_labels; + + std::vector> table_boxes; + std::vector table_structure; + std::string table_html; + + ResultType type = ResultType::OCR; + + void Clear(); + + std::string Str(); +}; +/*! @brief MOT(Multi-Object Tracking) result structure for all the MOT models + */ +struct ULTRAINFER_DECL MOTResult : public BaseResult { + /** \brief All the tracking object boxes for an input image, the size of + * `boxes` is the number of tracking objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief All the tracking object ids + */ + std::vector ids; + /** \brief The confidence for all the tracking objects + */ + std::vector scores; + /** \brief The classify label id for all the tracking object + */ + std::vector class_ids; + + ResultType type = ResultType::MOT; + /// Clear MOT result + void Clear(); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face detection result structure for all the face detection models + */ +struct ULTRAINFER_DECL FaceDetectionResult : public BaseResult { + /** \brief All the detected object boxes for an input image, the size of + * `boxes` is the number of detected objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief + * If the model detect face with landmarks, every detected object box + * correspoing to a landmark, which is a array of 2 float values, means + * location [x,y] + */ + std::vector> landmarks; + /** \brief + * Indicates the confidence of all targets detected from a single image, and + * the number of elements is consistent with boxes.size() + */ + std::vector scores; + ResultType type = ResultType::FACE_DETECTION; + /** \brief + * `landmarks_per_face` indicates the number of face landmarks for each + * detected face if the model's output contains face landmarks (such as + * YOLOv5Face, SCRFD, ...) 
+ */ + int landmarks_per_face; + + FaceDetectionResult() { landmarks_per_face = 0; } + FaceDetectionResult(const FaceDetectionResult &res); + /// Clear FaceDetectionResult + void Clear(); + + /// Clear FaceDetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face Alignment result structure for all the face alignment models + */ +struct ULTRAINFER_DECL FaceAlignmentResult : public BaseResult { + /** \brief All the coordinates of detected landmarks for an input image, and + * the element of `landmarks` is a array of 2 float values, means [x, y] + */ + std::vector> landmarks; + + ResultType type = ResultType::FACE_ALIGNMENT; + /// Clear FaceAlignmentResult + void Clear(); + + /// Clear FaceAlignmentResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Segmentation result structure for all the segmentation models + */ +struct ULTRAINFER_DECL SegmentationResult : public BaseResult { + SegmentationResult() = default; + /** \brief + * `label_map` stores the pixel-level category labels for input image. the + * number of pixels is equal to label_map.size() + */ + std::vector label_map; + /** \brief + * `score_map` stores the probability of the predicted label for each pixel of + * input image. + */ + std::vector score_map; + /// The output shape, means [H, W] + std::vector shape; + /// SegmentationResult whether containing score_map + bool contain_score_map = false; + + /// Copy constructor + SegmentationResult(const SegmentationResult &other) = default; + /// Move assignment + SegmentationResult &operator=(SegmentationResult &&other); + + ResultType type = ResultType::SEGMENTATION; + /// Clear Segmentation result + void Clear(); + + /// Clear Segmentation result and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face recognition result structure for all the Face recognition models + */ +struct ULTRAINFER_DECL FaceRecognitionResult : public BaseResult { + /** \brief The feature embedding that represents the final extraction of the + * face recognition model can be used to calculate the feature similarity + * between faces. + */ + std::vector embedding; + + ResultType type = ResultType::FACE_RECOGNITION; + + FaceRecognitionResult() {} + FaceRecognitionResult(const FaceRecognitionResult &res); + /// Clear FaceRecognitionResult + void Clear(); + + /// Clear FaceRecognitionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Matting result structure for all the Matting models + */ +struct ULTRAINFER_DECL MattingResult : public BaseResult { + /** \brief + `alpha` is a one-dimensional vector, which is the predicted alpha transparency + value. The range of values is [0., 1.], and the length is hxw. h, w are the + height and width of the input image + */ + std::vector alpha; // h x w + /** \brief + If the model can predict foreground, `foreground` save the predicted + foreground image, the shape is [hight,width,channel] generally. 
+ */ + std::vector foreground; // h x w x c (c=3 default) + /** \brief + * The shape of output result, when contain_foreground == false, shape only + * contains (h, w), when contain_foreground == true, shape contains (h, w, c), + * and c is generally 3 + */ + std::vector shape; + /** \brief + If the model can predict alpha matte and foreground, contain_foreground = + true, default false + */ + bool contain_foreground = false; + + ResultType type = ResultType::MATTING; + + MattingResult() {} + MattingResult(const MattingResult &res); + /// Clear matting result + void Clear(); + + /// Free matting result + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief HeadPose result structure for all the headpose models + */ +struct ULTRAINFER_DECL HeadPoseResult : public BaseResult { + /** \brief EulerAngles for an input image, and the element of `euler_angles` + * is a vector, contains {yaw, pitch, roll} + */ + std::vector euler_angles; + + ResultType type = ResultType::HEADPOSE; + /// Clear HeadPoseResult + void Clear(); + + /// Clear HeadPoseResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc new file mode 100755 index 0000000000..94a6fd0d8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDet::FastestDet(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FastestDet::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool FastestDet::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool FastestDet::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h new file mode 100755 index 0000000000..c63d7e1c59 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief FastestDet model object used when to load a FastestDet model exported + * by FastestDet. + */ +class ULTRAINFER_DECL FastestDet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./fastestdet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FastestDet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "fastestdet"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of FastestDet + virtual FastestDetPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of FastestDet + virtual FastestDetPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + FastestDetPreprocessor preprocessor_; + FastestDetPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc new file mode 100755 index 0000000000..c7546f00a1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
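+// ---------------------------------------------------------------------------
+// Illustrative usage sketch (not part of the original patch): a minimal C++
+// example of how the FastestDet wrapper declared in fastestdet.h might be
+// driven end to end. The model path "fastestdet.onnx", the image path
+// "test.jpg", and the helpers RuntimeOption::UseCpu() and Initialized() on the
+// base model class are assumptions made for the example only.
+//
+// #include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h"
+// #include <opencv2/imgcodecs.hpp>
+// #include <iostream>
+//
+// int main() {
+//   ultrainfer::RuntimeOption option;
+//   option.UseCpu();  // assumed helper that selects the CPU backends
+//   ultrainfer::vision::detection::FastestDet model(
+//       "fastestdet.onnx", "", option, ultrainfer::ModelFormat::ONNX);
+//   if (!model.Initialized()) {
+//     std::cerr << "Failed to initialize FastestDet." << std::endl;
+//     return -1;
+//   }
+//   cv::Mat im = cv::imread("test.jpg");
+//   ultrainfer::vision::DetectionResult res;
+//   if (!model.Predict(im, &res)) {
+//     std::cerr << "Prediction failed." << std::endl;
+//     return -1;
+//   }
+//   std::cout << res.Str() << std::endl;  // boxes are [xmin, ymin, xmax, ymax]
+//   return 0;
+// }
+//
+// For several images, BatchPredict() accepts a std::vector<cv::Mat> and fills
+// one DetectionResult per input.
+// ---------------------------------------------------------------------------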
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFastestDet(pybind11::module &m) { + pybind11::class_( + m, "FastestDetPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::FastestDetPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to preprocess the input data in " + "FastestDetPreprocessor.')"); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::FastestDetPreprocessor::GetSize, + &vision::detection::FastestDetPreprocessor::SetSize); + + pybind11::class_( + m, "FastestDetPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::FastestDetPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in FastestDetPostprocessor.')"); + } + return results; + }) + .def("run", + [](vision::detection::FastestDetPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in FastestDetPostprocessor.')"); + } + return results; + }) + .def_property( + "conf_threshold", + &vision::detection::FastestDetPostprocessor::GetConfThreshold, + &vision::detection::FastestDetPostprocessor::SetConfThreshold) + .def_property( + "nms_threshold", + &vision::detection::FastestDetPostprocessor::GetNMSThreshold, + &vision::detection::FastestDetPostprocessor::SetNMSThreshold); + + pybind11::class_(m, + "FastestDet") + .def(pybind11::init()) + .def("predict", + [](vision::detection::FastestDet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::FastestDet &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::FastestDet::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::FastestDet::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc new file mode 100755 index 0000000000..ee65d26279 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDetPostprocessor::FastestDetPostprocessor() { + conf_threshold_ = 0.65; + nms_threshold_ = 0.45; +} +float FastestDetPostprocessor::Sigmoid(float x) { + return 1.0f / (1.0f + exp(-x)); +} + +float FastestDetPostprocessor::Tanh(float x) { + return 2.0f / (1.0f + exp(-2 * x)) - 1; +} + +bool FastestDetPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = 1; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + + (*results)[bs].Clear(); + // output (1,85,22,22) CHW + const float *output = + reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2] * tensors[0].shape[3]; + int output_h = tensors[0].shape[2]; // out map height + int output_w = tensors[0].shape[3]; // out map weight + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // handle output boxes from out map + for (int h = 0; h < output_h; h++) { + for (int w = 0; w < output_w; w++) { + // object score + int obj_score_index = (h * output_w) + w; + float obj_score = output[obj_score_index]; + + // find max class + int category = 0; + float max_score = 0.0f; + int class_num = tensors[0].shape[1] - 5; + for (size_t i = 0; i < class_num; i++) { + obj_score_index = + ((5 + i) * output_h * output_w) + (h * output_w) + w; + float cls_score = output[obj_score_index]; + if (cls_score > max_score) { + max_score = cls_score; + category = i; + } + } + float score = pow(max_score, 0.4) * pow(obj_score, 0.6); + + // score threshold + if (score <= conf_threshold_) { + continue; + } + if (score > conf_threshold_) { + // handle box x y w h + int x_offset_index = (1 * output_h * output_w) + (h * output_w) + w; + int y_offset_index = (2 * output_h * output_w) + (h * output_w) + w; + int box_width_index = (3 * output_h * output_w) + (h * output_w) + w; + int box_height_index = (4 * output_h * output_w) + (h * output_w) + w; + + float x_offset = Tanh(output[x_offset_index]); + float y_offset = Tanh(output[y_offset_index]); + float box_width = Sigmoid(output[box_width_index]); + float box_height = Sigmoid(output[box_height_index]); + + float cx = (w + x_offset) / output_w; + float cy = (h + y_offset) / output_h; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + cx - box_width / 2.0f, cy - box_height / 2.0f, + cx + box_width / 2.0f, cy + box_height / 2.0f}); + (*results)[bs].label_ids.push_back(category); + (*results)[bs].scores.push_back(score); + } + } + } + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + // scale boxes to origin shape + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + 
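+      // Note added for clarity: at this point the box coordinates are
+      // normalized to roughly [0, 1] (cx/cy are grid positions plus predicted
+      // offsets divided by the output map size, and w/h come from a sigmoid),
+      // so multiplying by the original input width/height maps them back to
+      // pixel coordinates; they are clipped to the image bounds after NMS.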
(*results)[bs].boxes[i][0] = ((*results)[bs].boxes[i][0]) * ipt_w; + (*results)[bs].boxes[i][1] = ((*results)[bs].boxes[i][1]) * ipt_h; + (*results)[bs].boxes[i][2] = ((*results)[bs].boxes[i][2]) * ipt_w; + (*results)[bs].boxes[i][3] = ((*results)[bs].boxes[i][3]) * ipt_h; + } + // NMS + utils::NMS(&((*results)[bs]), nms_threshold_); + // clip box + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + (*results)[bs].boxes[i][0] = std::max((*results)[bs].boxes[i][0], 0.0f); + (*results)[bs].boxes[i][1] = std::max((*results)[bs].boxes[i][1], 0.0f); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h new file mode 100755 index 0000000000..aecd7460b8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for FastestDet serials model. 
+ */ +class ULTRAINFER_DECL FastestDetPostprocessor { +public: + /** \brief Create a postprocessor instance for FastestDet serials model + */ + FastestDetPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.65 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.65 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.45 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + float Sigmoid(float x); + float Tanh(float x); +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc new file mode 100755 index 0000000000..f8b9a02939 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDetPreprocessor::FastestDetPreprocessor() { + size_ = {352, 352}; //{h,w} +} + +bool FastestDetPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + + // fastestdet's preprocess steps + // 1. resize + // 2. 
convert_and_permute(swap_rb=false) + Resize::Run(mat, size_[0], size_[1]); // resize + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + // convert to float and HWC2CHW + ConvertAndPermute::Run(mat, alpha, beta, false); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool FastestDetPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h new file mode 100755 index 0000000000..12a41329b2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for FastestDet serials model. 
+ */ +class ULTRAINFER_DECL FastestDetPreprocessor { +public: + /** \brief Create a preprocessor instance for FastestDet serials model + */ + FastestDetPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {352, 352} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {352, 352} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {352, 352} + std::vector size_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc new file mode 100755 index 0000000000..6f0d1c4efa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc @@ -0,0 +1,338 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/nanodet_plus.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +struct NanoDetPlusCenterPoint { + int grid0; + int grid1; + int stride; +}; + +void GenerateNanoDetPlusCenterPoints( + const std::vector &size, const std::vector &downsample_strides, + std::vector *center_points) { + // size: tuple of input (width, height), e.g (320, 320) + // downsample_strides: downsample strides in NanoDet and + // NanoDet-Plus, e.g (8, 16, 32, 64) + const int width = size[0]; + const int height = size[1]; + for (const auto &ds : downsample_strides) { + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*center_points).emplace_back(NanoDetPlusCenterPoint{g0, g1, ds}); + } + } + } +} + +void WrapAndResize(Mat *mat, std::vector size, std::vector color, + bool keep_ratio = false) { + // Reference: nanodet/data/transform/warp.py#L139 + // size: tuple of input (width, height) + // The default value of `keep_ratio` is `fasle` in + // `config/nanodet-plus-m-1.5x_320.yml` for both + // train and val processes. So, we just let this + // option default `false` according to the official + // implementation in NanoDet and NanoDet-Plus. 
+ // Note, this function will apply a normal resize + // operation to input Mat if the keep_ratio option + // is fasle and the behavior will be the same as + // yolov5's letterbox if keep_ratio is true. + + // with keep_ratio = false (default) + if (!keep_ratio) { + int resize_h = size[1]; + int resize_w = size[0]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + return; + } + // with keep_ratio = true, same as yolov5's letterbox + float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + + int resize_h = int(round(static_cast(mat->Height()) * r)); + int resize_w = int(round(static_cast(mat->Width()) * r)); + + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +void GFLRegression(const float *logits, size_t reg_num, float *offset) { + // Hint: reg_num = reg_max + 1 + FDASSERT(((nullptr != logits) && (reg_num != 0)), + "NanoDetPlus: logits is nullptr or reg_num is 0 in GFLRegression."); + // softmax + float total_exp = 0.f; + std::vector softmax_probs(reg_num); + for (size_t i = 0; i < reg_num; ++i) { + softmax_probs[i] = std::exp(logits[i]); + total_exp += softmax_probs[i]; + } + for (size_t i = 0; i < reg_num; ++i) { + softmax_probs[i] = softmax_probs[i] / total_exp; + } + // gfl regression -> offset + for (size_t i = 0; i < reg_num; ++i) { + (*offset) += static_cast(i) * softmax_probs[i]; + } +} + +NanoDetPlus::NanoDetPlus(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool NanoDetPlus::Initialize() { + // parameters for preprocess + size = {320, 320}; + padding_value = {0.0f, 0.0f, 0.0f}; + keep_ratio = false; + downsample_strides = {8, 16, 32, 64}; + max_wh = 4096.0f; + reg_max = 7; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool NanoDetPlus::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // NanoDet-Plus preprocess steps + // 1. WrapAndResize + // 2. HWC->CHW + // 3. 
Normalize or Convert (keep BGR order) + WrapAndResize(mat, size, padding_value, keep_ratio); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // Compute `result = mat * alpha + beta` directly by channel + // Reference: /config/nanodet-plus-m-1.5x_320.yml#L89 + // from mean: [103.53, 116.28, 123.675], std: [57.375, 57.12, 58.395] + // x' = (x - mean) / std to x'= x * alpha + beta. + // e.g alpha[0] = 0.017429f = 1.0f / 57.375f + // e.g beta[0] = -103.53f * 0.0174291f + std::vector alpha = {0.017429f, 0.017507f, 0.017125f}; + std::vector beta = {-103.53f * 0.0174291f, -116.28f * 0.0175070f, + -123.675f * 0.0171247f}; // BGR order + Convert::Run(mat, alpha, beta); + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool NanoDetPlus::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + // generate center points with dowmsample strides + std::vector center_points; + GenerateNanoDetPlusCenterPoints(size, downsample_strides, ¢er_points); + + // infer_result shape might look like (1,2125,112) + const int num_cls_reg = infer_result.shape[2]; // e.g 112 + const int num_classes = num_cls_reg - (reg_max + 1) * 4; // e.g 80 + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + float *scores = data + i * num_cls_reg; + float *max_class_score = std::max_element(scores, scores + num_classes); + float confidence = (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(scores, max_class_score); + // fetch i-th center point + float grid0 = static_cast(center_points.at(i).grid0); + float grid1 = static_cast(center_points.at(i).grid1); + float downsample_stride = static_cast(center_points.at(i).stride); + // apply gfl regression to get offsets (l,t,r,b) + float *logits = data + i * num_cls_reg + num_classes; // 32|44... 
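+    // Note added for clarity: GFL decoding. Each of the four box sides
+    // (l, t, r, b) is predicted as a discrete distribution over reg_max + 1
+    // bins; GFLRegression() softmaxes the logits and returns the expected bin
+    // index, i.e. offset = sum_k k * softmax(logits)_k, measured in units of
+    // the downsample stride of this center point.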
+ std::vector offsets(4); + for (size_t j = 0; j < 4; ++j) { + GFLRegression(logits + j * (reg_max + 1), reg_max + 1, &offsets[j]); + } + // convert from offsets to [x1, y1, x2, y2] + float l = offsets[0]; // left + float t = offsets[1]; // top + float r = offsets[2]; // right + float b = offsets[3]; // bottom + + float x1 = (grid0 - l) * downsample_stride; // cx - l x1 + float y1 = (grid1 - t) * downsample_stride; // cy - t y1 + float x2 = (grid0 + r) * downsample_stride; // cx + r x2 + float y2 = (grid1 + b) * downsample_stride; // cy + b y2 + + result->boxes.emplace_back( + std::array{x1 + label_id * max_wh, y1 + label_id * max_wh, + x2 + label_id * max_wh, y2 + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + // without keep_ratio + if (!keep_ratio) { + // x' = (x / out_w) * ipt_w = x / (out_w / ipt_w) + // y' = (y / out_h) * ipt_h = y / (out_h / ipt_h) + float r_w = out_w / ipt_w; + float r_h = out_h / ipt_h; + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r_w, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r_h, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r_w, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r_h, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; + } + // with keep_ratio + float r = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * r) / 2; + float pad_w = (out_w - ipt_w * r) / 2; + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / r, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / r, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / r, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; 
+} + +bool NanoDetPlus::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h new file mode 100755 index 0000000000..745da299a0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief NanoDetPlus model object used when to load a NanoDetPlus model + * exported by NanoDet. + */ +class ULTRAINFER_DECL NanoDetPlus : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./nanodet_plus_320.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + NanoDetPlus(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + /// Get model's name + std::string ModelName() const { return "nanodet"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.35 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.35f, + float nms_iou_threshold = 0.5f); + + /*! @brief + Argument for image preprocessing step, tuple of input size (width, height), + default (320, 320) + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + // keep aspect ratio or not when perform resize operation. + // This option is set as `false` by default in NanoDet-Plus + bool keep_ratio; + // downsample strides for NanoDet-Plus to generate anchors, + // will take (8, 16, 32, 64) as default values + std::vector downsample_strides; + // for offseting the boxes by classes when using NMS, default 4096 + float max_wh; + /*! @brief + Argument for image postprocessing step, reg_max for GFL regression, default 7 + */ + int reg_max; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // RangiLyu/nanodet official 'export_onnx.py' script will export static ONNX + // by default. + // This value will auto check by ultrainfer after the internal Runtime + // initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc new file mode 100755 index 0000000000..70364a7269 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindNanoDetPlus(pybind11::module &m) {
+  pybind11::class_<vision::detection::NanoDetPlus, UltraInferModel>(
+      m, "NanoDetPlus")
+      .def(pybind11::init<std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("predict",
+           [](vision::detection::NanoDetPlus &self, pybind11::array &data,
+              float conf_threshold, float nms_iou_threshold) {
+             auto mat = PyArrayToCvMat(data);
+             vision::DetectionResult res;
+             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+             return res;
+           })
+      .def_readwrite("size", &vision::detection::NanoDetPlus::size)
+      .def_readwrite("padding_value",
+                     &vision::detection::NanoDetPlus::padding_value)
+      .def_readwrite("keep_ratio", &vision::detection::NanoDetPlus::keep_ratio)
+      .def_readwrite("downsample_strides",
+                     &vision::detection::NanoDetPlus::downsample_strides)
+      .def_readwrite("max_wh", &vision::detection::NanoDetPlus::max_wh)
+      .def_readwrite("reg_max", &vision::detection::NanoDetPlus::reg_max);
+}
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h
new file mode 100755
index 0000000000..53e2d6a963
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
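For orientation (not part of the patch): the header below only adds thin wrappers around RKYOLO that pre-set the per-model anchors and anchors-per-branch. A minimal C++ usage sketch for RKYOLOV5 follows; the umbrella header path, RuntimeOption::UseRKNPU2(), Initialized() and DetectionResult::Str() are assumed by analogy with the rest of this library, and the .rknn/.jpg paths are placeholders.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

int main() {
  ultrainfer::RuntimeOption option;
  option.UseRKNPU2();  // assumed helper; RKYOLO only lists Backend::RKNPU2 as valid
  ultrainfer::vision::detection::RKYOLOV5 model(
      "./yolov5.rknn", option, ultrainfer::ModelFormat::RKNN);  // placeholder path
  if (!model.Initialized()) {  // assumed helper from the base model class
    std::cerr << "Failed to initialize RKYOLOV5." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("./test.jpg");  // placeholder image
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;  // assumed: prints boxes, scores, label_ids
  return 0;
}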
+
+#pragma once
+#include "ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h"
+namespace ultrainfer {
+namespace vision {
+namespace detection {
+
+class ULTRAINFER_DECL RKYOLOV5 : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOV5(const std::string &model_file,
+           const RuntimeOption &custom_option = RuntimeOption(),
+           const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {10, 13, 16, 30, 33, 23, 30, 61, 62,
+                                45, 59, 119, 116, 90, 156, 198, 373, 326};
+    int anchor_per_branch_ = 3;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOV5"; }
+};
+
+class ULTRAINFER_DECL RKYOLOV7 : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOV7(const std::string &model_file,
+           const RuntimeOption &custom_option = RuntimeOption(),
+           const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {12, 16, 19, 36, 40, 28, 36, 75, 76,
+                                55, 72, 146, 142, 110, 192, 243, 459, 401};
+    int anchor_per_branch_ = 3;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOV7"; }
+};
+
+class ULTRAINFER_DECL RKYOLOX : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOX(const std::string &model_file,
+          const RuntimeOption &custom_option = RuntimeOption(),
+          const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int anchor_per_branch_ = 1;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOX"; }
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc new file mode 100755 index 0000000000..ae566ca4a9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc @@ -0,0 +1,212 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLOPostprocessor::RKYOLOPostprocessor() {} + +bool RKYOLOPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(tensors[0].shape[0]); + for (int num = 0; num < tensors[0].shape[0]; ++num) { + int validCount = 0; + std::vector filterBoxes; + std::vector boxesScore; + std::vector classId; + for (int i = 0; i < tensors.size(); i++) { + auto tensor_shape = tensors[i].shape; + auto skip_num = std::accumulate(tensor_shape.begin(), tensor_shape.end(), + 1, std::multiplies()); + int skip_address = num * skip_num; + int stride = strides_[i]; + int grid_h = height_ / stride; + int grid_w = width_ / stride; + int *anchor = &(anchors_.data()[i * 2 * anchor_per_branch_]); + if (tensors[i].dtype == FDDataType::FP32) { + validCount = validCount + + ProcessFP16((float *)tensors[i].Data() + skip_address, + anchor, grid_h, grid_w, stride, filterBoxes, + boxesScore, classId, conf_threshold_); + } else { + FDERROR << "RKYOLO Only Support FP32 Model." + << "But the result's type is " << Str(tensors[i].dtype) + << std::endl; + } + } + + // no object detect + if (validCount <= 0) { + FDINFO << "The number of object detect is 0." << std::endl; + return true; + } + + std::vector indexArray; + for (int i = 0; i < validCount; ++i) { + indexArray.push_back(i); + } + + QuickSortIndiceInverse(boxesScore, 0, validCount - 1, indexArray); + + if (anchor_per_branch_ == 3) { + NMS(validCount, filterBoxes, classId, indexArray, nms_threshold_, false); + } else if (anchor_per_branch_ == 1) { + NMS(validCount, filterBoxes, classId, indexArray, nms_threshold_, true); + } else { + FDERROR << "anchor_per_branch_ only support 3 or 1." 
<< std::endl; + return false; + } + + int last_count = 0; + (*results)[num].Clear(); + (*results)[num].Reserve(validCount); + + /* box valid detect target */ + for (int i = 0; i < validCount; ++i) { + if (indexArray[i] == -1 || boxesScore[i] < conf_threshold_ || + last_count >= obj_num_bbox_max_size) { + continue; + } + int n = indexArray[i]; + float x1 = filterBoxes[n * 4 + 0]; + float y1 = filterBoxes[n * 4 + 1]; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + (*results)[num].boxes.emplace_back(std::array{ + (float)((Clamp(x1, 0, width_) - pad_hw_values_[num][1] / 2) / + scale_[num]), + (float)((Clamp(y1, 0, height_) - pad_hw_values_[num][0] / 2) / + scale_[num]), + (float)((Clamp(x2, 0, width_) - pad_hw_values_[num][1] / 2) / + scale_[num]), + (float)((Clamp(y2, 0, height_) - pad_hw_values_[num][0] / 2) / + scale_[0])}); + (*results)[num].label_ids.push_back(id); + (*results)[num].scores.push_back(boxesScore[i]); + last_count++; + } + } + return true; +} + +int RKYOLOPostprocessor::ProcessFP16(float *input, int *anchor, int grid_h, + int grid_w, int stride, + std::vector &boxes, + std::vector &boxScores, + std::vector &classId, + float threshold) { + + int validCount = 0; + int grid_len = grid_h * grid_w; + // float thres_sigmoid = threshold; + for (int a = 0; a < anchor_per_branch_; a++) { + for (int i = 0; i < grid_h; i++) { + for (int j = 0; j < grid_w; j++) { + float box_confidence = + input[(prob_box_size_ * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= threshold) { + int offset = (prob_box_size_ * a) * grid_len + i * grid_w + j; + float *in_ptr = input + offset; + + float maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < obj_class_num_; ++k) { + float prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) { + maxClassId = k; + maxClassProbs = prob; + } + } + float box_conf_f32 = (box_confidence); + float class_prob_f32 = (maxClassProbs); + float limit_score = 0; + if (anchor_per_branch_ == 1) { + limit_score = class_prob_f32; + } else { + limit_score = box_conf_f32 * class_prob_f32; + } + if (limit_score > conf_threshold_) { + float box_x, box_y, box_w, box_h; + if (anchor_per_branch_ == 1) { + box_x = *in_ptr; + box_y = (in_ptr[grid_len]); + box_w = exp(in_ptr[2 * grid_len]) * stride; + box_h = exp(in_ptr[3 * grid_len]) * stride; + } else { + box_x = *in_ptr * 2.0 - 0.5; + box_y = (in_ptr[grid_len]) * 2.0 - 0.5; + box_w = (in_ptr[2 * grid_len]) * 2.0; + box_h = (in_ptr[3 * grid_len]) * 2.0; + box_w *= box_w; + box_h *= box_h; + } + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w *= (float)anchor[a * 2]; + box_h *= (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + boxScores.push_back(box_conf_f32 * class_prob_f32); + classId.push_back(maxClassId); + validCount++; + } + } + } + } + } + return validCount; +} + +int RKYOLOPostprocessor::QuickSortIndiceInverse(std::vector &input, + int left, int right, + std::vector &indices) { + float key; + int key_index; + int low = left; + int high = right; + if (left < right) { + key_index = indices[left]; + key = input[left]; + while (low < high) { + while (low < high && input[high] <= key) { + high--; + } + input[low] = input[high]; + indices[low] = indices[high]; + while (low < high && input[low] >= key) { + low++; + } + input[high] = input[low]; + 
indices[high] = indices[low]; + } + input[low] = key; + indices[low] = key_index; + QuickSortIndiceInverse(input, left, low - 1, indices); + QuickSortIndiceInverse(input, low + 1, right, indices); + } + return low; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h new file mode 100755 index 0000000000..9329bf4155 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/utils.h" +#include +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief Postprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL RKYOLOPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5 serials model + */ + RKYOLOPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(float nms_threshold) { nms_threshold_ = nms_threshold; } + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(float conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + const float GetConfThreshold() { return conf_threshold_; } + + /// Get nms_threshold, default 0.45 + const float GetNMSThreshold() { return nms_threshold_; } + + /// Set height and weight + void SetHeightAndWeight(int height, int width) { + height_ = height; + width_ = width; + } + + /// Set pad_hw_values + void SetPadHWValues(const std::vector> &pad_hw_values) { + pad_hw_values_ = pad_hw_values; + } + + /// Set scale + void SetScale(const std::vector &scale) { scale_ = scale; } + + /// Get Anchor + const std::vector &GetAnchor() { return anchors_; } + + /// Set Anchor + void SetAnchor(const std::vector &anchors) { anchors_ = anchors; } + + void SetAnchorPerBranch(int anchor_per_branch) { + anchor_per_branch_ = anchor_per_branch; + } + + /// Set the number of class + void SetClassNum(int num) { + obj_class_num_ = num; + prob_box_size_ = obj_class_num_ + 5; + } + /// Get the number of class + int GetClassNum() { return obj_class_num_; } + +private: + std::vector anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62, + 45, 59, 119, 116, 90, 156, 198, 373, 326}; + int strides_[3] = {8, 16, 32}; + int 
height_ = 0; + int width_ = 0; + int anchor_per_branch_ = 0; + + int ProcessFP16(float *input, int *anchor, int grid_h, int grid_w, int stride, + std::vector &boxes, std::vector &boxScores, + std::vector &classId, float threshold); + // Model + int QuickSortIndiceInverse(std::vector &input, int left, int right, + std::vector &indices); + + // post_process values + std::vector> pad_hw_values_; + std::vector scale_; + float nms_threshold_ = 0.45; + float conf_threshold_ = 0.25; + int prob_box_size_ = 85; + int obj_class_num_ = 80; + int obj_num_bbox_max_size = 200; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc new file mode 100755 index 0000000000..3aeb302767 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLOPreprocessor::RKYOLOPreprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void RKYOLOPreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + scale_.push_back(scale); + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + + pad_hw_values_.push_back({pad_h, pad_w}); + + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool RKYOLOPreprocessor::Preprocess(FDMat *mat, FDTensor *output) { + // RKYOLO's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + BGR2RGB::Run(mat); + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, h, w, c + return true; +} + +bool RKYOLOPreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." 
+ << std::endl; + return false; + } + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h new file mode 100755 index 0000000000..2da7d78b0a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief Preprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL RKYOLOPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5 serials model + */ + RKYOLOPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + std::vector> GetPadHWValues() const { + return pad_hw_values_; + } + std::vector GetScale() const { return scale_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // 
padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; + + std::vector> pad_hw_values_; + std::vector scale_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc new file mode 100755 index 0000000000..fec34e74e4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLO::RKYOLO(const std::string &model_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + if (model_format == ModelFormat::RKNN) { + valid_cpu_backends = {}; + valid_gpu_backends = {}; + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + FDERROR << "RKYOLO Only Support run in RKNPU2" << std::endl; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + initialized = Initialize(); +} + +bool RKYOLO::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + auto size = GetPreprocessor().GetSize(); + GetPostprocessor().SetHeightAndWeight(size[0], size[1]); + return true; +} + +bool RKYOLO::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool RKYOLO::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + auto pad_hw_values_ = preprocessor_.GetPadHWValues(); + postprocessor_.SetPadHWValues(preprocessor_.GetPadHWValues()); + postprocessor_.SetScale(preprocessor_.GetScale()); + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h new file mode 100755 index 0000000000..9eafe1a6de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +class ULTRAINFER_DECL RKYOLO : public UltraInferModel { +public: + RKYOLO(const std::string &model_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::RKNN); + + std::string ModelName() const { return "RKYOLO"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5 + RKYOLOPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5 + RKYOLOPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + RKYOLOPreprocessor preprocessor_; + RKYOLOPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc new file mode 100755 index 0000000000..bd7bd50c16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindRKYOLO(pybind11::module &m) { + pybind11::class_(m, + "RKYOLOPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::RKYOLOPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def_property("size", &vision::detection::RKYOLOPreprocessor::GetSize, + &vision::detection::RKYOLOPreprocessor::SetSize) + .def_property("padding_value", + &vision::detection::RKYOLOPreprocessor::GetPaddingValue, + &vision::detection::RKYOLOPreprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::RKYOLOPreprocessor::GetScaleUp, + &vision::detection::RKYOLOPreprocessor::SetScaleUp); + + pybind11::class_( + m, "RKYOLOPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::RKYOLOPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "RKYOLOV5Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::RKYOLOPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "RKYOLOV5Postprocessor."); + } + return results; + }) + .def("set_anchor", [](vision::detection::RKYOLOPostprocessor &self, + std::vector &data) { self.SetAnchor(data); }) + .def_property("conf_threshold", + &vision::detection::RKYOLOPostprocessor::GetConfThreshold, + &vision::detection::RKYOLOPostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::RKYOLOPostprocessor::GetNMSThreshold, + &vision::detection::RKYOLOPostprocessor::SetNMSThreshold) + .def_property("class_num", + &vision::detection::RKYOLOPostprocessor::GetClassNum, + &vision::detection::RKYOLOPostprocessor::SetClassNum); + + pybind11::class_(m, "RKYOLOV5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOV5 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOV5 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + 
&vision::detection::RKYOLOV5::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOV5::GetPostprocessor); + + pybind11::class_(m, "RKYOLOX") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOX &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOX &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::RKYOLOX::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOX::GetPostprocessor); + + pybind11::class_(m, "RKYOLOV7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOV7 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOV7 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::RKYOLOV7::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOV7::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc new file mode 100755 index 0000000000..7e534ac14c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/utils.h" +namespace ultrainfer { +namespace vision { +namespace detection { +float Clamp(float val, int min, int max) { + return val > min ? (val < max ? val : max) : min; +} +static float CalculateOverlap(float xmin0, float ymin0, float xmax0, + float ymax0, float xmin1, float ymin1, + float xmax1, float ymax1) { + float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0); + float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0); + float i = w * h; + float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i; + return u <= 0.f ? 
0.f : (i / u); +} + +int NMS(int valid_count, std::vector &output_locations, + std::vector &class_id, std::vector &order, float threshold, + bool class_agnostic) { + for (int i = 0; i < valid_count; ++i) { + if (order[i] == -1) { + continue; + } + int n = order[i]; + for (int j = i + 1; j < valid_count; ++j) { + int m = order[j]; + if (m == -1) { + continue; + } + + if (!class_agnostic && class_id[n] != class_id[m]) { + continue; + } + + float xmin0 = output_locations[n * 4 + 0]; + float ymin0 = output_locations[n * 4 + 1]; + float xmax0 = output_locations[n * 4 + 0] + output_locations[n * 4 + 2]; + float ymax0 = output_locations[n * 4 + 1] + output_locations[n * 4 + 3]; + + float xmin1 = output_locations[m * 4 + 0]; + float ymin1 = output_locations[m * 4 + 1]; + float xmax1 = output_locations[m * 4 + 0] + output_locations[m * 4 + 2]; + float ymax1 = output_locations[m * 4 + 1] + output_locations[m * 4 + 3]; + + float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, + xmax1, ymax1); + + if (iou > threshold) { + order[j] = -1; + } + } + } + return 0; +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h new file mode 100755 index 0000000000..c357212770 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +float Clamp(float val, int min, int max); +int NMS(int valid_count, std::vector &output_locations, + std::vector &class_id, std::vector &order, float threshold, + bool class_agnostic); + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc new file mode 100755 index 0000000000..a91ce09886 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc @@ -0,0 +1,254 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
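Before the ScaledYOLOv4 implementation below, a note on its LetterBox step: the image is scaled to fit the target size and the remainder is padded half on each side (or only up to a multiple of stride when _auto is set). A self-contained sketch of that arithmetic, assuming a 1280x720 input and the default 640x640 target (illustration only, not part of the patch):

#include <algorithm>
#include <cmath>
#include <iostream>

// Standalone sketch of the letterbox arithmetic used by LetterBox() below.
// For a 1280x720 image and a 640x640 target: scale = min(640/720, 640/1280)
// = 0.5, resize -> 640x360, pad_h = 280, split as 140 px top and bottom.
int main() {
  int ipt_w = 1280, ipt_h = 720;
  int target_w = 640, target_h = 640;
  float scale = std::min(target_h * 1.0f / ipt_h, target_w * 1.0f / ipt_w);
  int resize_w = static_cast<int>(std::round(ipt_w * scale));
  int resize_h = static_cast<int>(std::round(ipt_h * scale));
  int pad_w = target_w - resize_w;
  int pad_h = target_h - resize_h;
  int top = static_cast<int>(std::round(pad_h / 2.0f - 0.1f));
  int bottom = static_cast<int>(std::round(pad_h / 2.0f + 0.1f));
  std::cout << "scale=" << scale << " resize=" << resize_w << "x" << resize_h
            << " pad_w=" << pad_w << " pad top/bottom=" << top << "/" << bottom
            << std::endl;
  return 0;
}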
+ +#include "ultrainfer/vision/detection/contrib/scaledyolov4.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void ScaledYOLOv4::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +ScaledYOLOv4::ScaledYOLOv4(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool ScaledYOLOv4::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool ScaledYOLOv4::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // ScaledYOLOv4's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + ScaledYOLOv4::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool ScaledYOLOv4::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + // 和 LetterBox中_auto=true的处理逻辑对应 + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = 
std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool ScaledYOLOv4::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h new file mode 100755 index 0000000000..ba160ca702 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief ScaledYOLOv4 model object used when to load a ScaledYOLOv4 model + * exported by ScaledYOLOv4. + */ +class ULTRAINFER_DECL ScaledYOLOv4 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./scaled_yolov4.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + + ScaledYOLOv4(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "ScaledYOLOv4"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized + bool is_dynamic_input_; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc new file mode 100755 index 0000000000..f3961bbb11 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindScaledYOLOv4(pybind11::module &m) { + pybind11::class_( + m, "ScaledYOLOv4") + .def(pybind11::init()) + .def("predict", + [](vision::detection::ScaledYOLOv4 &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::ScaledYOLOv4::size) + .def_readwrite("padding_value", + &vision::detection::ScaledYOLOv4::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::ScaledYOLOv4::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::ScaledYOLOv4::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::ScaledYOLOv4::is_scale_up) + .def_readwrite("stride", &vision::detection::ScaledYOLOv4::stride) + .def_readwrite("max_wh", &vision::detection::ScaledYOLOv4::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc new file mode 100755 index 0000000000..11e945e0d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc @@ -0,0 +1,252 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
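For reference, a minimal C++ usage sketch of the ScaledYOLOv4 API added above. The umbrella header, model path and image path are placeholders rather than files introduced by this patch, and Initialized()/Str() are assumed to be inherited helpers on the base model and result types:

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // hypothetical umbrella header

int main() {
  ultrainfer::vision::detection::ScaledYOLOv4 model("scaled_yolov4.onnx");
  if (!model.Initialized()) {  // assumed base-class helper
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult res;
  if (!model.Predict(&im, &res, /*conf_threshold=*/0.25f, /*nms_iou_threshold=*/0.5f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;  // boxes, scores and label_ids; Str() assumed
  return 0;
}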
+ +#include "ultrainfer/vision/detection/contrib/yolor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOR::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOR::YOLOR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOR::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOR::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolor's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + YOLOR::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOR::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + 
result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOR::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h new file mode 100755 index 0000000000..335c80391a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h @@ -0,0 +1,101 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOR model object used when to load a YOLOR model exported by YOLOR. + */ +class ULTRAINFER_DECL YOLOR : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./yolor.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOR(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "YOLOR"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output detection result will be writen to this + * structure \param[in] conf_threshold confidence threashold for + * postprocessing, default is 0.25 \param[in] nms_iou_threshold iou threashold + * for NMS, default is 0.5 \return true if the prediction successed, otherwise + * false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc new file mode 100755 index 0000000000..2e226f65a9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
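To make the LetterBox flags documented above concrete, here is a small standalone sketch (illustrative numbers only, not part of the patch) of the padding arithmetic, including the is_mini_pad reduction:

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical 1280x720 input, default target size {640, 640}, stride 32.
  int in_w = 1280, in_h = 720;
  int target_w = 640, target_h = 640, stride = 32;
  float scale = std::min(target_h * 1.0f / in_h, target_w * 1.0f / in_w);  // 0.5
  int resize_w = static_cast<int>(std::round(in_w * scale));  // 640
  int resize_h = static_cast<int>(std::round(in_h * scale));  // 360
  int pad_w = target_w - resize_w;  // 0
  int pad_h = target_h - resize_h;  // 280 before any mini-pad reduction
  bool is_mini_pad = true;
  if (is_mini_pad) {   // only pad up to the next multiple of stride
    pad_w %= stride;   // 0
    pad_h %= stride;   // 280 % 32 = 24
  }
  // The pad is later split evenly into top/bottom (and left/right) halves.
  std::printf("resize=%dx%d pad_w=%d pad_h=%d\n", resize_w, resize_h, pad_w, pad_h);
  return 0;
}

With is_no_pad the image is instead resized straight to the target size, and when is_scale_up is false the scale is clamped to at most 1.0 so small images are never enlarged.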
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOR(pybind11::module &m) { + pybind11::class_(m, "YOLOR") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOR &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOR::size) + .def_readwrite("padding_value", &vision::detection::YOLOR::padding_value) + .def_readwrite("is_mini_pad", &vision::detection::YOLOR::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOR::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOR::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOR::stride) + .def_readwrite("max_wh", &vision::detection::YOLOR::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc new file mode 100755 index 0000000000..1f19d0dba2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Postprocessor::YOLOv5Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; +} + +bool YOLOv5Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensors[0].shape[1] * (tensors[0].shape[2] - 5)); + } else { + (*results)[bs].Reserve(tensors[0].shape[1]); + } + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + if (multi_label_) { + for (size_t j = 5; j < tensors[0].shape[2]; ++j) { + confidence = data[s + 4]; + const float *class_score = data + s + j; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, class_score); + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } else { + const float *max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = 
std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h new file mode 100755 index 0000000000..ac437c6ffa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL YOLOv5Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5 serials model + */ + YOLOv5Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc new file mode 100755 index 0000000000..7f012f09c9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
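One detail worth spelling out for the postprocessor above: boxes are shifted by label_id * max_wh_ before NMS so that boxes of different classes can never overlap, which lets a single class-agnostic utils::NMS pass behave like per-class NMS; the same offset is subtracted again before the boxes are rescaled and clipped. A tiny sketch of the idea with illustrative values:

#include <cstdio>

int main() {
  // max_wh_ is chosen larger than any letterboxed coordinate, so every class
  // gets its own non-overlapping coordinate band during the single NMS pass.
  const float max_wh = 7680.0f;
  const int label_id = 3;
  const float x1 = 100.0f;
  const float shifted = x1 + label_id * max_wh;        // 23140: band of class 3
  const float restored = shifted - label_id * max_wh;  // 100 again after NMS
  std::printf("shifted=%.1f restored=%.1f\n", shifted, restored);
  return 0;
}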
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Preprocessor::YOLOv5Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv5Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov5's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h new file mode 100755 index 0000000000..47331719be --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL YOLOv5Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5 serials model + */ + YOLOv5Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple 
of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc new file mode 100755 index 0000000000..61e7998d64 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc @@ -0,0 +1,97 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/yolov5.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5::YOLOv5(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool YOLOv5::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool YOLOv5::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h new file mode 100755 index 0000000000..f968c5bd78 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov5/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov5/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by + * YOLOv5. + */ +class ULTRAINFER_DECL YOLOv5 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./yolov5.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5"; } + + /** \brief DEPRECATED Predict the detection result for an input image, remove + * at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_threshold iou + * threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, float nms_threshold = 0.5); + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5 + virtual YOLOv5Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5 + virtual YOLOv5Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5Preprocessor preprocessor_; + YOLOv5Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc new file mode 100755 index 0000000000..d81c13ef8a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
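A minimal usage sketch of the YOLOv5 wrapper declared above. File names and the umbrella header are placeholders, Initialized()/Str() are assumed inherited helpers, and the remaining calls mirror the declarations in this patch (thresholds now live on the postprocessor rather than on Predict):

#include <iostream>
#include <opencv2/opencv.hpp>
#include <vector>
#include "ultrainfer/vision.h"  // hypothetical umbrella header

int main() {
  namespace det = ultrainfer::vision::detection;
  det::YOLOv5 model("yolov5s.onnx");  // placeholder model file
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  // Configure postprocessing instead of passing thresholds to Predict().
  model.GetPostprocessor().SetConfThreshold(0.3f);
  model.GetPostprocessor().SetNMSThreshold(0.45f);

  std::vector<cv::Mat> imgs = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::DetectionResult> results;
  if (!model.BatchPredict(imgs, &results)) {
    std::cerr << "BatchPredict failed." << std::endl;
    return -1;
  }
  for (const auto &res : results) {
    std::cout << res.Str() << std::endl;
  }
  return 0;
}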
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv5Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOv5Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, + &vision::detection::YOLOv5Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv5Preprocessor::GetPaddingValue, + &vision::detection::YOLOv5Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv5Preprocessor::GetScaleUp, + &vision::detection::YOLOv5Preprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv5Preprocessor::GetMiniPad, + &vision::detection::YOLOv5Preprocessor::SetMiniPad) + .def_property("stride", &vision::detection::YOLOv5Preprocessor::GetStride, + &vision::detection::YOLOv5Preprocessor::SetStride); + + pybind11::class_( + m, "YOLOv5Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv5Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv5Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv5Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv5Postprocessor::GetConfThreshold, + &vision::detection::YOLOv5Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv5Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv5Postprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv5Postprocessor::GetMultiLabel, + &vision::detection::YOLOv5Postprocessor::SetMultiLabel); + + pybind11::class_(m, "YOLOv5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv5 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv5::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv5::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc new file mode 100755 index 0000000000..f9574ddf4c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc @@ -0,0 +1,471 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5lite.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv5Lite::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +void YOLOv5Lite::GenerateAnchors(const std::vector &size, + const std::vector &downsample_strides, + std::vector *anchors, + int num_anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides in YOLOv5Lite, e.g (8,16,32) + const int width = size[0]; + const int height = size[1]; + for (int i = 0; i < downsample_strides.size(); ++i) { + const int ds = downsample_strides[i]; + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int an = 0; an < num_anchors; ++an) { + float anchor_w = anchor_config[i][an * 2]; + float anchor_h = anchor_config[i][an * 2 + 1]; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*anchors).emplace_back(Anchor{g0, g1, ds, anchor_w, anchor_h}); + } + } + } + } +} + +YOLOv5Lite::YOLOv5Lite(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + 
runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv5Lite::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + downsample_strides = {8, 16, 32}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + is_decode_exported = false; + anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, + {30.0, 61.0, 62.0, 45.0, 59.0, 119.0}, + {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv5Lite::~YOLOv5Lite() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv5Lite::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5lite's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + YOLOv5Lite::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv5Lite::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." << std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv5Lite::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv5Lite::PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // generate anchors with dowmsample strides + std::vector anchors; + int num_anchors = anchor_config[0].size() / 2; + GenerateAnchors(size, downsample_strides, &anchors, num_anchors); + // infer_result shape might look like (1,n,85=5+80) + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // fetch i-th anchor + float grid0 = static_cast(anchors.at(i).grid0); + float grid1 = static_cast(anchors.at(i).grid1); + float downsample_stride = static_cast(anchors.at(i).stride); + float anchor_w = static_cast(anchors.at(i).anchor_w); + float anchor_h = static_cast(anchors.at(i).anchor_h); + // convert from offsets to [x, y, w, h] + float dx = data[s]; + float dy = data[s + 1]; + float dw = data[s + 2]; + float dh = data[s + 3]; + + float x = (dx * 2.0f - 0.5f + grid0) * downsample_stride; + float y = (dy * 2.0f - 0.5f + grid1) * downsample_stride; + float w = std::pow(dw * 2.0f, 2.0f) * anchor_w; + float h = std::pow(dh * 2.0f, 2.0f) * anchor_h; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, + x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Lite::Postprocess( + FDTensor &infer_result, 
DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Lite::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (is_decode_exported) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } else { + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, + conf_threshold, nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h new file mode 100755 index 0000000000..ec94fdc808 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h @@ -0,0 +1,156 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5Lite model object used when to load a YOLOv5Lite model exported + * by YOLOv5Lite. + */ +class ULTRAINFER_DECL YOLOv5Lite : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of the model file, e.g. ./yolov5lite.onnx
+   * \param[in] params_file Path of the parameter file, e.g.
+   *            ppyoloe/model.pdiparams; if the model format is ONNX, this
+   *            parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default uses the
+   *            CPU and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   */
+  YOLOv5Lite(const std::string &model_file, const std::string &params_file = "",
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::ONNX);
+
+  ~YOLOv5Lite();
+
+  virtual std::string ModelName() const { return "YOLOv5-Lite"; }
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), a 3-D array
+   *            with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this
+   *            structure
+   * \param[in] conf_threshold Confidence threshold for postprocessing,
+   *            default is 0.45
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.25
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat *im, DetectionResult *result,
+                       float conf_threshold = 0.45,
+                       float nms_iou_threshold = 0.25);
+
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
+  /*! @brief
+  Argument for the image preprocessing step, tuple of (width, height), which
+  decides the target size after resizing; default size = {640, 640}
+  */
+  std::vector<int> size;
+  // padding value, size should be the same as the number of channels
+  std::vector<float> padding_value;
+  // only pad to the minimum rectangle whose height and width are multiples of
+  // stride
+  bool is_mini_pad;
+  // when is_mini_pad = false and is_no_pad = true,
+  // the image is resized to the set size
+  bool is_no_pad;
+  // if is_scale_up is false, the input image can only be zoomed out,
+  // i.e. the maximum resize scale cannot exceed 1.0
+  bool is_scale_up;
+  // padding stride, used with is_mini_pad
+  int stride;
+  // for offsetting the boxes by class when using NMS
+  float max_wh;
+  // downsample strides used by YOLOv5Lite to generate anchors,
+  // (8, 16, 32) by default; some models also use stride 64
+  std::vector<int> downsample_strides;
+  // anchor parameters; for downsample_strides (8, 16, 32),
+  // each stride has three anchors with width and height
+  std::vector<std::vector<float>> anchor_config;
+  /*! @brief
+  Whether the model_file was exported with the decode module. The official
+  YOLOv5Lite/export.py script exports ONNX files without the decode module.
+  Please set it to 'true' manually if the model file was exported with the
+  decode module.
+  false : ONNX file without decode module.
+  true  : ONNX file with decode module. Default is false.
+  */
+  bool is_decode_exported;
+
+private:
+  // parameters needed by GenerateAnchors to generate anchors when the ONNX
+  // file has no decode module.
+  struct Anchor {
+    int grid0;
+    int grid1;
+    int stride;
+    float anchor_w;
+    float anchor_h;
+  };
+
+  bool Initialize();
+
+  bool Preprocess(Mat *mat, FDTensor *output,
+                  std::map<std::string, std::array<float, 2>> *im_info);
+
+  bool CudaPreprocess(Mat *mat, FDTensor *output,
+                      std::map<std::string, std::array<float, 2>> *im_info);
+
+  bool Postprocess(FDTensor &infer_result, DetectionResult *result,
+                   const std::map<std::string, std::array<float, 2>> &im_info,
+                   float conf_threshold, float nms_iou_threshold);
+
+  // The official YOLOv5Lite/export.py exports ONNX files without the decode
+  // module. This function performs postprocessing for such ONNX files;
+  // set `is_decode_exported = false` to make it take effect.
+  bool PostprocessWithDecode(
+      FDTensor &infer_result, DetectionResult *result,
+      const std::map<std::string, std::array<float, 2>> &im_info,
+      float conf_threshold, float nms_iou_threshold);
+
+  void LetterBox(Mat *mat, const std::vector<int> &size,
+                 const std::vector<float> &color, bool _auto,
+                 bool scale_fill = false, bool scale_up = true,
+                 int stride = 32);
+
+  // generate anchors for decoding when the ONNX file has no decode module.
+  void GenerateAnchors(const std::vector<int> &size,
+                       const std::vector<int> &downsample_strides,
+                       std::vector<Anchor> *anchors, const int num_anchors = 3);
+
+  // Whether to run inference with dynamic shape (e.g. an ONNX model exported
+  // with dynamic shape). When is_dynamic_input_ is 'false', is_mini_pad is
+  // forced to 'false'. This value is checked automatically by ultrainfer after
+  // the internal Runtime has been initialized.
+  bool is_dynamic_input_;
+  // CUDA host buffer for the input image
+  uint8_t *input_img_cuda_buffer_host_ = nullptr;
+  // CUDA device buffer for the input image
+  uint8_t *input_img_cuda_buffer_device_ = nullptr;
+  // CUDA device buffer for the TRT input tensor
+  float *input_tensor_cuda_buffer_device_ = nullptr;
+  // Whether to use CUDA preprocessing
+  bool use_cuda_preprocessing_ = false;
+  // CUDA stream
+  void *cuda_stream_ = nullptr;
+};
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc
new file mode 100755
index 0000000000..9f1524ef28
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
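A self-contained sketch of the per-anchor box decode that YOLOv5Lite::PostprocessWithDecode applies above; the offsets, grid cell, stride, and anchor prior below are made-up illustration values, not taken from this patch:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Raw network offsets for one anchor (illustrative values).
      float dx = 0.6f, dy = 0.55f, dw = 0.7f, dh = 0.8f;
      // Grid cell, downsample stride, and anchor prior (illustrative values).
      float grid0 = 10.0f, grid1 = 20.0f, stride = 8.0f;
      float anchor_w = 10.0f, anchor_h = 13.0f;
      // Same decode as PostprocessWithDecode: center from grid, size from prior.
      float x = (dx * 2.0f - 0.5f + grid0) * stride;   // 85.6
      float y = (dy * 2.0f - 0.5f + grid1) * stride;   // 164.8
      float w = std::pow(dw * 2.0f, 2.0f) * anchor_w;  // 19.6
      float h = std::pow(dh * 2.0f, 2.0f) * anchor_h;  // 33.28
      // Printed box [x1, y1, x2, y2] = [75.80, 148.16, 95.40, 181.44]
      std::printf("[%.2f, %.2f, %.2f, %.2f]\n", x - w / 2.0f, y - h / 2.0f,
                  x + w / 2.0f, y + h / 2.0f);
      return 0;
    }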
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Lite(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Lite") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5Lite &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv5Lite &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv5Lite::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv5Lite::padding_value) + .def_readwrite("downsample_strides", + &vision::detection::YOLOv5Lite::downsample_strides) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv5Lite::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOv5Lite::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOv5Lite::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv5Lite::stride) + .def_readwrite("max_wh", &vision::detection::YOLOv5Lite::max_wh) + .def_readwrite("anchor_config", + &vision::detection::YOLOv5Lite::anchor_config) + .def_readwrite("is_decode_exported", + &vision::detection::YOLOv5Lite::is_decode_exported); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc new file mode 100755 index 0000000000..da980d2031 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc @@ -0,0 +1,217 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5SegPostprocessor::YOLOv5SegPostprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + mask_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; + mask_nums_ = 32; +} + +bool YOLOv5SegPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + // store mask information + std::vector> mask_embeddings; + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensors[0].shape[1] * + (tensors[0].shape[2] - mask_nums_ - 5)); + } else { + (*results)[bs].Reserve(tensors[0].shape[1]); + } + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float cls_conf = data[s + 4]; + float confidence = data[s + 4]; + std::vector mask_embedding(data + s + tensors[0].shape[2] - + mask_nums_, + data + s + tensors[0].shape[2]); + for (size_t k = 0; k < mask_embedding.size(); ++k) { + mask_embedding[k] *= cls_conf; + } + if (multi_label_) { + for (size_t j = 5; j < tensors[0].shape[2] - mask_nums_; ++j) { + confidence = data[s + 4]; + const float *class_score = data + s + j; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, class_score); + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + // TODO(wangjunjie06): No zero copy + mask_embeddings.push_back(mask_embedding); + } + } else { + const float *max_class_score = std::max_element( + data + s + 5, data + s + tensors[0].shape[2] - mask_nums_); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + mask_embeddings.push_back(mask_embedding); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + // get box index after nms + std::vector index; + utils::NMS(&((*results)[bs]), nms_threshold_, &index); + + // deal with mask + // step1: MatMul, (box_nums * 32) x (32 * 160 * 160) = box_nums * 160 * 160 + // step2: Sigmoid + // step3: Resize to original image size + // step4: Select pixels greater than threshold and crop + (*results)[bs].contain_masks = true; + (*results)[bs].masks.resize((*results)[bs].boxes.size()); + const float *data_mask = + reinterpret_cast(tensors[1].Data()) + + bs * tensors[1].shape[1] * tensors[1].shape[2] * tensors[1].shape[3]; + cv::Mat mask_proto = + cv::Mat(tensors[1].shape[1], tensors[1].shape[2] * tensors[1].shape[3], + CV_32FC(1), const_cast(data_mask)); + // vector to cv::Mat for MatMul + // after push_back, Mat of m*n becomes (m + 1) * n + cv::Mat mask_proposals; + for (size_t i = 0; i < index.size(); ++i) { + mask_proposals.push_back(cv::Mat(mask_embeddings[index[i]]).t()); + } + cv::Mat matmul_result = (mask_proposals * mask_proto).t(); + cv::Mat masks = matmul_result.reshape( + (*results)[bs].boxes.size(), {static_cast(tensors[1].shape[2]), + static_cast(tensors[1].shape[3])}); + // split for boxes nums + std::vector mask_channels; + cv::split(masks, mask_channels); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt 
= ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + // for mask + float pad_h_mask = (float)pad_h / out_h * tensors[1].shape[2]; + float pad_w_mask = (float)pad_w / out_w * tensors[1].shape[3]; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + // deal with mask + cv::Mat dest, mask; + // sigmoid + cv::exp(-mask_channels[i], dest); + dest = 1.0 / (1.0 + dest); + // crop mask for feature map + int x1 = static_cast(pad_w_mask); + int y1 = static_cast(pad_h_mask); + int x2 = static_cast(tensors[1].shape[3] - pad_w_mask); + int y2 = static_cast(tensors[1].shape[2] - pad_h_mask); + cv::Rect roi(x1, y1, x2 - x1, y2 - y1); + dest = dest(roi); + cv::resize(dest, mask, cv::Size(ipt_w, ipt_h), 0, 0, cv::INTER_LINEAR); + // crop mask for source img + int x1_src = static_cast(round((*results)[bs].boxes[i][0])); + int y1_src = static_cast(round((*results)[bs].boxes[i][1])); + int x2_src = static_cast(round((*results)[bs].boxes[i][2])); + int y2_src = static_cast(round((*results)[bs].boxes[i][3])); + cv::Rect roi_src(x1_src, y1_src, x2_src - x1_src, y2_src - y1_src); + mask = mask(roi_src); + mask = mask > mask_threshold_; + // save mask in DetectionResult + int keep_mask_h = y2_src - y1_src; + int keep_mask_w = x2_src - x1_src; + int keep_mask_numel = keep_mask_h * keep_mask_w; + (*results)[bs].masks[i].Resize(keep_mask_numel); + (*results)[bs].masks[i].shape = {keep_mask_h, keep_mask_w}; + uint8_t *keep_mask_ptr = + reinterpret_cast((*results)[bs].masks[i].Data()); + std::memcpy(keep_mask_ptr, reinterpret_cast(mask.ptr()), + keep_mask_numel * sizeof(uint8_t)); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h new file mode 100755 index 0000000000..37694d07df --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h @@ -0,0 +1,78 @@ +// Copyright (c) 2022 
PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv5Seg serials model. + */ +class ULTRAINFER_DECL YOLOv5SegPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5Seg serials model + */ + YOLOv5SegPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; + // channel nums of masks + int mask_nums_; + // mask threshold + float mask_threshold_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc new file mode 100755 index 0000000000..ce820d8037 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
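A standalone sketch of the letterbox geometry that YOLOv5SegPreprocessor::LetterBox (defined below) computes; the 810x1080 source size is an assumed example and 640x640 is the default target:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      int src_w = 810, src_h = 1080;       // assumed source image size
      int target_w = 640, target_h = 640;  // default size_ = {640, 640}
      // Same scale rule as LetterBox: fit the longer side into the target.
      float scale = std::min(target_h * 1.0f / src_h, target_w * 1.0f / src_w);
      int resize_w = static_cast<int>(std::round(src_w * scale));  // 480
      int resize_h = static_cast<int>(std::round(src_h * scale));  // 640
      int pad_w = target_w - resize_w;  // 160 -> 80 px left, 80 px right
      int pad_h = target_h - resize_h;  // 0
      std::printf("scale=%.4f resize=%dx%d pad_w=%d pad_h=%d\n", scale,
                  resize_w, resize_h, pad_w, pad_h);
      return 0;
    }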
+ +#include "ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5SegPreprocessor::YOLOv5SegPreprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv5SegPreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5SegPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov5seg's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5SegPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h new file mode 100755 index 0000000000..fca6ba0025 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv5Seg serials model. + */ +class ULTRAINFER_DECL YOLOv5SegPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5Seg serials model + */ + YOLOv5SegPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc new file mode 100755 index 0000000000..481ee254ba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Seg::YOLOv5Seg(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Seg::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv5Seg::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5Seg::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h new file mode 100755 index 0000000000..f384f095a8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5Seg model object used when to load a YOLOv5Seg model exported + * by YOLOv5. + */ +class ULTRAINFER_DECL YOLOv5Seg : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov5seg.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Seg(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5seg"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5Seg + virtual YOLOv5SegPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5Seg + virtual YOLOv5SegPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5SegPreprocessor preprocessor_; + YOLOv5SegPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc new file mode 100755 index 0000000000..066d27ebe4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Seg(pybind11::module &m) { + pybind11::class_( + m, "YOLOv5SegPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5SegPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv5SegPreprocessor::GetSize, + &vision::detection::YOLOv5SegPreprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv5SegPreprocessor::GetPaddingValue, + &vision::detection::YOLOv5SegPreprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv5SegPreprocessor::GetScaleUp, + &vision::detection::YOLOv5SegPreprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv5SegPreprocessor::GetMiniPad, + &vision::detection::YOLOv5SegPreprocessor::SetMiniPad) + .def_property("stride", + &vision::detection::YOLOv5SegPreprocessor::GetStride, + &vision::detection::YOLOv5SegPreprocessor::SetStride); + + pybind11::class_( + m, "YOLOv5SegPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5SegPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv5SegPostprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv5SegPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv5SegPostprocessor."); + } + return results; + }) + .def_property( + "conf_threshold", + &vision::detection::YOLOv5SegPostprocessor::GetConfThreshold, + &vision::detection::YOLOv5SegPostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv5SegPostprocessor::GetNMSThreshold, + &vision::detection::YOLOv5SegPostprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv5SegPostprocessor::GetMultiLabel, + &vision::detection::YOLOv5SegPostprocessor::SetMultiLabel); + + pybind11::class_(m, + "YOLOv5Seg") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5Seg &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv5Seg &self, + std::vector &data) { + 
std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv5Seg::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv5Seg::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc new file mode 100755 index 0000000000..5277adecf5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc @@ -0,0 +1,342 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov6.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +void YOLOv6::LetterBox(Mat *mat, std::vector size, + std::vector color, bool _auto, bool scale_fill, + bool scale_up, int stride) { + float scale = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(static_cast(mat->Height()) * scale)); + int resize_w = int(round(static_cast(mat->Width()) * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv6::YOLOv6(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + 
CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv6::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 4096.0f; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv6::~YOLOv6() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv6::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(round(static_cast(mat->Height()) * ratio)); + int resize_w = int(round(static_cast(mat->Width()) * ratio)); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov6's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. HWC->CHW + LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, + stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv6::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." 
<< std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv6::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv6::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv6::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h new file mode 100755 index 0000000000..1d0762ea7b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief YOLOv6 model object used when to load a YOLOv6 model exported by + * YOLOv6. + */ +class ULTRAINFER_DECL YOLOv6 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov6.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv6(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + ~YOLOv6(); + + std::string ModelName() const { return "YOLOv6"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + void UseCudaPreprocessing(int max_img_size = 3840 * 2160); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640}; + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS, + // default 4096 in meituan/YOLOv6 + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool CudaPreprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + void LetterBox(Mat *mat, std::vector size, std::vector color, + bool _auto, bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // meituan/YOLOv6 official 'export_onnx.py' script will export static ONNX by + // default. + // while is_dynamic_input if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t *input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t *input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float *input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void *cuda_stream_ = nullptr; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc new file mode 100755 index 0000000000..2e629f66dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
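The YOLOv6 postprocessing above multiplies the objectness score by the best per-class score and shifts every decoded box by label_id * max_wh, so that one class-agnostic NMS pass never suppresses boxes belonging to different classes; the offset is subtracted again before the boxes are rescaled. A minimal standalone sketch of that decode-and-offset step (illustrative only; the Box struct and DecodeYoloRows helper are hypothetical, assuming the usual [cx, cy, w, h, obj, class scores...] row layout):

#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

struct Box { std::array<float, 4> xyxy; int label; float score; };

// Decode one image's raw YOLO head output laid out as rows of
// [cx, cy, w, h, objectness, class_0 ... class_{C-1}].
std::vector<Box> DecodeYoloRows(const float* data, size_t num_rows,
                                size_t row_len, float conf_threshold,
                                float max_wh /* class offset, e.g. 7680 */) {
  std::vector<Box> boxes;
  for (size_t i = 0; i < num_rows; ++i) {
    const float* row = data + i * row_len;
    const float* cls_begin = row + 5;
    const float* cls_end = row + row_len;
    const float* best = std::max_element(cls_begin, cls_end);
    float score = row[4] * (*best);  // objectness * best class probability
    if (score <= conf_threshold) continue;
    int label = static_cast<int>(best - cls_begin);
    float off = label * max_wh;      // per-class shift so one class-agnostic
                                     // NMS pass keeps classes separated
    boxes.push_back({{row[0] - row[2] / 2.0f + off,
                      row[1] - row[3] / 2.0f + off,
                      row[0] + row[2] / 2.0f + off,
                      row[1] + row[3] / 2.0f + off},
                     label, score});
  }
  return boxes;  // run NMS, then subtract label * max_wh from each coordinate
}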
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv6(pybind11::module &m) { + pybind11::class_(m, "YOLOv6") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv6 &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv6 &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv6::size) + .def_readwrite("padding_value", &vision::detection::YOLOv6::padding_value) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv6::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOv6::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOv6::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv6::stride) + .def_readwrite("max_wh", &vision::detection::YOLOv6::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc new file mode 100755 index 0000000000..814dc1969e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7Postprocessor::YOLOv7Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + max_wh_ = 7680.0; +} + +bool YOLOv7Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].Reserve(tensors[0].shape[1]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + const float *max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h new file mode 100755 index 0000000000..fc29fb7bf2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
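The postprocessor above maps boxes from the letterboxed network input back to the original image: it recomputes the resize scale as min(out_h / ipt_h, out_w / ipt_w), derives the symmetric padding, then subtracts the padding, divides by the scale, and clamps to the image bounds. A minimal sketch of that inverse mapping on a single [x1, y1, x2, y2] box (illustrative only; UndoLetterBox is a hypothetical helper):

#include <algorithm>
#include <array>

// Map a box from letterboxed coordinates back to the original image.
// out_* is the preprocessed (letterboxed) size, ipt_* the original size.
inline void UndoLetterBox(std::array<float, 4>* box, float out_h, float out_w,
                          float ipt_h, float ipt_w) {
  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
  float pad_h = (out_h - ipt_h * scale) / 2.0f;
  float pad_w = (out_w - ipt_w * scale) / 2.0f;
  (*box)[0] = std::min(std::max(((*box)[0] - pad_w) / scale, 0.0f), ipt_w - 1.0f);
  (*box)[1] = std::min(std::max(((*box)[1] - pad_h) / scale, 0.0f), ipt_h - 1.0f);
  (*box)[2] = std::min(std::max(((*box)[2] - pad_w) / scale, 0.0f), ipt_w - 1.0f);
  (*box)[3] = std::min(std::max(((*box)[3] - pad_h) / scale, 0.0f), ipt_h - 1.0f);
}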
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv7 serials model. + */ +class ULTRAINFER_DECL YOLOv7Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv7 serials model + */ + YOLOv7Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc new file mode 100755 index 0000000000..e54c5107f6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/detection/contrib/yolov7/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7Preprocessor::YOLOv7Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv7Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv7Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov7's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv7Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h new file mode 100755 index 0000000000..6ba82f9a31 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv7 serials model. + */ +class ULTRAINFER_DECL YOLOv7Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv7 serials model + */ + YOLOv7Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc new file mode 100755 index 0000000000..2646be429a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7/yolov7.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7::YOLOv7(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv7::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv7::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool YOLOv7::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv7::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h new file mode 100755 index 0000000000..a38c806e97 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
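Predict() above is a thin wrapper over BatchPredict(), which chains the preprocessor, the runtime, and the postprocessor. A minimal C++ usage sketch (illustrative only; the model and image paths are placeholders, and it assumes the FastDeploy-style helpers UseCpu(), Initialized() and DetectionResult::Str() carried over into the ultrainfer headers):

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.UseCpu();  // picks from valid_cpu_backends; UseGpu(0) for GPU backends
  ultrainfer::vision::detection::YOLOv7 model("yolov7.onnx", "", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the model." << std::endl;
    return -1;
  }

  // Thresholds are adjusted through the bundled postprocessor.
  model.GetPostprocessor().SetConfThreshold(0.3f);
  model.GetPostprocessor().SetNMSThreshold(0.5f);

  cv::Mat image = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(image, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;
  return 0;
}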
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov7/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov7/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7 model object used when to load a YOLOv7 model exported by + * YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov7"; } + + /** \brief DEPRECATED Predict the detection result for an input image, remove + * at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_threshold iou + * threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, float nms_threshold = 0.5); + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv7 + virtual YOLOv7Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv7 + virtual YOLOv7Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv7Preprocessor preprocessor_; + YOLOv7Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc new file mode 100755 index 0000000000..3def25b5fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7(pybind11::module &m) { + pybind11::class_(m, + "YOLOv7Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv7Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOV7Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, + &vision::detection::YOLOv7Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv7Preprocessor::GetPaddingValue, + &vision::detection::YOLOv7Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv7Preprocessor::GetScaleUp, + &vision::detection::YOLOv7Preprocessor::SetScaleUp); + + pybind11::class_( + m, "YOLOv7Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv7Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv7Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv7Postprocessor::GetConfThreshold, + &vision::detection::YOLOv7Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv7Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv7Postprocessor::SetNMSThreshold); + + pybind11::class_(m, "YOLOv7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv7 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + 
images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv7::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv7::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc new file mode 100755 index 0000000000..e2979f9a09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc @@ -0,0 +1,249 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7end2end_ort.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv7End2EndORT::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv7End2EndORT::YOLOv7End2EndORT(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT}; // NO TRT + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + if (custom_option.backend == Backend::TRT) { + FDWARNING << "Backend::TRT is not support for YOLOv7End2EndORT, " + << "will fallback to Backend::ORT." 
<< std::endl; + } + initialized = Initialize(); +} + +bool YOLOv7End2EndORT::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOv7End2EndORT::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + YOLOv7End2EndORT::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOv7End2EndORT::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold) { + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + // detected success without valid objects. + if (infer_result.shape[0] == 0) { + return true; + } + + result->Clear(); + result->Reserve(infer_result.shape[0]); + // (?,7) (batch_id,x0,y0,x1,y1,cls_id,score) after nms + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[0]; ++i) { + const float *box_cls_ptr = data + (i * 7); + int64_t batch_id = static_cast(box_cls_ptr[0] + 0.5f); // 0,1, ... 
+ FDASSERT(batch_id == 0, + "Only support batch=1 now, but found batch_id != 0."); + float confidence = box_cls_ptr[6]; + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = static_cast(box_cls_ptr[5] + 0.5f); + float x1 = box_cls_ptr[1]; + float y1 = box_cls_ptr[2]; + float x2 = box_cls_ptr[3]; + float y2 = box_cls_ptr[4]; + + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv7End2EndORT::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, + conf_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h new file mode 100755 index 0000000000..b6352a6333 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
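Because the end-to-end ORT export carries NMS inside the ONNX graph, the postprocessing above only has to read rows of seven floats, (batch_id, x0, y0, x1, y1, class_id, score), filter by confidence, and then undo the letterbox as in the other detectors. A standalone sketch of the row decode (illustrative only; Detection is a hypothetical holder type):

#include <cstdint>
#include <vector>

struct Detection {
  float x1, y1, x2, y2;
  int32_t label;
  float score;
};

// Decode a YOLOv7 end-to-end ONNX export whose graph already contains NMS:
// each output row is (batch_id, x0, y0, x1, y1, class_id, score).
std::vector<Detection> DecodeEnd2EndRows(const float* data, size_t num_rows,
                                         float conf_threshold) {
  std::vector<Detection> dets;
  for (size_t i = 0; i < num_rows; ++i) {
    const float* row = data + i * 7;
    if (row[6] <= conf_threshold) continue;  // score
    dets.push_back({row[1], row[2], row[3], row[4],
                    static_cast<int32_t>(row[5] + 0.5f), row[6]});
  }
  return dets;  // coordinates are still in letterboxed space at this point
}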
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7End2EndORT model object used when to load a YOLOv7End2EndORT + * model exported by YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7End2EndORT : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7end2end_ort.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7End2EndORT(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "yolov7end2end_ort"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc new file mode 100755 index 0000000000..0c82edc2c8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7End2EndORT(pybind11::module &m) { + pybind11::class_( + m, "YOLOv7End2EndORT") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7End2EndORT &self, pybind11::array &data, + float conf_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOv7End2EndORT::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv7End2EndORT::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::YOLOv7End2EndORT::is_mini_pad) + .def_readwrite("is_no_pad", + &vision::detection::YOLOv7End2EndORT::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::YOLOv7End2EndORT::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv7End2EndORT::stride); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc new file mode 100755 index 0000000000..80b8e2787f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc @@ -0,0 +1,357 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
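Initialize() decides whether the exported graph has a dynamic spatial shape by inspecting the first runtime input: any non-positive height or width dimension marks it as dynamic, and for static graphs is_mini_pad is forced off so the letterboxed tensor always matches the fixed input size. A standalone sketch of that check (illustrative only; HasDynamicSpatialDims is a hypothetical helper):

#include <cstdint>
#include <vector>

// Returns true if an NCHW input shape has a dynamic (non-positive) height or
// width dimension, e.g. {1, 3, -1, -1} from an export with dynamic axes.
bool HasDynamicSpatialDims(const std::vector<int64_t>& shape) {
  for (size_t i = 2; i < shape.size(); ++i) {
    if (shape[i] <= 0) return true;
  }
  return false;
}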
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv7End2EndTRT::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {}; // NO CPU + valid_gpu_backends = {Backend::TRT}; // NO ORT + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + if (runtime_option.device != Device::GPU) { + FDWARNING << runtime_option.device + << " is not support for YOLOv7End2EndTRT," + << "will fallback to Device::GPU." << std::endl; + runtime_option.device = Device::GPU; + } + if (runtime_option.backend != Backend::UNKNOWN) { + if (runtime_option.backend != Backend::TRT) { + FDWARNING << runtime_option.backend + << " is not support for YOLOv7End2EndTRT," + << "will fallback to Backend::TRT." 
<< std::endl; + runtime_option.backend = Backend::TRT; + } + } +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv7End2EndTRT::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv7End2EndTRT::~YOLOv7End2EndTRT() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv7End2EndTRT::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + YOLOv7End2EndTRT::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv7End2EndTRT::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." 
<< std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv7End2EndTRT::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv7End2EndTRT::Postprocess( + std::vector &infer_results, DetectionResult *result, + const std::map> &im_info, + float conf_threshold) { + FDASSERT(infer_results.size() == 4, "Output tensor size must be 4."); + FDTensor &num_tensor = infer_results.at(0); // INT32 + FDTensor &boxes_tensor = infer_results.at(1); // FLOAT + FDTensor &scores_tensor = infer_results.at(2); // FLOAT + FDTensor &classes_tensor = infer_results.at(3); // INT32 + FDASSERT(num_tensor.dtype == FDDataType::INT32, + "The dtype of num_dets must be INT32."); + FDASSERT(boxes_tensor.dtype == FDDataType::FP32, + "The dtype of det_boxes_tensor must be FP32."); + FDASSERT(scores_tensor.dtype == FDDataType::FP32, + "The dtype of det_scores_tensor must be FP32."); + FDASSERT(classes_tensor.dtype == FDDataType::INT32, + "The dtype of det_classes_tensor must be INT32."); + FDASSERT(num_tensor.shape[0] == 1, "Only support batch=1 now."); + // post-process for end2end yolov7 after trt nms. 
+ float *boxes_data = static_cast(boxes_tensor.Data()); // (1,100,4) + float *scores_data = static_cast(scores_tensor.Data()); // (1,100) + int32_t *classes_data = + static_cast(classes_tensor.Data()); // (1,100) + int32_t num_dets_after_trt_nms = static_cast(num_tensor.Data())[0]; + if (num_dets_after_trt_nms == 0) { + return true; + } + result->Clear(); + result->Reserve(num_dets_after_trt_nms); + for (size_t i = 0; i < num_dets_after_trt_nms; ++i) { + float confidence = scores_data[i]; + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = classes_data[i]; + float x1 = boxes_data[(i * 4) + 0]; + float y1 = boxes_data[(i * 4) + 1]; + float x2 = boxes_data[(i * 4) + 2]; + float y2 = boxes_data[(i * 4) + 3]; + + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv7End2EndTRT::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h new file mode 100755 index 0000000000..512b4d0e3f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h @@ -0,0 +1,110 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7End2EndTRT model object used when to load a YOLOv7End2EndTRT + * model exported by YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7End2EndTRT : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7end2end_trt.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7End2EndTRT(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + ~YOLOv7End2EndTRT(); + + virtual std::string ModelName() const { return "yolov7end2end_trt"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25); + + void UseCudaPreprocessing(int max_img_size = 3840 * 2160); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool CudaPreprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_results, + DetectionResult *result, + const std::map> &im_info, + float conf_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t *input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t *input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float *input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void *cuda_stream_ = nullptr; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc new file mode 100755 index 0000000000..c677a3ecc5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
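// ---------------------------------------------------------------------------
// Editorial note: the sketch below is illustrative and not part of the patch.
// It shows how the YOLOv7End2EndTRT class declared in yolov7end2end_trt.h
// above could be driven from C++, using only the APIs visible in this diff
// (constructor, Predict, DetectionResult). The model/image file names and the
// use of a default RuntimeOption are assumptions; adapt them to your build
// and deployment, in particular the backend configuration for TensorRT.
#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h"

int main() {
  namespace det = ultrainfer::vision::detection;
  // Default RuntimeOption; per the class docs above, the backend is chosen
  // from the model's valid backends unless configured otherwise.
  ultrainfer::RuntimeOption option;
  det::YOLOv7End2EndTRT model("yolov7end2end_trt.onnx", "", option);

  cv::Mat im = cv::imread("test.jpg");  // 3-D HWC, BGR, as documented above
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(&im, &result, /*conf_threshold=*/0.25f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.boxes.size() << " boxes." << std::endl;
  return 0;
}
// ---------------------------------------------------------------------------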
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7End2EndTRT(pybind11::module &m) { + pybind11::class_( + m, "YOLOv7End2EndTRT") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7End2EndTRT &self, pybind11::array &data, + float conf_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv7End2EndTRT &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv7End2EndTRT::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv7End2EndTRT::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::YOLOv7End2EndTRT::is_mini_pad) + .def_readwrite("is_no_pad", + &vision::detection::YOLOv7End2EndTRT::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::YOLOv7End2EndTRT::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv7End2EndTRT::stride); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc new file mode 100755 index 0000000000..695d585285 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8Postprocessor::YOLOv8Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; +} + +bool YOLOv8Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + // transpose + std::vector dim{0, 2, 1}; + FDTensor tensor_transpose; + function::Transpose(tensors[0], &tensor_transpose, dim); + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensor_transpose.shape[1] * + (tensor_transpose.shape[2] - 4)); + } else { + (*results)[bs].Reserve(tensor_transpose.shape[1]); + } + if (tensor_transpose.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = + reinterpret_cast(tensor_transpose.Data()) + + bs * tensor_transpose.shape[1] * tensor_transpose.shape[2]; + for (size_t i = 0; i < tensor_transpose.shape[1]; ++i) { + int s = i * tensor_transpose.shape[2]; + if (multi_label_) { + for (size_t j = 4; j < tensor_transpose.shape[2]; ++j) { + float confidence = data[s + j]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = j - 4; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } else { + const float *max_class_score = std::max_element( + data + s + 4, data + s + tensor_transpose.shape[2]); + float confidence = *max_class_score; + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 4, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace 
detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h new file mode 100755 index 0000000000..c78eefb688 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv8 serials model. + */ +class ULTRAINFER_DECL YOLOv8Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv8 serials model + */ + YOLOv8Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc new file mode 100755 index 0000000000..606549da45 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8Preprocessor::YOLOv8Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv8Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv8Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov8's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv8Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h new file mode 100755 index 0000000000..08185b1a2d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv8 serials model. + */ +class ULTRAINFER_DECL YOLOv8Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv8 serials model + */ + YOLOv8Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple 
of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc new file mode 100755 index 0000000000..2c66eb31ab --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/yolov8.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8::YOLOv8(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv8::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv8::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv8::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h new file mode 100755 index 0000000000..cf96376feb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov8/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov8/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv8 model object used when to load a YOLOv8 model exported by + * YOLOv8. + */ +class ULTRAINFER_DECL YOLOv8 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov8.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv8(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov8"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv8 + virtual YOLOv8Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv8 + virtual YOLOv8Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv8Preprocessor preprocessor_; + YOLOv8Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc new file mode 100755 index 0000000000..929e9dbf54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv8(pybind11::module &m) { + pybind11::class_(m, + "YOLOv8Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv8Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOv8Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv8Preprocessor::GetSize, + &vision::detection::YOLOv8Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv8Preprocessor::GetPaddingValue, + &vision::detection::YOLOv8Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv8Preprocessor::GetScaleUp, + &vision::detection::YOLOv8Preprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv8Preprocessor::GetMiniPad, + &vision::detection::YOLOv8Preprocessor::SetMiniPad) + .def_property("stride", &vision::detection::YOLOv8Preprocessor::GetStride, + &vision::detection::YOLOv8Preprocessor::SetStride); + + pybind11::class_( + m, "YOLOv8Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv8Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv8Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv8Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv8Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv8Postprocessor::GetConfThreshold, + &vision::detection::YOLOv8Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv8Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv8Postprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv8Postprocessor::GetMultiLabel, + 
&vision::detection::YOLOv8Postprocessor::SetMultiLabel); + + pybind11::class_(m, "YOLOv8") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv8 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv8 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv8::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv8::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc new file mode 100755 index 0000000000..0ce66c0f9d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc @@ -0,0 +1,322 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolox.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +struct YOLOXAnchor { + int grid0; + int grid1; + int stride; +}; + +void GenerateYOLOXAnchors(const std::vector &size, + const std::vector &downsample_strides, + std::vector *anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides in YOLOX, e.g (8,16,32) + const int width = size[0]; + const int height = size[1]; + for (const auto &ds : downsample_strides) { + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*anchors).emplace_back(YOLOXAnchor{g0, g1, ds}); + } + } + } +} + +void LetterBoxWithRightBottomPad(Mat *mat, std::vector size, + std::vector color) { + // specific pre process for YOLOX, not the same as YOLOv5 + // reference: YOLOX/yolox/data/data_augment.py#L142 + float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + + int resize_h = int(round(static_cast(mat->Height()) * r)); + int resize_w = int(round(static_cast(mat->Width()) * r)); + + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + // right-bottom padding for YOLOX + if (pad_h > 0 || pad_w > 0) { + int top = 0; + int left = 0; + int right = pad_w; + int bottom = pad_h; + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOX::YOLOX(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == 
ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOX::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + downsample_strides = {8, 16, 32}; + max_wh = 4096.0f; + is_decode_exported = false; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool YOLOX::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // YOLOX ( >= v0.1.1) preprocess steps + // 1. preproc + // 2. HWC->CHW + // 3. NO!!! BRG2GRB and Normalize needed in YOLOX + LetterBoxWithRightBottomPad(mat, size, padding_value); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOX::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float r = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOX::PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // generate anchors with dowmsample strides + std::vector anchors; + GenerateYOLOXAnchors(size, downsample_strides, &anchors); + + // infer_result shape might look like (1,n,85=5+80) + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // fetch i-th anchor + float grid0 = static_cast(anchors.at(i).grid0); + float grid1 = static_cast(anchors.at(i).grid1); + float downsample_stride = static_cast(anchors.at(i).stride); + // convert from offsets to [x, y, w, h] + float dx = data[s]; + float dy = data[s + 1]; + float dw = data[s + 2]; + float dh = data[s + 3]; + + float x = (dx + grid0) * downsample_stride; + float y = (dy + grid1) * downsample_stride; + float w = std::exp(dw) * downsample_stride; + float h = std::exp(dh) * downsample_stride; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, + x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float r = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOX::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (is_decode_exported) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } else { + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, + conf_threshold, nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h new file mode 100755 index 0000000000..b314a58b56 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief YOLOX model object used when to load a YOLOX model exported by YOLOX. + */ +class ULTRAINFER_DECL YOLOX : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolox.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOX(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "YOLOX"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + /*! @brief + whether the model_file was exported with decode module. The official + YOLOX/tools/export_onnx.py script will export ONNX file without + decode module. Please set it 'true' manually if the model file + was exported with decode module. default false. + */ + bool is_decode_exported; + // downsample strides for YOLOX to generate anchors, + // will take (8,16,32) as default values, might have stride=64 + std::vector downsample_strides; + // for offseting the boxes by classes when using NMS, default 4096 + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // megvii/YOLOX official 'export_onnx.py' script will export static ONNX by + // default. + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc new file mode 100755 index 0000000000..38f7efce1a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
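// ---------------------------------------------------------------------------
// Editorial note: illustrative sketch, not part of the patch. It exercises
// the YOLOX wrapper declared in yolox.h above, including the
// is_decode_exported flag documented in its class comment. File names are
// placeholders and the default RuntimeOption (and ONNX model format) is
// assumed.
#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/detection/contrib/yolox.h"

int main() {
  namespace det = ultrainfer::vision::detection;
  det::YOLOX model("yolox_s.onnx");  // params_file empty, ONNX format by default

  // The official YOLOX export script omits the decode module by default, so
  // is_decode_exported stays false and the decode-aware postprocess path is
  // taken internally; set it to true only for models exported with decoding.
  model.is_decode_exported = false;

  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(&im, &result, /*conf_threshold=*/0.25f,
                     /*nms_iou_threshold=*/0.5f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.scores.size() << " objects." << std::endl;
  return 0;
}
// ---------------------------------------------------------------------------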
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOX(pybind11::module &m) { + pybind11::class_(m, "YOLOX") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOX &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOX::size) + .def_readwrite("padding_value", &vision::detection::YOLOX::padding_value) + .def_readwrite("is_decode_exported", + &vision::detection::YOLOX::is_decode_exported) + .def_readwrite("downsample_strides", + &vision::detection::YOLOX::downsample_strides) + .def_readwrite("max_wh", &vision::detection::YOLOX::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc new file mode 100755 index 0000000000..4b357406ab --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindYOLOv7(pybind11::module &m); +void BindScaledYOLOv4(pybind11::module &m); +void BindYOLOR(pybind11::module &m); +void BindYOLOv6(pybind11::module &m); +void BindYOLOv5Lite(pybind11::module &m); +void BindYOLOv5(pybind11::module &m); +void BindYOLOv5Seg(pybind11::module &m); +void BindFastestDet(pybind11::module &m); +void BindYOLOX(pybind11::module &m); +void BindNanoDetPlus(pybind11::module &m); +void BindPPDet(pybind11::module &m); +void BindYOLOv7End2EndTRT(pybind11::module &m); +void BindYOLOv7End2EndORT(pybind11::module &m); +void BindYOLOv8(pybind11::module &m); +void BindRKYOLO(pybind11::module &m); + +void BindDetection(pybind11::module &m) { + auto detection_module = + m.def_submodule("detection", "Image object detection models."); + BindPPDet(detection_module); + BindYOLOv7(detection_module); + BindScaledYOLOv4(detection_module); + BindYOLOR(detection_module); + BindYOLOv6(detection_module); + BindYOLOv5Lite(detection_module); + BindYOLOv5(detection_module); + BindYOLOv5Seg(detection_module); + BindFastestDet(detection_module); + BindYOLOX(detection_module); + BindNanoDetPlus(detection_module); + BindYOLOv7End2EndTRT(detection_module); + BindYOLOv7End2EndORT(detection_module); + BindYOLOv8(detection_module); + BindRKYOLO(detection_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc new file mode 100755 index 0000000000..56564411f2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc @@ -0,0 +1,108 @@ +#include "ultrainfer/vision/detection/ppdet/base.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include 
"ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +PPDetBase::PPDetBase(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file), postprocessor_(preprocessor_.GetArch()) { + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +} + +std::unique_ptr PPDetBase::Clone() const { + std::unique_ptr clone_model = + ultrainfer::utils::make_unique(PPDetBase(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PPDetBase::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPDetBase::Predict(cv::Mat *im, DetectionResult *result) { + return Predict(*im, result); +} + +bool PPDetBase::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PPDetBase::BatchPredict(const std::vector &imgs, + std::vector *results) { + std::vector fd_images = WrapMat(imgs); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = "image"; + reused_input_tensors_[1].name = "scale_factor"; + reused_input_tensors_[2].name = "im_shape"; + + if (NumInputsOfRuntime() == 1) { + auto scale_factor = static_cast(reused_input_tensors_[1].Data()); + postprocessor_.SetScaleFactor({scale_factor[0], scale_factor[1]}); + } + + // Some models don't need scale_factor and im_shape as input + while (reused_input_tensors_.size() != NumInputsOfRuntime()) { + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +bool PPDetBase::CheckArch() { + // Add "PicoDet" arch for backward compability with the + // old ppdet model, such as picodet from PaddleClas + // PP-ShiTuV2 pipeline. + std::vector archs = { + "SOLOv2", "YOLO", "SSD", "RetinaNet", "RCNN", "Face", + "GFL", "YOLOX", "YOLOv5", "YOLOv6", "YOLOv7", "RTMDet", + "FCOS", "TTFNet", "TOOD", "DETR", "PicoDet"}; + auto arch_ = preprocessor_.GetArch(); + for (auto item : archs) { + if (arch_ == item) { + return true; + } + } + FDWARNING << "Please set model arch," + << "support value : SOLOv2, YOLO, SSD, RetinaNet, " + << "RCNN, Face , GFL , RTMDet ," + << "FCOS , TTFNet , TOOD , DETR, PicoDet" << std::endl; + return false; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h new file mode 100755 index 0000000000..57b0a210a3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/ppdet/postprocessor.h" +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +/** \brief All object detection model APIs are defined inside this namespace + * + */ +namespace detection { + +/*! @brief Base model object used when to load a model exported by + * PaddleDetection + */ +class ULTRAINFER_DECL PPDetBase : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyoloe/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * ppyoloe/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPDetBase(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleDetModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleDetModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PaddleDetection/BaseModel"; } + + /** \brief DEPRECATED Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result); + + /** \brief Predict the detection result for an input image + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, DetectionResult *result); + + /** \brief Predict the detection result for an input image list + * \param[in] im The input image list, all the elements come from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The output detection result list \return true if the prediction successed, + * otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + 
PaddleDetPreprocessor &GetPreprocessor() { return preprocessor_; } + + PaddleDetPostprocessor &GetPostprocessor() { return postprocessor_; } + virtual bool CheckArch(); + +protected: + virtual bool Initialize(); + PaddleDetPreprocessor preprocessor_; + PaddleDetPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h new file mode 100755 index 0000000000..09c2001e9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h @@ -0,0 +1,508 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/detection/ppdet/base.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +class ULTRAINFER_DECL PicoDet : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g picodet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PicoDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_timvx_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PicoDet"; } +}; + +class ULTRAINFER_DECL SOLOv2 : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g picodet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] 
custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + SOLOv2(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "SOLOv2"; } +}; + +class ULTRAINFER_DECL PPYOLOE : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyoloe/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPYOLOE(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE, Backend::TVM}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_horizon_backends = {Backend::HORIZONNPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PPYOLOE"; } +}; + +class ULTRAINFER_DECL PPYOLO : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyolo/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ppyolo/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPYOLO(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual 
std::string ModelName() const { return "PaddleDetection/PP-YOLO"; } +}; + +class ULTRAINFER_DECL YOLOv3 : public PPDetBase { +public: + YOLOv3(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv3"; } +}; + +class ULTRAINFER_DECL PaddleYOLOX : public PPDetBase { +public: + PaddleYOLOX(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOX"; } +}; + +class ULTRAINFER_DECL FasterRCNN : public PPDetBase { +public: + FasterRCNN(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/FasterRCNN"; } +}; + +class ULTRAINFER_DECL MaskRCNN : public PPDetBase { +public: + MaskRCNN(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/MaskRCNN"; } +}; + +class ULTRAINFER_DECL SSD : public PPDetBase { +public: + SSD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/SSD"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv5 : public PPDetBase { +public: + 
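+  // The model wrappers in this header differ mainly in the backend
+  // whitelists they set and in ModelName(); a custom RuntimeOption can be
+  // passed to select a device/backend. Sketch (paths are placeholders and
+  // UseGpu() is assumed from the runtime API):
+  //
+  //   ultrainfer::RuntimeOption opt;
+  //   opt.UseGpu(0);
+  //   PaddleYOLOv5 det("yolov5/model.pdmodel", "yolov5/model.pdiparams",
+  //                    "yolov5/infer_cfg.yml", opt);
+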
PaddleYOLOv5(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv5"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv6 : public PPDetBase { +public: + PaddleYOLOv6(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv6"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv7 : public PPDetBase { +public: + PaddleYOLOv7(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv7"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv8 : public PPDetBase { +public: + PaddleYOLOv8(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv8"; } +}; + +class ULTRAINFER_DECL RTMDet : public PPDetBase { +public: + RTMDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/RTMDet"; } +}; + +class ULTRAINFER_DECL CascadeRCNN : public PPDetBase { +public: + CascadeRCNN(const std::string 
&model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { + return "PaddleDetection/CascadeRCNN"; + } +}; + +class ULTRAINFER_DECL PSSDet : public PPDetBase { +public: + PSSDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/PSSDet"; } +}; + +class ULTRAINFER_DECL RetinaNet : public PPDetBase { +public: + RetinaNet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/RetinaNet"; } +}; + +class ULTRAINFER_DECL PPYOLOESOD : public PPDetBase { +public: + PPYOLOESOD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/PPYOLOESOD"; } +}; + +class ULTRAINFER_DECL FCOS : public PPDetBase { +public: + FCOS(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/FCOS"; } +}; + +class ULTRAINFER_DECL TTFNet : public PPDetBase { +public: + TTFNet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TTFNet"; } +}; + +class ULTRAINFER_DECL TOOD : public PPDetBase { +public: + TOOD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const 
RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TOOD"; } +}; + +class ULTRAINFER_DECL GFL : public PPDetBase { +public: + GFL(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/GFL"; } +}; + +class ULTRAINFER_DECL PaddleDetectionModel : public PPDetBase { +public: + PaddleDetectionModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + CheckArch(); + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetectionModel"; } +}; + +class ULTRAINFER_DECL PPYOLOER : public PPDetBase { +public: + PPYOLOER(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::OPENVINO, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PPYOLOER"; } +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc new file mode 100755 index 0000000000..932049b7f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc @@ -0,0 +1,227 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +template +bool SortScorePairDescend(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndex(const float *scores, const int &score_size, + const float &threshold, const int &top_k, + std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +float BBoxArea(const float *box, const bool &normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return 0.f; + } else { + const float w = box[2] - box[0]; + const float h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +float JaccardOverlap(const float *box1, const float *box2, + const bool &normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return 0.f; + } else { + const float inter_xmin = std::max(box1[0], box2[0]); + const float inter_ymin = std::max(box1[1], box2[1]); + const float inter_xmax = std::min(box1[2], box2[2]); + const float inter_ymax = std::min(box1[3], box2[3]); + float norm = normalized ? 
0.0f : 1.0f; + float inter_w = inter_xmax - inter_xmin + norm; + float inter_h = inter_ymax - inter_ymin + norm; + const float inter_area = inter_w * inter_h; + const float bbox1_area = BBoxArea(box1, normalized); + const float bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +void PaddleMultiClassNMS::FastNMS(const float *boxes, const float *scores, + const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int PaddleMultiClassNMS::NMSForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void PaddleMultiClassNMS::Compute(const float *boxes_data, + const float *scores_data, + const std::vector &boxes_dim, + const std::vector &scores_dim) { + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 4, + "Require the 3-dimension of input boxes be 4, but now it's %lld.", + box_dim); + out_num_rois_data.resize(batch_size); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], 
scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 6}; + std::vector out_index_dims = {num_nmsed_out, 1}; + if (num_nmsed_out == 0) { + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + out_box_data.resize(num_nmsed_out * 6); + out_index_data.resize(num_nmsed_out); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 6; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + + out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; + out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; + out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; + + out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h new file mode 100755 index 0000000000..392cf15325 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +/** \brief Config for PaddleMultiClassNMS + * \param[in] background_label the value of background label + * \param[in] keep_top_k the value of keep_top_k + * \param[in] nms_eta the value of nms_eta + * \param[in] nms_threshold a dict that contains the arguments of nms operations + * \param[in] nms_top_k if there are more than max_num bboxes after NMS, only + * top max_num will be kept. \param[in] normalized Determine whether normalized + * is required \param[in] score_threshold bbox threshold, bboxes with scores + * lower than it will not be considered. 
+ */ +struct NMSOption { + NMSOption() = default; + int64_t background_label = -1; + int64_t keep_top_k = 100; + float nms_eta = 1.0; + float nms_threshold = 0.5; + int64_t nms_top_k = 1000; + bool normalized = true; + float score_threshold = 0.3; +}; + +struct PaddleMultiClassNMS { + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.7; + int64_t nms_top_k; + bool normalized; + float score_threshold; + + std::vector out_num_rois_data; + std::vector out_index_data; + std::vector out_box_data; + void FastNMS(const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices); + int NMSForEachSample(const float *boxes, const float *scores, int num_boxes, + int num_classes, + std::map> *keep_indices); + void Compute(const float *boxes, const float *scores, + const std::vector &boxes_dim, + const std::vector &scores_dim); + + void SetNMSOption(const struct NMSOption &nms_option) { + background_label = nms_option.background_label; + keep_top_k = nms_option.keep_top_k; + nms_eta = nms_option.nms_eta; + nms_threshold = nms_option.nms_threshold; + nms_top_k = nms_option.nms_top_k; + normalized = nms_option.normalized; + score_threshold = nms_option.score_threshold; + } +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc new file mode 100755 index 0000000000..f9bc1fd275 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc @@ -0,0 +1,468 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
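+// The rotated NMS implemented in this file mirrors PaddleMultiClassNMS from
+// multiclass_nms.h: both are configured through an option struct before
+// Compute() is called. Minimal sketch for the axis-aligned variant
+// (boxes_ptr/scores_ptr and the dims stand in for real model outputs, and
+// the thresholds are illustrative, not tuned defaults):
+//
+//   NMSOption opt;
+//   opt.score_threshold = 0.4f;  // discard low-score boxes before NMS
+//   opt.keep_top_k = 100;        // cap detections kept per image
+//   PaddleMultiClassNMS nms;
+//   nms.SetNMSOption(opt);
+//   nms.Compute(boxes_ptr, scores_ptr,
+//               {batch, num_boxes, 4},             // boxes shape
+//               {batch, num_classes, num_boxes});  // scores shape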
+ +#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h" + +#include +#include +#include +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +template struct RotatedBox { T x_ctr, y_ctr, w, h, a; }; + +template struct Point { + T x, y; + Point(const T &px = 0, const T &py = 0) : x(px), y(py) {} + Point operator+(const Point &p) const { return Point(x + p.x, y + p.y); } + Point &operator+=(const Point &p) { + x += p.x; + y += p.y; + return *this; + } + Point operator-(const Point &p) const { return Point(x - p.x, y - p.y); } + Point operator*(const T coeff) const { return Point(x * coeff, y * coeff); } +}; + +template T Dot2D(const Point &A, const Point &B) { + return A.x * B.x + A.y * B.y; +} + +template T Cross2D(const Point &A, const Point &B) { + return A.x * B.y - B.x * A.y; +} + +template +int GetIntersectionPoints(const Point (&pts1)[4], const Point (&pts2)[4], + Point (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + Point vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i] = pts1[(i + 1) % 4] - pts1[i]; + vec2[i] = pts2[(i + 1) % 4] - pts2[i]; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + T det = Cross2D(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (fabs(det) <= 1e-14) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + T t1 = Cross2D(vec2[j], vec12) / det; + T t2 = Cross2D(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto &AB = vec2[0]; + const auto &DA = vec2[3]; + auto ABdotAB = Dot2D(AB, AB); + auto ADdotAD = Dot2D(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + + auto APdotAB = Dot2D(AP, AB); + auto APdotAD = -Dot2D(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto &AB = vec1[0]; + const auto &DA = vec1[3]; + auto ABdotAB = Dot2D(AB, AB); + auto ADdotAD = Dot2D(DA, DA); + for (int i = 0; i < 4; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = Dot2D(AP, AB); + auto APdotAD = -Dot2D(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +template +int ConvexHullGraham(const Point (&p)[24], const int &num_in, + Point (&q)[24], bool shift_to_zero = false) { + assert(num_in >= 2); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. 
+ int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto &start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + auto tmp = q[0]; + q[0] = q[t]; + q[t] = tmp; + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + T dist[24]; + for (int i = 0; i < num_in; i++) { + dist[i] = Dot2D(q[i], q[i]); + } + + // CPU version + std::sort(q + 1, q + num_in, + [](const Point &A, const Point &B) -> bool { + T temp = Cross2D(A, B); + if (fabs(temp) < 1e-6) { + return Dot2D(A, A) < Dot2D(B, B); + } else { + return temp > 0; + } + }); + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1; + } + q[1] = q[k]; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. + // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + while (m > 1 && Cross2D(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) { + m--; + } + q[m++] = q[i]; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i] += start; + } + } + + return m; +} + +template T PolygonArea(const Point (&q)[24], const int &m) { + if (m <= 2) { + return 0; + } + + T area = 0; + for (int i = 1; i < m - 1; i++) { + area += fabs(Cross2D(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.0; +} + +template +T RboxesIntersection(T const *const poly1_raw, T const *const poly2_raw) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from rotated_rect_intersection_pts + Point intersectPts[24], orderedPts[24]; + + Point pts1[4]; + + Point pts2[4]; + for (int i = 0; i < 4; i++) { + pts1[i] = Point(poly1_raw[2 * i], poly1_raw[2 * i + 1]); + pts2[i] = Point(poly2_raw[2 * i], poly2_raw[2 * i + 1]); + } + + int num = GetIntersectionPoints(pts1, pts2, intersectPts); + if (num <= 2) { + return 0.0; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. 
+ int num_convex = ConvexHullGraham(intersectPts, num, orderedPts, true); + return PolygonArea(orderedPts, num_convex); +} + +template T PolyArea(T const *const poly_raw) { + T area = 0.0; + int j = 3; + for (int i = 0; i < 4; i++) { + // area += (x[j] + x[i]) * (y[j] - y[i]); + area += (poly_raw[2 * j] + poly_raw[2 * i]) * + (poly_raw[2 * j + 1] - poly_raw[2 * i + 1]); + j = i; + } + // return static_cast(abs(static_cast(area) / 2.0)); + return std::abs(area / 2.0); +} + +template +void Poly2Rbox(T const *const poly_raw, RotatedBox &box) { + std::vector contour_poly{ + cv::Point2f(poly_raw[0], poly_raw[1]), + cv::Point2f(poly_raw[2], poly_raw[3]), + cv::Point2f(poly_raw[4], poly_raw[5]), + cv::Point2f(poly_raw[6], poly_raw[7]), + }; + cv::RotatedRect rotate_rect = cv::minAreaRect(contour_poly); + box.x_ctr = rotate_rect.center.x; + box.y_ctr = rotate_rect.center.y; + box.w = rotate_rect.size.width; + box.h = rotate_rect.size.height; + box.a = rotate_rect.angle; +} + +template +T RboxIouSingle(T const *const poly1_raw, T const *const poly2_raw) { + const T area1 = PolyArea(poly1_raw); + const T area2 = PolyArea(poly2_raw); + + const T intersection = RboxesIntersection(poly1_raw, poly2_raw); + const T iou = intersection / (area1 + area2 - intersection); + return iou; +} + +template +bool SortScorePairDescendRotated(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndexRotated( + const float *scores, const int &score_size, const float &threshold, + const int &top_k, std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescendRotated); + // Keep top_k scores if needed. 
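+  // (For example, with the default nms_top_k of 2000 only the 2000
+  // highest-scoring candidates of each class enter the suppression loop.)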
+ if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +void PaddleMultiClassNMSRotated::FastNMSRotated( + const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndexRotated(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + // printf("nms thrd: %f, sort dim: %d\n", nms_threshold, + // int(sorted_indices.size())); + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + RboxIouSingle(boxes + idx * 8, boxes + kept_idx * 8); + + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int PaddleMultiClassNMSRotated::NMSRotatedForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMSRotated(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescendRotated>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void PaddleMultiClassNMSRotated::Compute( + const float *boxes_data, const float *scores_data, + const std::vector &boxes_dim, + const std::vector &scores_dim) { + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 8, + "Require the 3-dimension of input boxes be 8, but now it's %lld.", + box_dim); + out_num_rois_data.resize(batch_size); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSRotatedForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + 
all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 10}; + std::vector out_index_dims = {num_nmsed_out, 1}; + if (num_nmsed_out == 0) { + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + out_box_data.resize(num_nmsed_out * 10); + out_index_data.resize(num_nmsed_out); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 10; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + for (int k = 0; k < 8; k++) { + out_box_data[start + 2 + k] = current_boxes_ptr[indices[j] * 8 + k]; + } + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h new file mode 100755 index 0000000000..279276333b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +/** \brief Config for PaddleMultiClassNMSRotated + * \param[in] background_label the value of background label + * \param[in] keep_top_k the value of keep_top_k + * \param[in] nms_eta the value of nms_eta + * \param[in] nms_threshold a dict that contains the arguments of nms operations + * \param[in] nms_top_k if there are more than max_num bboxes after NMS, only + * top max_num will be kept. \param[in] normalized Determine whether normalized + * is required \param[in] score_threshold bbox threshold, bboxes with scores + * lower than it will not be considered. 
+ */ +struct NMSRotatedOption { + NMSRotatedOption() = default; + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta = 1.0; + float nms_threshold = 0.1; + int64_t nms_top_k = 2000; + bool normalized = false; + float score_threshold = 0.1; +}; + +struct PaddleMultiClassNMSRotated { + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.1; + int64_t nms_top_k; + bool normalized; + float score_threshold; + + std::vector out_num_rois_data; + std::vector out_index_data; + std::vector out_box_data; + void FastNMSRotated(const float *boxes, const float *scores, + const int &num_boxes, std::vector *keep_indices); + int NMSRotatedForEachSample(const float *boxes, const float *scores, + int num_boxes, int num_classes, + std::map> *keep_indices); + void Compute(const float *ploy_boxes, const float *scores, + const std::vector &boxes_dim, + const std::vector &scores_dim); + + void SetNMSRotatedOption(const struct NMSRotatedOption &nms_rotated_option) { + background_label = nms_rotated_option.background_label; + keep_top_k = nms_rotated_option.keep_top_k; + nms_eta = nms_rotated_option.nms_eta; + nms_threshold = nms_rotated_option.nms_threshold; + nms_top_k = nms_rotated_option.nms_top_k; + normalized = nms_rotated_option.normalized; + score_threshold = nms_rotated_option.score_threshold; + } +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc new file mode 100755 index 0000000000..e0b58d5da4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc @@ -0,0 +1,362 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
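+// Sketch: overriding the rotated-NMS defaults used by ProcessPPYOLOER below
+// without touching the YAML config (the arch string and threshold values are
+// illustrative only):
+//
+//   NMSRotatedOption ropt;
+//   ropt.score_threshold = 0.2f;
+//   ropt.nms_threshold = 0.15f;
+//   PaddleDetPostprocessor post(arch);  // arch comes from the preprocessor
+//   post.SetNMSRotatedOption(ropt);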
+ +#include "ultrainfer/vision/detection/ppdet/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +bool PaddleDetPostprocessor::ProcessMask( + const FDTensor &tensor, std::vector *results) { + auto shape = tensor.Shape(); + int64_t out_mask_w = shape[2]; + int64_t out_mask_numel = shape[1] * shape[2]; + const auto *data = reinterpret_cast(tensor.CpuData()); + int index = 0; + + for (int i = 0; i < results->size(); ++i) { + (*results)[i].contain_masks = true; + (*results)[i].masks.resize((*results)[i].boxes.size()); + for (int j = 0; j < (*results)[i].boxes.size(); ++j) { + int x1 = static_cast(round((*results)[i].boxes[j][0])); + int y1 = static_cast(round((*results)[i].boxes[j][1])); + int x2 = static_cast(round((*results)[i].boxes[j][2])); + int y2 = static_cast(round((*results)[i].boxes[j][3])); + int keep_mask_h = y2 - y1; + int keep_mask_w = x2 - x1; + int keep_mask_numel = keep_mask_h * keep_mask_w; + (*results)[i].masks[j].Resize(keep_mask_numel); + (*results)[i].masks[j].shape = {keep_mask_h, keep_mask_w}; + const uint32_t *current_ptr = data + index * out_mask_numel; + + auto *keep_mask_ptr = + reinterpret_cast((*results)[i].masks[j].Data()); + for (int row = y1; row < y2; ++row) { + size_t keep_nbytes_in_col = keep_mask_w * sizeof(uint32_t); + const uint32_t *out_row_start_ptr = current_ptr + row * out_mask_w + x1; + uint32_t *keep_row_start_ptr = keep_mask_ptr + (row - y1) * keep_mask_w; + std::memcpy(keep_row_start_ptr, out_row_start_ptr, keep_nbytes_in_col); + } + index += 1; + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessWithNMS( + const std::vector &tensors, + std::vector *results) { + // Get number of boxes for each input image + std::vector num_boxes(tensors[1].shape[0]); + int total_num_boxes = 0; + if (tensors[1].dtype == FDDataType::INT32) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } else if (tensors[1].dtype == FDDataType::INT64) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } + + // Special case for TensorRT, it has fixed output shape of NMS + // So there's invalid boxes in its' output boxes + int num_output_boxes = static_cast(tensors[0].Shape()[0]); + bool contain_invalid_boxes = false; + if (total_num_boxes != num_output_boxes) { + if (num_output_boxes % num_boxes.size() == 0) { + contain_invalid_boxes = true; + } else { + FDERROR << "Cannot handle the output data for this model, unexpected " + "situation." 
+ << std::endl; + return false; + } + } + + // Get boxes for each input image + results->resize(num_boxes.size()); + + if (tensors[0].shape[0] == 0) { + // No detected boxes + return true; + } + + const auto *box_data = static_cast(tensors[0].CpuData()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 6]))); + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + if (contain_invalid_boxes) { + offset += static_cast(num_output_boxes * 6 / num_boxes.size()); + } else { + offset += static_cast(num_boxes[i] * 6); + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessWithoutNMS( + const std::vector &tensors, + std::vector *results) { + int boxes_index = 0; + int scores_index = 1; + + // Judge the index of the input Tensor + if (tensors[0].shape[1] == tensors[1].shape[2]) { + boxes_index = 0; + scores_index = 1; + } else if (tensors[0].shape[2] == tensors[1].shape[1]) { + boxes_index = 1; + scores_index = 0; + } else { + FDERROR << "The shape of boxes and scores should be [batch, boxes_num, " + "4], [batch, classes_num, boxes_num]" + << std::endl; + return false; + } + + // do multi class nms + multi_class_nms_.Compute( + static_cast(tensors[boxes_index].Data()), + static_cast(tensors[scores_index].Data()), + tensors[boxes_index].shape, tensors[scores_index].shape); + auto num_boxes = multi_class_nms_.out_num_rois_data; + auto box_data = + static_cast(multi_class_nms_.out_box_data.data()); + + // Get boxes for each input image + results->resize(num_boxes.size()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 6]))); + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + offset += (num_boxes[i] * 6); + } + + // do scale + if (GetScaleFactor()[0] != 0) { + for (auto &result : *results) { + for (auto &box : result.boxes) { + box[0] /= GetScaleFactor()[1]; + box[1] /= GetScaleFactor()[0]; + box[2] /= GetScaleFactor()[1]; + box[3] /= GetScaleFactor()[0]; + } + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessSolov2( + const std::vector &tensors, + std::vector *results) { + if (tensors.size() != 4) { + FDERROR << "The size of tensors for solov2 must be 4." << std::endl; + return false; + } + + if (tensors[0].shape[0] != 1) { + FDERROR << "SOLOv2 temporarily only supports batch size is 1." << std::endl; + return false; + } + + results->clear(); + results->resize(1); + + (*results)[0].contain_masks = true; + + // tensor[0] means bbox data + const auto bbox_data = static_cast(tensors[0].CpuData()); + // tensor[1] means label data + const auto label_data_ = static_cast(tensors[1].CpuData()); + // tensor[2] means score data + const auto score_data_ = static_cast(tensors[2].CpuData()); + // tensor[3] is mask data and its shape is the same as that of the image. 
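+  // (Shape note: tensors[3] is laid out as [num_instances, H, W]; each
+  //  instance's box is recovered below from the first/last rows and columns
+  //  of its mask that contain foreground pixels.)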
+ const auto mask_data_ = static_cast(tensors[3].CpuData()); + + int rows = static_cast(tensors[3].shape[1]); + int cols = static_cast(tensors[3].shape[2]); + for (int bbox_id = 0; bbox_id < bbox_data[0]; ++bbox_id) { + if (score_data_[bbox_id] >= multi_class_nms_.score_threshold) { + DetectionResult &result_item = (*results)[0]; + result_item.label_ids.emplace_back(label_data_[bbox_id]); + result_item.scores.emplace_back(score_data_[bbox_id]); + + std::vector global_mask; + + for (int k = 0; k < rows * cols; ++k) { + global_mask.push_back( + static_cast(mask_data_[k + bbox_id * rows * cols])); + } + + // find minimize bounding box from mask + cv::Mat mask(rows, cols, CV_32SC1); + + std::memcpy(mask.data, global_mask.data(), + global_mask.size() * sizeof(int)); + + cv::Mat mask_fp; + mask.convertTo(mask_fp, CV_32FC1); + + cv::Mat rowSum; + cv::Mat colSum; + std::vector sum_of_row(rows); + std::vector sum_of_col(cols); + cv::reduce(mask_fp, colSum, 0, cv::REDUCE_SUM, CV_32FC1); + cv::reduce(mask_fp, rowSum, 1, cv::REDUCE_SUM, CV_32FC1); + + for (int row_id = 0; row_id < rows; ++row_id) { + sum_of_row[row_id] = rowSum.at(row_id, 0); + } + for (int col_id = 0; col_id < cols; ++col_id) { + sum_of_col[col_id] = colSum.at(0, col_id); + } + + auto it = std::find_if(sum_of_row.begin(), sum_of_row.end(), + [](int x) { return x > 0.5; }); + float y1 = std::distance(sum_of_row.begin(), it); + auto it2 = std::find_if(sum_of_col.begin(), sum_of_col.end(), + [](int x) { return x > 0.5; }); + float x1 = std::distance(sum_of_col.begin(), it2); + auto rit = std::find_if(sum_of_row.rbegin(), sum_of_row.rend(), + [](int x) { return x > 0.5; }); + float y2 = std::distance(rit, sum_of_row.rend()); + auto rit2 = std::find_if(sum_of_col.rbegin(), sum_of_col.rend(), + [](int x) { return x > 0.5; }); + float x2 = std::distance(rit2, sum_of_col.rend()); + result_item.boxes.emplace_back(std::array({x1, y1, x2, y2})); + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessPPYOLOER( + const std::vector &tensors, + std::vector *results) { + if (tensors.size() != 2) { + FDERROR << "The size of tensors for PPYOLOER must be 2." << std::endl; + return false; + } + + int boxes_index = 0; + int scores_index = 1; + multi_class_nms_rotated_.Compute( + static_cast(tensors[boxes_index].Data()), + static_cast(tensors[scores_index].Data()), + tensors[boxes_index].shape, tensors[scores_index].shape); + auto num_boxes = multi_class_nms_rotated_.out_num_rois_data; + auto box_data = + static_cast(multi_class_nms_rotated_.out_box_data.data()); + + // Get boxes for each input image + results->resize(num_boxes.size()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 10]))); + (*results)[i].scores.push_back(ptr[j * 10 + 1]); + (*results)[i].rotated_boxes.push_back(std::array( + {ptr[j * 10 + 2], ptr[j * 10 + 3], ptr[j * 10 + 4], ptr[j * 10 + 5], + ptr[j * 10 + 6], ptr[j * 10 + 7], ptr[j * 10 + 8], + ptr[j * 10 + 9]})); + } + offset += (num_boxes[i] * 10); + } + + // do scale + if (GetScaleFactor()[0] != 0) { + for (auto &result : *results) { + for (int i = 0; i < result.rotated_boxes.size(); i++) { + for (int j = 0; j < 8; j++) { + auto scale = i % 2 == 0 ? 
GetScaleFactor()[1] : GetScaleFactor()[0];
+          result.rotated_boxes[i][j] /= float(scale);
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+bool PaddleDetPostprocessor::Run(const std::vector<FDTensor> &tensors,
+                                 std::vector<DetectionResult> *results) {
+  if (arch_ == "SOLOv2") {
+    // process for SOLOv2
+    ProcessSolov2(tensors, results);
+    // The fourth output of SOLOv2 is the mask
+    return ProcessMask(tensors[3], results);
+  } else {
+    if (tensors[0].Shape().size() == 3 &&
+        tensors[0].Shape()[2] == 8) { // PPYOLOER
+      return ProcessPPYOLOER(tensors, results);
+    }
+
+    // Dispatch according to whether the model was exported with NMS.
+    if (with_nms_) {
+      if (!ProcessWithNMS(tensors, results)) {
+        return false;
+      }
+    } else {
+      if (!ProcessWithoutNMS(tensors, results)) {
+        return false;
+      }
+    }
+
+    // for detection-only models
+    if (tensors.size() <= 2) {
+      return true;
+    }
+
+    // for Mask R-CNN
+    if (tensors[2].Shape()[0] != tensors[0].Shape()[0]) {
+      FDERROR << "The first dimension of the output mask tensor: "
+              << tensors[2].Shape()[0]
+              << " is not equal to the first dimension of the output boxes "
+                 "tensor: "
+              << tensors[0].Shape()[0] << "." << std::endl;
+      return false;
+    }
+
+    // The third output of Mask R-CNN is the mask
+    return ProcessMask(tensors[2], results);
+  }
+}
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h
new file mode 100755
index 0000000000..fb1d538d41
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h"
+#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace detection {
+/*! @brief Postprocessor object for the PaddleDet series of models.
+ */
+class ULTRAINFER_DECL PaddleDetPostprocessor {
+public:
+  PaddleDetPostprocessor() {
+    // There may be no NMS config in the yaml file,
+    // so we need to give an initial value to multi_class_nms_.
+    multi_class_nms_.SetNMSOption(NMSOption());
+    multi_class_nms_rotated_.SetNMSRotatedOption(NMSRotatedOption());
+  }
+
+  /** \brief Create a postprocessor instance for the PaddleDet series of models
+   *
+   * \param[in] arch Model architecture read from the deployment configuration
+   * file, e.g. ppyoloe/infer_cfg.yml
+   */
+  explicit PaddleDetPostprocessor(const std::string &arch) {
+    // Used to differentiate models
+    arch_ = arch;
+    // There may be no NMS config in the yaml file,
+    // so we need to give an initial value to multi_class_nms_.
+    multi_class_nms_.SetNMSOption(NMSOption());
+    multi_class_nms_rotated_.SetNMSRotatedOption(NMSRotatedOption());
+  }
+
+  /** \brief Process the result of runtime and fill it into DetectionResult
+   * structures
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] result The output detection results
+   * \return true if the postprocess succeeds, otherwise false
+   */
+  bool Run(const std::vector<FDTensor> &tensors,
+           std::vector<DetectionResult> *result);
+
+  /// Apply box decoding and the NMS step to the model outputs. This is
+  /// only available for models exported without box decoding and NMS.
+  void ApplyNMS() { with_nms_ = false; }
+
+  /// If you do not want to modify the Yaml configuration file,
+  /// you can use this function to set rotated NMS parameters.
+  void SetNMSRotatedOption(const NMSRotatedOption &option) {
+    multi_class_nms_rotated_.SetNMSRotatedOption(option);
+  }
+
+  /// If you do not want to modify the Yaml configuration file,
+  /// you can use this function to set NMS parameters.
+  void SetNMSOption(const NMSOption &option) {
+    multi_class_nms_.SetNMSOption(option);
+  }
+
+  // Set the scale_factor_ value. This is only available for models exported
+  // without NMS.
+  void SetScaleFactor(const std::vector<float> &scale_factor_value) {
+    scale_factor_ = scale_factor_value;
+  }
+
+private:
+  std::vector<float> scale_factor_{0.0, 0.0};
+  std::vector<float> GetScaleFactor() { return scale_factor_; }
+
+  // for models without NMS
+  bool with_nms_ = true;
+
+  // Used to differentiate models
+  std::string arch_;
+
+  PaddleMultiClassNMS multi_class_nms_{};
+
+  PaddleMultiClassNMSRotated multi_class_nms_rotated_{};
+
+  // Process general tensors without NMS.
+  bool ProcessWithoutNMS(const std::vector<FDTensor> &tensors,
+                         std::vector<DetectionResult> *results);
+
+  // Process general tensors with NMS.
+  bool ProcessWithNMS(const std::vector<FDTensor> &tensors,
+                      std::vector<DetectionResult> *results);
+
+  // Process SOLOv2
+  bool ProcessSolov2(const std::vector<FDTensor> &tensors,
+                     std::vector<DetectionResult> *results);
+
+  // Process PPYOLOER
+  bool ProcessPPYOLOER(const std::vector<FDTensor> &tensors,
+                       std::vector<DetectionResult> *results);
+
+  // Process the mask tensor for Mask R-CNN
+  bool ProcessMask(const FDTensor &tensor,
+                   std::vector<DetectionResult> *results);
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc
new file mode 100755
index 0000000000..47120a2fc4
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc
@@ -0,0 +1,268 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
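A minimal usage sketch of the postprocessor declared above, under assumptions: the model is exported without box decoding/NMS, `infer_outputs` and `scale_factor` are produced elsewhere by the runtime and preprocessor, and the "YOLO" arch string is only illustrative. Only members shown in the header (SetNMSOption, ApplyNMS, SetScaleFactor, Run) are used.

#include <vector>

#include "ultrainfer/vision/detection/ppdet/postprocessor.h"

// Run detection postprocessing on raw runtime outputs (sketch only).
bool PostprocessSketch(const std::vector<ultrainfer::FDTensor> &infer_outputs,
                       const std::vector<float> &scale_factor,
                       std::vector<ultrainfer::vision::DetectionResult> *out) {
  namespace det = ultrainfer::vision::detection;
  det::PaddleDetPostprocessor postprocessor("YOLO");  // arch value is illustrative

  // Override the YAML NMS settings programmatically.
  det::NMSOption option;
  option.score_threshold = 0.3f;
  option.nms_threshold = 0.5f;
  postprocessor.SetNMSOption(option);

  // The model is assumed to be exported without box decoding + NMS, so the
  // postprocessor applies them and rescales boxes to the original image size.
  postprocessor.ApplyNMS();
  postprocessor.SetScaleFactor(scale_factor);

  return postprocessor.Run(infer_outputs, out);
}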
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPDet(pybind11::module &m) { + pybind11::class_(m, "PaddleDetPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::detection::PaddleDetPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleDetPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::detection::PaddleDetPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::detection::PaddleDetPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_(m, "NMSOption") + .def(pybind11::init()) + .def_readwrite("background_label", + &vision::detection::NMSOption::background_label) + .def_readwrite("keep_top_k", &vision::detection::NMSOption::keep_top_k) + .def_readwrite("nms_eta", &vision::detection::NMSOption::nms_eta) + .def_readwrite("nms_threshold", + &vision::detection::NMSOption::nms_threshold) + .def_readwrite("nms_top_k", &vision::detection::NMSOption::nms_top_k) + .def_readwrite("normalized", &vision::detection::NMSOption::normalized) + .def_readwrite("score_threshold", + &vision::detection::NMSOption::score_threshold); + + pybind11::class_( + m, "PaddleDetPostprocessor") + .def(pybind11::init<>()) + .def(pybind11::init()) + .def("run", + [](vision::detection::PaddleDetPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleDetPostprocessor."); + } + return results; + }) + .def("set_nms_option", + [](vision::detection::PaddleDetPostprocessor &self, + vision::detection::NMSOption option) { + self.SetNMSOption(option); + }) + .def("set_nms_rotated_option", + [](vision::detection::PaddleDetPostprocessor &self, + vision::detection::NMSRotatedOption option) { + self.SetNMSRotatedOption(option); + }) + .def("apply_nms", + [](vision::detection::PaddleDetPostprocessor &self) { + self.ApplyNMS(); + }) + .def("run", [](vision::detection::PaddleDetPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleDetPostprocessor."); + } + return results; + }); + + pybind11::class_(m, + "PPDetBase") + .def(pybind11::init()) + .def("predict", + [](vision::detection::PPDetBase &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::PPDetBase &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def("clone", + [](vision::detection::PPDetBase &self) { return self.Clone(); }) + .def_property_readonly("preprocessor", + &vision::detection::PPDetBase::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::PPDetBase::GetPostprocessor); + + pybind11::class_( + 
m, "PPYOLO") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOE") + .def(pybind11::init()); + + pybind11::class_( + m, "PicoDet") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOX") + .def(pybind11::init()); + + pybind11::class_( + m, "FasterRCNN") + .def(pybind11::init()); + + pybind11::class_( + m, "YOLOv3") + .def(pybind11::init()); + + pybind11::class_( + m, "MaskRCNN") + .def(pybind11::init()); + + pybind11::class_(m, + "SSD") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv5") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv6") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv7") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv8") + .def(pybind11::init()); + + pybind11::class_( + m, "RTMDet") + .def(pybind11::init()); + + pybind11::class_(m, "CascadeRCNN") + .def(pybind11::init()); + + pybind11::class_( + m, "PSSDet") + .def(pybind11::init()); + + pybind11::class_( + m, "RetinaNet") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOESOD") + .def(pybind11::init()); + + pybind11::class_( + m, "FCOS") + .def(pybind11::init()); + + pybind11::class_( + m, "TTFNet") + .def(pybind11::init()); + + pybind11::class_( + m, "TOOD") + .def(pybind11::init()); + + pybind11::class_(m, + "GFL") + .def(pybind11::init()); + + pybind11::class_( + m, "SOLOv2") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleDetectionModel") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOER") + .def(pybind11::init()); + + pybind11::class_(m, "NMSRotatedOption") + .def(pybind11::init()) + .def_readwrite("background_label", + &vision::detection::NMSRotatedOption::background_label) + .def_readwrite("keep_top_k", + &vision::detection::NMSRotatedOption::keep_top_k) + .def_readwrite("nms_eta", &vision::detection::NMSRotatedOption::nms_eta) + .def_readwrite("nms_threshold", + &vision::detection::NMSRotatedOption::nms_threshold) + .def_readwrite("nms_top_k", + &vision::detection::NMSRotatedOption::nms_top_k) + .def_readwrite("normalized", + &vision::detection::NMSRotatedOption::normalized) + .def_readwrite("score_threshold", + &vision::detection::NMSRotatedOption::score_threshold); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc new file mode 100755 index 0000000000..87153c78a8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc @@ -0,0 +1,228 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/pad.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +PaddleDetPreprocessor::PaddleDetPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleDetPreprocessor."); + initialized_ = true; +} + +bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + // read for postprocess + if (cfg["arch"].IsDefined()) { + arch_ = cfg["arch"].as(); + } else { + FDERROR << "Please set model arch," + << "support value : SOLOv2, YOLO, SSD, RetinaNet, RCNN, Face." + << std::endl; + return false; + } + + // read for preprocess + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool PaddleDetPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + + // There are 3 outputs, image, scale_factor, im_shape + // But im_shape is not used for all the PaddleDetection models + // So preprocessor will output the 3 FDTensors, and how to use `im_shape` + // is decided by the model itself + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + // Allocate memory for scale_factor + (*outputs)[1].Resize({batch, 2}, FDDataType::FP32); + // Allocate memory for im_shape + (*outputs)[2].Resize({batch, 2}, FDDataType::FP32); + // Record the max size for a batch of input image + // All the tensor will pad to the max size to compose a batched tensor + std::vector max_hw({-1, -1}); + + auto *scale_factor_ptr = + reinterpret_cast((*outputs)[1].MutableData()); + auto *im_shape_ptr = reinterpret_cast((*outputs)[2].MutableData()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + int origin_w = mat->Width(); + int origin_h = mat->Height(); + scale_factor_ptr[2 * i] = 1.0; + scale_factor_ptr[2 * i + 1] = 1.0; + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + if (processors_[j]->Name().find("Resize") != std::string::npos) { + scale_factor_ptr[2 * i] = mat->Height() * 1.0 / origin_h; + scale_factor_ptr[2 * i + 1] = mat->Width() * 1.0 / origin_w; + } + } + if (mat->Height() > max_hw[0]) { + max_hw[0] = mat->Height(); + } + if (mat->Width() > max_hw[1]) { + max_hw[1] = mat->Width(); + } + im_shape_ptr[2 * i] = max_hw[0]; + im_shape_ptr[2 * i + 1] = max_hw[1]; + } + + // if the size of image less than max_hw, pad to max_hw + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + if (mat->Height() < max_hw[0] || mat->Width() < max_hw[1]) { + pad_op_->SetWidthHeight(max_hw[1], max_hw[0]); + (*pad_op_)(mat); + } + } + + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +void PaddleDetPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +void PaddleDetPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h new file mode 100755 index 0000000000..b68809d99d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/manager.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for the PaddleDet series of models.
+ */
+class ULTRAINFER_DECL PaddleDetPreprocessor : public ProcessorManager {
+public:
+  PaddleDetPreprocessor() = default;
+  /** \brief Create a preprocessor instance for the PaddleDet series of models
+   *
+   * \param[in] config_file Path of configuration file for deployment, e.g.
+   * ppyoloe/infer_cfg.yml
+   */
+  explicit PaddleDetPreprocessor(const std::string &config_file);
+
+  /** \brief Implement the virtual function of ProcessorManager. Apply() is the
+   * body of Run(): it contains the main preprocessing logic, while Run() is
+   * what users call to execute preprocessing.
+   *
+   * \param[in] image_batch The input image batch
+   * \param[in] outputs The output tensors which will be fed into runtime
+   * \return true if the preprocess succeeds, otherwise false
+   */
+  virtual bool Apply(FDMatBatch *image_batch, std::vector<FDTensor> *outputs);
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize();
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute();
+
+  std::string GetArch() { return arch_; }
+
+private:
+  bool BuildPreprocessPipelineFromConfig();
+  std::vector<std::shared_ptr<Processor>> processors_;
+  std::shared_ptr<PadToSize> pad_op_ =
+      std::make_shared<PadToSize>(0, 0, std::vector<float>(3, 0));
+  bool initialized_ = false;
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
+  // read config file
+  std::string config_file_;
+  // read arch_ for postprocess
+  std::string arch_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc
new file mode 100755
index 0000000000..a0fc686dd2
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
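A sketch, under assumptions, of how the scale_factor tensor that the preprocessor above documents as its second output (shape {batch, 2}, float32) could be handed to the postprocessor for a model exported without NMS; the helper name is illustrative and not part of the patch.

#include <vector>

#include "ultrainfer/vision/detection/ppdet/postprocessor.h"
#include "ultrainfer/vision/detection/ppdet/preprocessor.h"

// Forward the (h_scale, w_scale) pair of image `i` from the preprocessor's
// second output tensor to a postprocessor configured for a no-NMS model.
void ForwardScaleFactor(std::vector<ultrainfer::FDTensor> &pre_outputs, size_t i,
                        ultrainfer::vision::detection::PaddleDetPostprocessor
                            *postprocessor) {
  const float *scale_ptr =
      static_cast<const float *>(pre_outputs[1].Data());
  std::vector<float> scale_factor{scale_ptr[2 * i], scale_ptr[2 * i + 1]};
  postprocessor->SetScaleFactor(scale_factor);
}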
+ +#include "ultrainfer/vision/facealign/contrib/face_landmark_1000.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +FaceLandmark1000::FaceLandmark1000(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FaceLandmark1000::Initialize() { + // parameters for preprocess + size_ = {128, 128}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool FaceLandmark1000::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // BRG2GRAY + BGR2GRAY::Run(mat); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool FaceLandmark1000::Postprocess( + FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; i += 2) { + float x = data[i]; + float y = data[i + 1]; + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + // decode landmarks (default 106 landmarks) + result->landmarks.emplace_back(std::array{x * in_w, y * in_h}); + } + + return true; +} + +bool FaceLandmark1000::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(output_tensors[0], result, im_info)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h new file mode 100755 index 0000000000..231c36c3d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { +/*! @brief FaceLandmark1000 model object used when to load a FaceLandmark1000 + * model exported by FaceLandmark1000. + */ +class ULTRAINFER_DECL FaceLandmark1000 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./face_landmarks_1000.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FaceLandmark1000(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "FaceLandmark1000"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result); + + /** \brief Get the input size of image + * + * \return Vector of int values, default {128,128} + */ + std::vector GetSize() { return size_; } + /** \brief Set the input size of image + * + * \param[in] size Vector of int values which represents {width, height} of + * image + */ + void SetSize(const std::vector &size) { size_ = size; } + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info); + // tuple of (width, height), default (128, 128) + std::vector size_; +}; + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc new file mode 100755 index 0000000000..8aae69945a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFaceLandmark1000(pybind11::module &m) { + pybind11::class_( + m, "FaceLandmark1000") + .def(pybind11::init()) + .def( + "predict", + [](vision::facealign::FaceLandmark1000 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_property("size", &vision::facealign::FaceLandmark1000::GetSize, + &vision::facealign::FaceLandmark1000::SetSize); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc new file mode 100755 index 0000000000..261f1ac95a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facealign/contrib/pfld.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +PFLD::PFLD(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PFLD::Initialize() { + // parameters for preprocess + size = {112, 112}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool PFLD::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Normalize + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool PFLD::Postprocess( + FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; i += 2) { + float x = data[i]; + float y = data[i + 1]; + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + // decode landmarks (default 106 landmarks) + result->landmarks.emplace_back(std::array{x * in_w, y * in_h}); + } + + return true; +} + +bool PFLD::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[1], result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h new file mode 100755 index 0000000000..9bf0e59dc0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+
+namespace vision {
+
+namespace facealign {
+/*! @brief PFLD model object, used to load a face alignment model exported by
+ * PFLD.
+ */
+class ULTRAINFER_DECL PFLD : public UltraInferModel {
+public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./pfld.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams;
+   * if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use
+   * CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   * format
+   */
+  PFLD(const std::string &model_file, const std::string &params_file = "",
+       const RuntimeOption &custom_option = RuntimeOption(),
+       const ModelFormat &model_format = ModelFormat::ONNX);
+
+  std::string ModelName() const { return "PFLD"; }
+  /** \brief Predict the face alignment result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D
+   * array with layout HWC, BGR format
+   * \param[in] result The output face alignment result will be written to
+   * this structure
+   * \return true if the prediction succeeds, otherwise false
+   */
+  virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result);
+
+  /// tuple of (width, height), default (112, 112)
+  std::vector<int> size;
+
+private:
+  bool Initialize();
+
+  bool Preprocess(Mat *mat, FDTensor *outputs,
+                  std::map<std::string, std::array<int, 2>> *im_info);
+
+  bool Postprocess(FDTensor &infer_result, FaceAlignmentResult *result,
+                   const std::map<std::string, std::array<int, 2>> &im_info);
+};
+
+} // namespace facealign
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc
new file mode 100755
index 0000000000..e3f7e5e78d
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
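A usage sketch for the PFLD class declared above; the file paths are placeholders, the constructor's default RuntimeOption and ONNX model format are used, and only members visible in this header are called.

#include <cstdio>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/facealign/contrib/pfld.h"

int main() {
  // Placeholder model path; defaults to CPU inference with the ONNX format.
  ultrainfer::vision::facealign::PFLD model("./pfld.onnx");

  cv::Mat image = cv::imread("face.jpg");
  ultrainfer::vision::FaceAlignmentResult result;
  if (!model.Predict(&image, &result)) {
    std::fprintf(stderr, "Prediction failed.\n");
    return -1;
  }

  // `landmarks` holds (x, y) pairs in pixel coordinates of the input image.
  for (const auto &pt : result.landmarks) {
    std::printf("landmark: %.1f, %.1f\n", pt[0], pt[1]);
  }
  return 0;
}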
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPFLD(pybind11::module &m) { + pybind11::class_(m, "PFLD") + .def(pybind11::init()) + .def("predict", + [](vision::facealign::PFLD &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::facealign::PFLD::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc new file mode 100755 index 0000000000..6caceeece4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc @@ -0,0 +1,687 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facealign/contrib/pipnet.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +void PIPNet::GenerateLandmarks(std::vector &infer_result, + FaceAlignmentResult *result, float img_height, + float img_width) { + FDTensor outputs_cls = infer_result.at(0); + FDTensor outputs_x = infer_result.at(1); + FDTensor outputs_y = infer_result.at(2); + FDTensor outputs_nb_x = infer_result.at(3); + FDTensor outputs_nb_y = infer_result.at(4); + int grid_h = outputs_cls.shape[2]; // 8 + int grid_w = outputs_cls.shape[3]; // 8 + int grid_length = grid_h * grid_w; // 8 * 8 = 64 + int input_h = size_[1]; + int input_w = size_[0]; + // fetch data from pointers + const float *outputs_cls_ptr = static_cast(outputs_cls.Data()); + const float *outputs_x_ptr = static_cast(outputs_x.Data()); + const float *outputs_y_ptr = static_cast(outputs_y.Data()); + const float *outputs_nb_x_ptr = static_cast(outputs_nb_x.Data()); + const float *outputs_nb_y_ptr = static_cast(outputs_nb_y.Data()); + + // find max_ids + std::vector max_ids(num_landmarks_); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + const float *score_ptr = outputs_cls_ptr + i * grid_length; + unsigned int max_id = 0; + float max_score = score_ptr[0]; + for (unsigned int j = 0; j < grid_length; ++j) { + if (score_ptr[j] > max_score) { + max_score = score_ptr[j]; + max_id = j; + } + } + max_ids[i] = max_id; // range 0~64 + } + // find x & y offsets + std::vector output_x_select(num_landmarks_); + std::vector output_y_select(num_landmarks_); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + const float *offset_x_ptr = outputs_x_ptr + i * grid_length; + const float *offset_y_ptr = outputs_y_ptr + i * grid_length; + const unsigned int max_id = max_ids.at(i); + output_x_select[i] = offset_x_ptr[max_id]; + output_y_select[i] = offset_y_ptr[max_id]; + } + + // find nb_x & nb_y offsets + std::map> output_nb_x_select; + std::map> output_nb_y_select; + // initialize offsets map + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector nb_x_offset(num_nb_); + std::vector nb_y_offset(num_nb_); + 
output_nb_x_select[i] = nb_x_offset; + output_nb_y_select[i] = nb_y_offset; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + for (unsigned int j = 0; j < num_nb_; ++j) { + const unsigned int max_id = max_ids.at(i); + const float *offset_nb_x_ptr = + outputs_nb_x_ptr + (i * num_nb_ + j) * grid_length; + const float *offset_nb_y_ptr = + outputs_nb_y_ptr + (i * num_nb_ + j) * grid_length; + output_nb_x_select[i][j] = offset_nb_x_ptr[max_id]; + output_nb_y_select[i][j] = offset_nb_y_ptr[max_id]; + } + } + + // calculate coords + std::vector lms_pred_x(num_landmarks_); // 19 + std::vector lms_pred_y(num_landmarks_); // 19 + std::map> lms_pred_nb_x; // 19,10 + std::map> lms_pred_nb_y; // 19,10 + + // initialize pred maps + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector nb_x_offset(num_nb_); + std::vector nb_y_offset(num_nb_); + lms_pred_nb_x[i] = nb_x_offset; + lms_pred_nb_y[i] = nb_y_offset; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + float cx = static_cast(max_ids.at(i) % grid_w); + float cy = static_cast(max_ids.at(i) / grid_w); + // calculate coords & normalize + lms_pred_x[i] = + ((cx + output_x_select[i]) * (float)net_stride_) / (float)input_w; + lms_pred_y[i] = + ((cy + output_y_select[i]) * (float)net_stride_) / (float)input_h; + for (unsigned int j = 0; j < num_nb_; ++j) { + lms_pred_nb_x[i][j] = + ((cx + output_nb_x_select[i][j]) * (float)net_stride_) / + (float)input_w; + lms_pred_nb_y[i][j] = + ((cy + output_nb_y_select[i][j]) * (float)net_stride_) / + (float)input_h; + } + } + + // reverse indexes + std::map> + tmp_nb_x; // 19,max_len_map_[num_landmarks_] + std::map> + tmp_nb_y; // 19,max_len_map_[num_landmarks_] + // initialize reverse maps + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector tmp_x(max_len_map_[num_landmarks_]); + std::vector tmp_y(max_len_map_[num_landmarks_]); + tmp_nb_x[i] = tmp_x; + tmp_nb_y[i] = tmp_y; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + for (unsigned int j = 0; j < max_len_map_[num_landmarks_]; ++j) { + unsigned int ri = + reverse_index1_map_[num_landmarks_] + [i * max_len_map_[num_landmarks_] + j]; + unsigned int rj = + reverse_index2_map_[num_landmarks_] + [i * max_len_map_[num_landmarks_] + j]; + tmp_nb_x[i][j] = lms_pred_nb_x[ri][rj]; + tmp_nb_y[i][j] = lms_pred_nb_y[ri][rj]; + } + } + + // merge predictions + result->Clear(); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + float total_x = lms_pred_x[i]; + float total_y = lms_pred_y[i]; + for (unsigned int j = 0; j < max_len_map_[num_landmarks_]; ++j) { + total_x += tmp_nb_x[i][j]; + total_y += tmp_nb_y[i][j]; + } + float x = total_x / ((float)max_len_map_[num_landmarks_] + 1.f); + float y = total_y / ((float)max_len_map_[num_landmarks_] + 1.f); + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + result->landmarks.emplace_back( + std::array{x * img_width, y * img_height}); + } +}; + +void PIPNet::SetNumLandmarks(const int &num_landmarks) { + if (std::find(supported_num_landmarks_.begin(), + supported_num_landmarks_.end(), + num_landmarks) == supported_num_landmarks_.end()) { + FDWARNING << "The number of landmarks should be in {19, 29, 68, 98}." 
+ << std::endl; + } + num_landmarks_ = num_landmarks; +} +PIPNet::PIPNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PIPNet::Initialize() { + // parameters for preprocess + size_ = {256, 256}; + mean_vals_ = {0.485f, 0.456f, 0.406f}; + std_vals_ = {0.229f, 0.224f, 0.225f}; + num_nb_ = 10; + net_stride_ = 32; + num_landmarks_ = 19; + supported_num_landmarks_ = {19, 29, 68, 98}; + // parameters for num_landmarks_ == 19 + reverse_index1_map_[19] = { + 1, 2, 6, 7, 8, 1, 2, 6, 7, 8, 1, 2, 6, 7, 8, 1, 2, 6, + 0, 2, 3, 4, 6, 7, 8, 0, 2, 3, 4, 6, 7, 8, 0, 2, 3, 4, + 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 4, 5, 6, + 0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 14, 0, 1, 2, 4, 5, 6, + 1, 2, 3, 5, 9, 10, 11, 1, 2, 3, 5, 9, 10, 11, 1, 2, 3, 5, + 3, 4, 9, 10, 11, 3, 4, 9, 10, 11, 3, 4, 9, 10, 11, 3, 4, 9, + 0, 1, 2, 3, 7, 8, 12, 13, 15, 0, 1, 2, 3, 7, 8, 12, 13, 15, + 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 18, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 16, 17, 18, 0, 1, + 3, 4, 5, 9, 10, 14, 17, 3, 4, 5, 9, 10, 14, 17, 3, 4, 5, 9, + 0, 1, 6, 7, 8, 13, 14, 15, 16, 17, 18, 0, 1, 6, 7, 8, 13, 14, + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 0, 2, 5, + 4, 5, 9, 10, 11, 12, 13, 15, 16, 17, 18, 4, 5, 9, 10, 11, 12, 13, + 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, + 12, 13, 14, 15, 17, 18, 12, 13, 14, 15, 17, 18, 12, 13, 14, 15, 17, 18, + 12, 13, 14, 15, 16, 18, 12, 13, 14, 15, 16, 18, 12, 13, 14, 15, 16, 18, + 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17}; + reverse_index2_map_[19] = { + 0, 6, 1, 4, 6, 0, 6, 1, 4, 6, 0, 6, 1, 4, 6, 0, 6, 1, 0, 1, 8, 7, 2, 2, 3, + 0, 1, 8, 7, 2, 2, 3, 0, 1, 8, 7, 3, 1, 3, 5, 5, 4, 3, 1, 5, 6, 6, 9, 3, 1, + 3, 5, 5, 4, 5, 5, 3, 1, 3, 7, 5, 5, 1, 3, 4, 9, 5, 5, 3, 1, 3, 7, 7, 8, 1, + 0, 3, 2, 2, 7, 8, 1, 0, 3, 2, 2, 7, 8, 1, 0, 6, 0, 6, 4, 1, 6, 0, 6, 4, 1, + 6, 0, 6, 4, 1, 6, 0, 6, 1, 3, 4, 9, 1, 2, 6, 9, 8, 1, 3, 4, 9, 1, 2, 6, 9, + 8, 2, 2, 2, 7, 8, 9, 0, 0, 9, 9, 9, 5, 7, 7, 8, 8, 2, 2, 4, 4, 0, 5, 6, 6, + 3, 0, 4, 5, 7, 4, 3, 8, 6, 6, 9, 6, 7, 6, 5, 0, 4, 4, 8, 6, 4, 0, 3, 8, 4, + 4, 9, 7, 6, 7, 9, 8, 7, 2, 2, 2, 9, 9, 9, 0, 0, 8, 5, 9, 7, 9, 9, 8, 4, 3, + 1, 2, 1, 6, 8, 4, 3, 1, 2, 1, 6, 8, 4, 3, 1, 2, 6, 9, 5, 7, 8, 0, 2, 1, 3, + 4, 4, 6, 9, 5, 7, 8, 0, 2, 8, 9, 8, 6, 8, 7, 7, 8, 8, 0, 0, 2, 2, 2, 5, 8, + 9, 8, 9, 7, 8, 7, 5, 2, 1, 4, 4, 1, 3, 9, 7, 8, 7, 5, 2, 1, 1, 5, 7, 0, 3, + 1, 1, 5, 7, 0, 3, 1, 1, 5, 7, 0, 3, 1, 3, 2, 3, 0, 0, 0, 3, 2, 3, 0, 0, 0, + 3, 2, 3, 0, 0, 0, 7, 6, 1, 3, 1, 2, 7, 6, 1, 3, 1, 2, 7, 6, 1, 3, 1, 2, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}; + max_len_map_[19] = 18; + // parameters for num_landmarks_ == 29 + reverse_index1_map_[29] = { + 2, 4, 5, 8, 12, 13, 16, 2, 4, 5, 8, 12, 13, 16, 2, 4, 5, 8, + 12, 3, 6, 7, 9, 14, 15, 17, 3, 6, 7, 9, 
14, 15, 17, 3, 6, 7, + 9, 14, 0, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 16, 0, 3, 4, + 5, 6, 7, 0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 17, 0, + 1, 2, 4, 5, 0, 2, 5, 8, 10, 12, 13, 16, 0, 2, 5, 8, 10, 12, + 13, 16, 0, 2, 5, 0, 2, 4, 8, 10, 12, 13, 16, 0, 2, 4, 8, 10, + 12, 13, 16, 0, 2, 4, 1, 3, 7, 9, 11, 14, 15, 17, 1, 3, 7, 9, + 11, 14, 15, 17, 1, 3, 7, 1, 3, 6, 9, 11, 14, 15, 17, 1, 3, 6, + 9, 11, 14, 15, 17, 1, 3, 6, 0, 2, 4, 5, 10, 12, 13, 16, 0, 2, + 4, 5, 10, 12, 13, 16, 0, 2, 4, 1, 3, 6, 7, 11, 14, 15, 17, 1, + 3, 6, 7, 11, 14, 15, 17, 1, 3, 6, 0, 2, 3, 4, 5, 8, 12, 13, + 16, 18, 20, 0, 2, 3, 4, 5, 8, 12, 13, 1, 2, 3, 6, 7, 9, 14, + 15, 17, 19, 20, 21, 1, 2, 3, 6, 7, 9, 14, 0, 2, 4, 5, 8, 10, + 13, 16, 0, 2, 4, 5, 8, 10, 13, 16, 0, 2, 4, 0, 2, 4, 5, 8, + 10, 12, 16, 18, 22, 0, 2, 4, 5, 8, 10, 12, 16, 18, 1, 3, 6, 7, + 9, 11, 15, 17, 1, 3, 6, 7, 9, 11, 15, 17, 1, 3, 6, 1, 3, 6, + 7, 9, 11, 14, 17, 19, 23, 1, 3, 6, 7, 9, 11, 14, 17, 19, 0, 2, + 4, 5, 8, 10, 12, 13, 18, 0, 2, 4, 5, 8, 10, 12, 13, 18, 0, 1, + 3, 6, 7, 9, 11, 14, 15, 19, 1, 3, 6, 7, 9, 11, 14, 15, 19, 1, + 0, 4, 5, 8, 10, 12, 13, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 0, 1, 6, 7, 9, 11, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 1, 1, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, + 26, 27, 28, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 18, 19, 20, 22, 23, + 24, 25, 26, 27, 18, 20, 21, 24, 25, 26, 27, 28, 18, 20, 21, 24, 25, 26, + 27, 28, 18, 20, 21, 19, 21, 24, 25, 26, 27, 28, 19, 21, 24, 25, 26, 27, + 28, 19, 21, 24, 25, 26, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 18, 19, + 20, 21, 22, 23, 25, 26, 27, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 18, + 19, 20, 21, 22, 23, 24, 26, 27, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, + 18, 19, 20, 21, 22, 23, 24, 25, 27, 20, 21, 22, 23, 24, 25, 26, 28, 20, + 21, 22, 23, 24, 25, 26, 28, 20, 21, 22, 22, 23, 24, 25, 26, 27, 22, 23, + 24, 25, 26, 27, 22, 23, 24, 25, 26, 27, 22}; + reverse_index2_map_[29] = { + 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 9, 3, 5, 3, 7, + 7, 7, 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 7, 6, 6, 6, 8, 9, 7, 0, 9, 6, + 5, 9, 6, 7, 6, 6, 6, 8, 9, 9, 7, 6, 8, 9, 6, 6, 7, 8, 0, 9, 6, 6, 6, 9, + 7, 6, 8, 9, 2, 5, 0, 5, 5, 3, 6, 5, 2, 5, 0, 5, 5, 3, 6, 5, 2, 5, 0, 1, + 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, 2, 4, 0, 5, 5, 3, + 5, 5, 2, 4, 0, 5, 5, 3, 5, 5, 2, 4, 0, 1, 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, + 4, 4, 2, 4, 2, 1, 3, 0, 0, 7, 4, 3, 6, 5, 3, 4, 0, 7, 4, 3, 6, 5, 3, 4, + 0, 7, 4, 0, 7, 4, 3, 6, 5, 2, 4, 0, 7, 4, 3, 6, 5, 2, 4, 0, 7, 4, 6, 0, + 8, 7, 7, 6, 4, 2, 3, 5, 6, 6, 0, 8, 7, 7, 6, 4, 2, 6, 8, 0, 7, 7, 6, 4, + 3, 3, 5, 7, 9, 6, 8, 0, 7, 7, 6, 4, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 1, + 2, 3, 1, 0, 3, 1, 1, 5, 4, 5, 4, 0, 2, 1, 1, 6, 9, 5, 4, 5, 4, 0, 2, 1, + 1, 6, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 5, 5, 5, + 4, 0, 2, 1, 1, 7, 9, 5, 5, 5, 4, 0, 2, 1, 1, 7, 4, 2, 2, 2, 1, 1, 0, 0, + 9, 4, 2, 2, 2, 1, 1, 0, 0, 9, 4, 4, 2, 2, 2, 1, 1, 0, 0, 9, 4, 2, 2, 2, + 1, 1, 0, 0, 9, 4, 8, 9, 8, 8, 7, 8, 8, 8, 8, 1, 3, 0, 8, 5, 8, 9, 9, 9, + 8, 8, 9, 8, 8, 7, 8, 8, 8, 8, 2, 4, 8, 0, 6, 7, 8, 8, 7, 8, 9, 9, 9, 9, + 8, 9, 9, 9, 9, 0, 0, 0, 6, 6, 4, 4, 6, 7, 8, 1, 1, 0, 5, 5, 2, 3, 3, 4, + 6, 1, 1, 0, 5, 5, 2, 3, 3, 4, 2, 8, 7, 7, 5, 4, 6, 5, 2, 8, 7, 7, 5, 4, + 6, 5, 2, 8, 7, 2, 8, 8, 6, 5, 5, 4, 2, 8, 8, 6, 5, 5, 4, 2, 8, 8, 6, 5, + 3, 3, 3, 1, 2, 3, 0, 2, 2, 3, 3, 3, 3, 1, 2, 3, 0, 2, 2, 4, 4, 4, 2, 1, + 1, 0, 0, 1, 2, 4, 4, 4, 2, 1, 1, 0, 0, 1, 7, 6, 5, 5, 3, 2, 1, 1, 0, 1, + 7, 6, 5, 5, 3, 2, 1, 1, 0, 
9, 6, 4, 4, 3, 2, 1, 0, 9, 6, 4, 4, 3, 2, 1, + 0, 9, 6, 4, 7, 7, 9, 9, 7, 3, 7, 7, 9, 9, 7, 3, 7, 7, 9, 9, 7, 3, 7}; + max_len_map_[29] = 19; + // parameters for num_landmarks_ == 68 + reverse_index1_map_[68] = { + 1, 2, 17, 18, 36, 1, 2, 17, 18, 36, 1, 2, 17, 18, 36, 1, 2, 17, + 18, 36, 1, 2, 0, 2, 3, 17, 0, 2, 3, 17, 0, 2, 3, 17, 0, 2, + 3, 17, 0, 2, 3, 17, 0, 2, 0, 1, 3, 4, 0, 1, 3, 4, 0, 1, + 3, 4, 0, 1, 3, 4, 0, 1, 3, 4, 0, 1, 1, 2, 4, 5, 1, 2, + 4, 5, 1, 2, 4, 5, 1, 2, 4, 5, 1, 2, 4, 5, 1, 2, 2, 3, + 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, + 2, 3, 3, 4, 6, 7, 3, 4, 6, 7, 3, 4, 6, 7, 3, 4, 6, 7, + 3, 4, 6, 7, 3, 4, 3, 4, 5, 7, 8, 3, 4, 5, 7, 8, 3, 4, + 5, 7, 8, 3, 4, 5, 7, 8, 3, 4, 5, 6, 8, 9, 5, 6, 8, 9, + 5, 6, 8, 9, 5, 6, 8, 9, 5, 6, 8, 9, 5, 6, 6, 7, 9, 10, + 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, + 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, + 10, 11, 7, 8, 8, 9, 11, 12, 13, 8, 9, 11, 12, 13, 8, 9, 11, 12, + 13, 8, 9, 11, 12, 13, 8, 9, 9, 10, 12, 13, 9, 10, 12, 13, 9, 10, + 12, 13, 9, 10, 12, 13, 9, 10, 12, 13, 9, 10, 10, 11, 13, 14, 10, 11, + 13, 14, 10, 11, 13, 14, 10, 11, 13, 14, 10, 11, 13, 14, 10, 11, 11, 12, + 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, + 11, 12, 12, 13, 15, 16, 12, 13, 15, 16, 12, 13, 15, 16, 12, 13, 15, 16, + 12, 13, 15, 16, 12, 13, 13, 14, 16, 26, 13, 14, 16, 26, 13, 14, 16, 26, + 13, 14, 16, 26, 13, 14, 16, 26, 13, 14, 14, 15, 25, 26, 45, 14, 15, 25, + 26, 45, 14, 15, 25, 26, 45, 14, 15, 25, 26, 45, 14, 15, 0, 1, 2, 18, + 19, 36, 37, 41, 0, 1, 2, 18, 19, 36, 37, 41, 0, 1, 2, 18, 19, 36, + 0, 1, 17, 19, 20, 36, 37, 38, 41, 0, 1, 17, 19, 20, 36, 37, 38, 41, + 0, 1, 17, 19, 0, 17, 18, 20, 21, 36, 37, 38, 40, 41, 0, 17, 18, 20, + 21, 36, 37, 38, 40, 41, 0, 17, 17, 18, 19, 21, 36, 37, 38, 39, 40, 41, + 17, 18, 19, 21, 36, 37, 38, 39, 40, 41, 17, 18, 18, 19, 20, 22, 27, 28, + 37, 38, 39, 40, 41, 18, 19, 20, 22, 27, 28, 37, 38, 39, 40, 41, 21, 23, + 24, 25, 27, 28, 42, 43, 44, 46, 47, 21, 23, 24, 25, 27, 28, 42, 43, 44, + 46, 47, 22, 24, 25, 26, 42, 43, 44, 45, 46, 47, 22, 24, 25, 26, 42, 43, + 44, 45, 46, 47, 22, 24, 16, 22, 23, 25, 26, 43, 44, 45, 46, 47, 16, 22, + 23, 25, 26, 43, 44, 45, 46, 47, 16, 22, 15, 16, 23, 24, 26, 43, 44, 45, + 46, 15, 16, 23, 24, 26, 43, 44, 45, 46, 15, 16, 23, 24, 14, 15, 16, 24, + 25, 44, 45, 46, 14, 15, 16, 24, 25, 44, 45, 46, 14, 15, 16, 24, 25, 44, + 20, 21, 22, 23, 28, 29, 38, 39, 40, 42, 43, 47, 20, 21, 22, 23, 28, 29, + 38, 39, 40, 42, 21, 22, 27, 29, 30, 39, 40, 42, 47, 21, 22, 27, 29, 30, + 39, 40, 42, 47, 21, 22, 27, 29, 27, 28, 30, 31, 35, 39, 42, 27, 28, 30, + 31, 35, 39, 42, 27, 28, 30, 31, 35, 39, 42, 27, 28, 29, 31, 32, 33, 34, + 35, 28, 29, 31, 32, 33, 34, 35, 28, 29, 31, 32, 33, 34, 35, 28, 2, 3, + 29, 30, 32, 33, 48, 49, 2, 3, 29, 30, 32, 33, 48, 49, 2, 3, 29, 30, + 32, 33, 29, 30, 31, 33, 34, 35, 49, 50, 29, 30, 31, 33, 34, 35, 49, 50, + 29, 30, 31, 33, 34, 35, 29, 30, 31, 32, 34, 35, 50, 51, 52, 29, 30, 31, + 32, 34, 35, 50, 51, 52, 29, 30, 31, 32, 29, 30, 31, 32, 33, 35, 52, 53, + 29, 30, 31, 32, 33, 35, 52, 53, 29, 30, 31, 32, 33, 35, 13, 14, 29, 30, + 32, 33, 34, 53, 54, 13, 14, 29, 30, 32, 33, 34, 53, 54, 13, 14, 29, 30, + 0, 1, 2, 17, 18, 19, 20, 37, 38, 39, 40, 41, 0, 1, 2, 17, 18, 19, + 20, 37, 38, 39, 0, 1, 17, 18, 19, 20, 21, 36, 38, 39, 40, 41, 0, 1, + 17, 18, 19, 20, 21, 36, 38, 39, 0, 1, 17, 18, 19, 20, 21, 27, 28, 36, + 37, 39, 40, 41, 0, 1, 17, 18, 19, 20, 21, 27, 19, 20, 21, 27, 28, 29, + 36, 37, 38, 40, 41, 19, 20, 
21, 27, 28, 29, 36, 37, 38, 40, 41, 0, 1, + 17, 18, 19, 20, 21, 27, 28, 36, 37, 38, 39, 41, 0, 1, 17, 18, 19, 20, + 21, 27, 0, 1, 2, 17, 18, 19, 20, 21, 36, 37, 38, 39, 40, 0, 1, 2, + 17, 18, 19, 20, 21, 36, 22, 23, 24, 27, 28, 29, 43, 44, 45, 46, 47, 22, + 23, 24, 27, 28, 29, 43, 44, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 27, + 42, 44, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 27, 42, 15, 16, 22, 23, + 24, 25, 26, 42, 43, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 42, 43, 45, + 14, 15, 16, 23, 24, 25, 26, 42, 43, 44, 46, 47, 14, 15, 16, 23, 24, 25, + 26, 42, 43, 44, 14, 15, 16, 22, 23, 24, 25, 26, 42, 43, 44, 45, 47, 14, + 15, 16, 22, 23, 24, 25, 26, 42, 15, 16, 22, 23, 24, 25, 26, 27, 28, 42, + 43, 44, 45, 46, 15, 16, 22, 23, 24, 25, 26, 27, 2, 3, 4, 5, 6, 49, + 59, 60, 2, 3, 4, 5, 6, 49, 59, 60, 2, 3, 4, 5, 6, 49, 3, 4, + 5, 31, 32, 48, 50, 51, 59, 60, 61, 67, 3, 4, 5, 31, 32, 48, 50, 51, + 59, 60, 30, 31, 32, 33, 34, 48, 49, 51, 52, 58, 59, 60, 61, 62, 66, 67, + 30, 31, 32, 33, 34, 48, 30, 31, 32, 33, 34, 35, 48, 49, 50, 52, 53, 54, + 56, 58, 60, 61, 62, 63, 64, 65, 66, 67, 30, 32, 33, 34, 35, 50, 51, 53, + 54, 55, 56, 62, 63, 64, 65, 30, 32, 33, 34, 35, 50, 51, 11, 12, 13, 34, + 35, 52, 54, 55, 63, 64, 65, 11, 12, 13, 34, 35, 52, 54, 55, 63, 64, 65, + 10, 11, 12, 13, 14, 53, 55, 64, 10, 11, 12, 13, 14, 53, 55, 64, 10, 11, + 12, 13, 14, 53, 8, 9, 10, 11, 12, 13, 53, 54, 56, 57, 63, 64, 65, 8, + 9, 10, 11, 12, 13, 53, 54, 56, 7, 8, 9, 10, 11, 12, 54, 55, 57, 58, + 63, 64, 65, 66, 7, 8, 9, 10, 11, 12, 54, 55, 6, 7, 8, 9, 10, 55, + 56, 58, 59, 62, 65, 66, 67, 6, 7, 8, 9, 10, 55, 56, 58, 59, 4, 5, + 6, 7, 8, 9, 48, 56, 57, 59, 60, 61, 62, 66, 67, 4, 5, 6, 7, 8, + 9, 48, 3, 4, 5, 6, 7, 8, 48, 49, 57, 58, 60, 61, 67, 3, 4, 5, + 6, 7, 8, 48, 49, 57, 2, 3, 4, 5, 6, 31, 48, 49, 59, 2, 3, 4, + 5, 6, 31, 48, 49, 59, 2, 3, 4, 5, 31, 32, 33, 48, 49, 50, 51, 52, + 57, 58, 59, 60, 62, 63, 66, 67, 31, 32, 33, 48, 49, 50, 33, 34, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 64, 65, 66, 67, 33, + 34, 35, 50, 51, 52, 53, 54, 55, 56, 57, 61, 62, 64, 65, 66, 34, 35, 50, + 51, 52, 53, 54, 10, 11, 12, 13, 14, 35, 53, 54, 55, 10, 11, 12, 13, 14, + 35, 53, 54, 55, 10, 11, 12, 13, 9, 10, 11, 12, 51, 52, 53, 54, 55, 56, + 57, 58, 61, 62, 63, 64, 66, 67, 9, 10, 11, 12, 7, 8, 9, 50, 51, 52, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 7, 8, 9, 50, 4, 5, + 6, 7, 48, 49, 50, 51, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 4, 5, + 6, 7}; + reverse_index2_map_[68] = { + 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 1, 4, + 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 6, 1, 1, 5, 6, 1, + 1, 5, 6, 1, 1, 5, 6, 1, 1, 5, 6, 1, 1, 5, 6, 1, 5, 0, 0, 6, 5, 0, 0, 6, 5, + 0, 0, 6, 5, 0, 0, 6, 5, 0, 0, 6, 5, 0, 2, 0, 1, 7, 2, 0, 1, 7, 2, 0, 1, 7, + 2, 0, 1, 7, 2, 0, 1, 7, 2, 0, 2, 1, 1, 6, 2, 1, 1, 6, 2, 1, 1, 6, 2, 1, 1, + 6, 2, 1, 1, 6, 2, 1, 9, 4, 0, 1, 4, 9, 4, 0, 1, 4, 9, 4, 0, 1, 4, 9, 4, 0, + 1, 4, 9, 4, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, + 0, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 3, 0, + 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 3, 1, 0, 4, 9, + 3, 1, 0, 4, 9, 3, 1, 0, 4, 9, 3, 1, 0, 4, 9, 3, 1, 6, 1, 0, 2, 6, 1, 0, 2, + 6, 1, 0, 2, 6, 1, 0, 2, 6, 1, 0, 2, 6, 1, 7, 1, 0, 2, 7, 1, 0, 2, 7, 1, 0, + 2, 7, 1, 0, 2, 7, 1, 0, 2, 7, 1, 6, 1, 1, 4, 6, 1, 1, 4, 6, 1, 1, 4, 6, 1, + 1, 4, 6, 1, 1, 4, 6, 1, 5, 1, 0, 6, 5, 1, 0, 6, 5, 1, 0, 6, 5, 1, 0, 6, 5, + 1, 0, 6, 5, 1, 3, 0, 0, 9, 3, 0, 0, 9, 3, 0, 0, 9, 3, 
0, 0, 9, 3, 0, 0, 9, + 3, 0, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 0, + 3, 9, 0, 4, 4, 8, 6, 0, 3, 9, 0, 4, 4, 8, 6, 0, 3, 9, 0, 4, 4, 3, 8, 0, 0, + 6, 5, 7, 9, 7, 3, 8, 0, 0, 6, 5, 7, 9, 7, 3, 8, 0, 0, 7, 4, 1, 1, 6, 6, 5, + 7, 9, 5, 7, 4, 1, 1, 6, 6, 5, 7, 9, 5, 7, 4, 8, 4, 1, 0, 9, 6, 4, 7, 6, 8, + 8, 4, 1, 0, 9, 6, 4, 7, 6, 8, 8, 4, 9, 6, 0, 4, 2, 7, 9, 6, 5, 5, 9, 9, 6, + 0, 4, 2, 7, 9, 6, 5, 5, 9, 4, 1, 6, 9, 3, 8, 5, 6, 9, 9, 6, 4, 1, 6, 9, 3, + 8, 5, 6, 9, 9, 6, 0, 1, 4, 8, 7, 5, 7, 9, 8, 5, 0, 1, 4, 8, 7, 5, 7, 9, 8, + 5, 0, 1, 7, 6, 0, 1, 4, 7, 5, 6, 6, 9, 7, 6, 0, 1, 4, 7, 5, 6, 6, 9, 7, 6, + 8, 3, 5, 0, 0, 9, 6, 5, 7, 8, 3, 5, 0, 0, 9, 6, 5, 7, 8, 3, 5, 0, 8, 3, 1, + 4, 0, 8, 4, 5, 8, 3, 1, 4, 0, 8, 4, 5, 8, 3, 1, 4, 0, 8, 9, 1, 1, 9, 1, 2, + 8, 4, 7, 2, 8, 7, 9, 1, 1, 9, 1, 2, 8, 4, 7, 2, 8, 8, 0, 0, 6, 6, 8, 6, 8, + 8, 8, 0, 0, 6, 6, 8, 6, 8, 8, 8, 0, 0, 5, 0, 0, 9, 9, 9, 9, 5, 0, 0, 9, 9, + 9, 9, 5, 0, 0, 9, 9, 9, 9, 5, 4, 1, 2, 2, 2, 2, 2, 4, 1, 2, 2, 2, 2, 2, 4, + 1, 2, 2, 2, 2, 2, 4, 8, 8, 6, 5, 0, 7, 7, 9, 8, 8, 6, 5, 0, 7, 7, 9, 8, 8, + 6, 5, 0, 7, 4, 3, 0, 0, 4, 5, 8, 7, 4, 3, 0, 0, 4, 5, 8, 7, 4, 3, 0, 0, 4, + 5, 7, 2, 1, 1, 1, 1, 5, 8, 5, 7, 2, 1, 1, 1, 1, 5, 8, 5, 7, 2, 1, 1, 3, 1, + 5, 4, 1, 0, 6, 9, 3, 1, 5, 4, 1, 0, 6, 9, 3, 1, 5, 4, 1, 0, 8, 9, 5, 4, 9, + 6, 0, 8, 7, 8, 9, 5, 4, 9, 6, 0, 8, 7, 8, 9, 5, 4, 2, 2, 4, 2, 3, 5, 8, 1, + 5, 8, 4, 1, 2, 2, 4, 2, 3, 5, 8, 1, 5, 8, 5, 6, 3, 2, 2, 3, 7, 1, 1, 3, 3, + 0, 5, 6, 3, 2, 2, 3, 7, 1, 1, 3, 9, 9, 6, 6, 3, 2, 2, 7, 9, 3, 2, 1, 0, 3, + 9, 9, 6, 6, 3, 2, 2, 7, 9, 4, 3, 4, 3, 9, 7, 4, 2, 1, 4, 9, 4, 3, 4, 3, 9, + 7, 4, 2, 1, 4, 8, 7, 7, 8, 8, 5, 5, 8, 5, 2, 3, 0, 0, 2, 8, 7, 7, 8, 8, 5, + 5, 8, 4, 4, 5, 5, 5, 7, 7, 9, 0, 0, 3, 2, 2, 4, 4, 5, 5, 5, 7, 7, 9, 0, 3, + 4, 9, 1, 2, 8, 2, 4, 7, 4, 2, 3, 4, 9, 1, 2, 8, 2, 4, 7, 4, 2, 9, 9, 2, 2, + 3, 6, 6, 6, 1, 2, 3, 3, 0, 9, 9, 2, 2, 3, 6, 6, 6, 1, 6, 5, 7, 3, 2, 2, 3, + 4, 1, 1, 1, 3, 6, 5, 7, 3, 2, 2, 3, 4, 1, 1, 4, 2, 2, 8, 5, 3, 1, 8, 4, 1, + 0, 4, 4, 2, 2, 8, 5, 3, 1, 8, 4, 1, 5, 5, 4, 9, 7, 7, 5, 5, 3, 3, 0, 0, 1, + 5, 5, 4, 9, 7, 7, 5, 5, 3, 7, 8, 5, 6, 8, 8, 7, 9, 6, 0, 0, 3, 2, 2, 7, 8, + 5, 6, 8, 8, 7, 9, 6, 3, 2, 2, 5, 3, 3, 0, 6, 3, 2, 2, 5, 3, 3, 0, 6, 3, 2, + 2, 5, 3, 6, 7, 8, 4, 6, 1, 3, 9, 4, 1, 5, 8, 6, 7, 8, 4, 6, 1, 3, 9, 4, 1, + 7, 3, 3, 4, 8, 5, 1, 1, 7, 9, 8, 5, 1, 6, 9, 5, 7, 3, 3, 4, 8, 5, 9, 6, 5, + 3, 5, 6, 9, 6, 1, 1, 6, 9, 8, 8, 8, 3, 0, 3, 8, 6, 6, 6, 8, 8, 5, 3, 3, 8, + 2, 1, 5, 8, 9, 7, 1, 5, 4, 8, 8, 5, 3, 3, 8, 2, 8, 7, 6, 6, 4, 3, 1, 3, 5, + 1, 8, 8, 7, 6, 6, 4, 3, 1, 3, 5, 1, 8, 5, 2, 2, 4, 6, 2, 4, 0, 5, 2, 2, 4, + 6, 2, 4, 0, 5, 2, 2, 4, 6, 2, 7, 5, 2, 3, 6, 7, 5, 2, 2, 9, 8, 2, 5, 7, 5, + 2, 3, 6, 7, 5, 2, 2, 7, 5, 2, 3, 7, 8, 6, 0, 1, 5, 7, 6, 3, 8, 7, 5, 2, 3, + 7, 8, 6, 0, 8, 4, 2, 4, 8, 7, 0, 0, 7, 8, 7, 4, 7, 8, 4, 2, 4, 8, 7, 0, 0, + 7, 9, 7, 3, 2, 6, 7, 6, 5, 0, 0, 6, 7, 9, 7, 3, 9, 7, 3, 2, 6, 7, 6, 7, 6, + 3, 2, 5, 8, 2, 5, 8, 2, 2, 8, 4, 7, 6, 3, 2, 5, 8, 2, 5, 8, 7, 5, 3, 4, 6, + 8, 0, 0, 1, 7, 5, 3, 4, 6, 8, 0, 0, 1, 7, 5, 3, 4, 7, 7, 9, 3, 2, 0, 3, 9, + 6, 4, 5, 3, 2, 6, 3, 0, 7, 7, 9, 3, 2, 0, 8, 9, 8, 7, 2, 0, 2, 7, 8, 9, 6, + 5, 6, 9, 7, 2, 2, 7, 2, 0, 2, 8, 7, 7, 9, 4, 0, 3, 3, 5, 4, 7, 6, 3, 3, 0, + 5, 7, 7, 9, 4, 0, 3, 3, 6, 4, 3, 5, 7, 8, 0, 0, 1, 6, 4, 3, 5, 7, 8, 0, 0, + 1, 6, 4, 3, 5, 8, 9, 9, 9, 7, 4, 4, 4, 2, 1, 4, 7, 9, 5, 0, 4, 2, 9, 8, 9, + 9, 9, 9, 9, 9, 6, 5, 8, 6, 3, 2, 3, 6, 9, 4, 1, 4, 9, 1, 1, 9, 9, 9, 6, 8, + 9, 9, 8, 4, 4, 4, 6, 7, 3, 1, 2, 4, 0, 4, 9, 9, 1, 8, 9, 9, 8}; + 
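+  // Note added for review (a sketch of intent, assuming the standard PIPNet
+  // neighbour-voting scheme rather than quoting the upstream PIPNet code):
+  // reverse_index1_map_[n] records, for each of the n landmarks, which other
+  // landmarks keep it among their num_nb_ mean-face neighbours, and
+  // reverse_index2_map_[n] gives the slot of that landmark inside the
+  // neighbour's offset outputs. Entries are repeated so that every landmark
+  // receives exactly max_len_map_[n] votes, which the GenerateLandmarks()
+  // helper declared in pipnet.h presumably averages together with each
+  // landmark's own prediction.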
max_len_map_[68] = 22; + // parameters for num_landmarks_ == 98 + reverse_index1_map_[98] = { + 1, 2, 3, 4, 5, 33, 1, 2, 3, 4, 5, 33, 1, 2, 3, 4, 5, 0, + 2, 3, 4, 5, 6, 33, 0, 2, 3, 4, 5, 6, 33, 0, 2, 3, 0, 1, + 3, 4, 5, 6, 0, 1, 3, 4, 5, 6, 0, 1, 3, 4, 5, 0, 1, 2, + 4, 5, 6, 7, 0, 1, 2, 4, 5, 6, 7, 0, 1, 2, 0, 1, 2, 3, + 5, 6, 7, 8, 0, 1, 2, 3, 5, 6, 7, 8, 0, 1, 2, 3, 4, 6, + 7, 8, 9, 1, 2, 3, 4, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7, 8, + 9, 10, 2, 3, 4, 5, 7, 8, 9, 10, 2, 3, 4, 5, 6, 8, 9, 10, + 3, 4, 5, 6, 8, 9, 10, 3, 4, 5, 4, 5, 6, 7, 9, 10, 11, 4, + 5, 6, 7, 9, 10, 11, 4, 5, 6, 4, 5, 6, 7, 8, 10, 11, 12, 4, + 5, 6, 7, 8, 10, 11, 12, 4, 5, 6, 7, 8, 9, 11, 12, 13, 76, 5, + 6, 7, 8, 9, 11, 12, 13, 7, 8, 9, 10, 12, 13, 14, 76, 88, 7, 8, + 9, 10, 12, 13, 14, 76, 8, 9, 10, 11, 13, 14, 15, 8, 9, 10, 11, 13, + 14, 15, 8, 9, 10, 10, 11, 12, 14, 15, 16, 10, 11, 12, 14, 15, 16, 10, + 11, 12, 14, 15, 11, 12, 13, 15, 16, 17, 11, 12, 13, 15, 16, 17, 11, 12, + 13, 15, 16, 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, 12, 13, 14, + 16, 17, 13, 14, 15, 17, 18, 19, 13, 14, 15, 17, 18, 19, 13, 14, 15, 17, + 18, 14, 15, 16, 18, 19, 20, 14, 15, 16, 18, 19, 20, 14, 15, 16, 18, 19, + 15, 16, 17, 19, 20, 21, 15, 16, 17, 19, 20, 21, 15, 16, 17, 19, 20, 16, + 17, 18, 20, 21, 22, 16, 17, 18, 20, 21, 22, 16, 17, 18, 20, 21, 17, 18, + 19, 21, 22, 23, 24, 17, 18, 19, 21, 22, 23, 24, 17, 18, 19, 18, 19, 20, + 22, 23, 24, 25, 82, 18, 19, 20, 22, 23, 24, 25, 82, 18, 19, 20, 21, 23, + 24, 25, 26, 27, 19, 20, 21, 23, 24, 25, 26, 27, 19, 20, 21, 22, 24, 25, + 26, 27, 28, 20, 21, 22, 24, 25, 26, 27, 28, 20, 21, 22, 23, 25, 26, 27, + 28, 21, 22, 23, 25, 26, 27, 28, 21, 22, 23, 21, 22, 23, 24, 26, 27, 28, + 29, 21, 22, 23, 24, 26, 27, 28, 29, 21, 22, 23, 24, 25, 27, 28, 29, 30, + 22, 23, 24, 25, 27, 28, 29, 30, 22, 23, 24, 25, 26, 28, 29, 30, 31, 23, + 24, 25, 26, 28, 29, 30, 31, 23, 24, 25, 26, 27, 29, 30, 31, 32, 24, 25, + 26, 27, 29, 30, 31, 32, 24, 25, 26, 27, 28, 30, 31, 32, 25, 26, 27, 28, + 30, 31, 32, 25, 26, 27, 26, 27, 28, 29, 31, 32, 26, 27, 28, 29, 31, 32, + 26, 27, 28, 29, 31, 26, 27, 28, 29, 30, 32, 46, 26, 27, 28, 29, 30, 32, + 46, 26, 27, 28, 27, 28, 29, 30, 31, 46, 27, 28, 29, 30, 31, 46, 27, 28, + 29, 30, 31, 0, 1, 2, 3, 34, 41, 60, 0, 1, 2, 3, 34, 41, 60, 0, + 1, 2, 0, 33, 35, 40, 41, 60, 0, 33, 35, 40, 41, 60, 0, 33, 35, 40, + 41, 33, 34, 36, 37, 39, 40, 41, 60, 61, 62, 33, 34, 36, 37, 39, 40, 41, + 34, 35, 37, 38, 39, 40, 63, 64, 34, 35, 37, 38, 39, 40, 63, 64, 34, 36, + 38, 39, 51, 64, 36, 38, 39, 51, 64, 36, 38, 39, 51, 64, 36, 38, 36, 37, + 39, 51, 52, 63, 64, 65, 36, 37, 39, 51, 52, 63, 64, 65, 36, 35, 36, 37, + 38, 40, 62, 63, 64, 65, 66, 67, 96, 35, 36, 37, 38, 40, 33, 34, 35, 36, + 37, 38, 39, 41, 60, 61, 62, 63, 65, 66, 67, 96, 33, 0, 1, 2, 33, 34, + 35, 40, 60, 61, 67, 0, 1, 2, 33, 34, 35, 40, 43, 49, 50, 51, 68, 43, + 49, 50, 51, 68, 43, 49, 50, 51, 68, 43, 49, 42, 44, 45, 48, 49, 50, 68, + 69, 42, 44, 45, 48, 49, 50, 68, 69, 42, 42, 43, 45, 46, 47, 48, 49, 70, + 42, 43, 45, 46, 47, 48, 49, 70, 42, 32, 44, 46, 47, 48, 71, 72, 73, 32, + 44, 46, 47, 48, 71, 72, 73, 32, 29, 30, 31, 32, 45, 47, 72, 29, 30, 31, + 32, 45, 47, 72, 29, 30, 31, 30, 31, 32, 44, 45, 46, 48, 71, 72, 73, 30, + 31, 32, 44, 45, 46, 48, 42, 43, 44, 45, 46, 47, 49, 50, 69, 70, 71, 72, + 73, 74, 75, 97, 42, 42, 43, 44, 48, 50, 68, 69, 70, 74, 75, 97, 42, 43, + 44, 48, 50, 68, 42, 43, 49, 51, 52, 68, 69, 75, 42, 43, 49, 51, 52, 68, + 69, 75, 42, 37, 38, 42, 50, 52, 53, 64, 68, 37, 38, 42, 50, 52, 53, 64, + 68, 37, 51, 53, 54, 51, 
53, 54, 51, 53, 54, 51, 53, 54, 51, 53, 54, 51, + 53, 51, 52, 54, 55, 56, 57, 59, 51, 52, 54, 55, 56, 57, 59, 51, 52, 54, + 52, 53, 55, 56, 57, 58, 59, 52, 53, 55, 56, 57, 58, 59, 52, 53, 55, 53, + 54, 56, 57, 76, 77, 78, 88, 53, 54, 56, 57, 76, 77, 78, 88, 53, 53, 54, + 55, 57, 58, 77, 78, 79, 88, 53, 54, 55, 57, 58, 77, 78, 79, 53, 54, 55, + 56, 58, 59, 78, 79, 80, 90, 53, 54, 55, 56, 58, 59, 78, 53, 54, 56, 57, + 59, 79, 80, 81, 82, 92, 53, 54, 56, 57, 59, 79, 80, 53, 54, 57, 58, 80, + 81, 82, 92, 53, 54, 57, 58, 80, 81, 82, 92, 53, 0, 1, 2, 3, 4, 33, + 34, 41, 61, 62, 66, 67, 96, 0, 1, 2, 3, 0, 1, 33, 34, 35, 40, 41, + 60, 62, 63, 65, 66, 67, 96, 0, 1, 33, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 60, 61, 63, 64, 65, 66, 67, 96, 35, 36, 37, 38, 39, 40, 51, 52, 61, + 62, 64, 65, 66, 67, 96, 35, 36, 36, 37, 38, 39, 51, 52, 53, 63, 65, 66, + 96, 36, 37, 38, 39, 51, 52, 36, 37, 38, 39, 52, 61, 62, 63, 64, 66, 67, + 96, 36, 37, 38, 39, 52, 41, 60, 61, 62, 63, 64, 65, 67, 96, 41, 60, 61, + 62, 63, 64, 65, 67, 0, 1, 2, 3, 33, 34, 35, 40, 41, 60, 61, 62, 65, + 66, 96, 0, 1, 42, 43, 49, 50, 51, 52, 53, 69, 74, 75, 97, 42, 43, 49, + 50, 51, 52, 42, 43, 44, 48, 49, 50, 51, 68, 70, 71, 73, 74, 75, 97, 42, + 43, 44, 42, 43, 44, 45, 46, 47, 48, 49, 50, 68, 69, 71, 72, 73, 74, 75, + 97, 31, 32, 44, 45, 46, 47, 48, 69, 70, 72, 73, 74, 75, 97, 31, 32, 44, + 28, 29, 30, 31, 32, 45, 46, 47, 70, 71, 73, 74, 97, 28, 29, 30, 31, 29, + 30, 31, 32, 44, 45, 46, 47, 48, 70, 71, 72, 74, 75, 97, 29, 30, 47, 68, + 69, 70, 71, 72, 73, 75, 97, 47, 68, 69, 70, 71, 72, 73, 75, 42, 43, 49, + 50, 52, 68, 69, 70, 71, 72, 73, 74, 97, 42, 43, 49, 50, 6, 7, 8, 9, + 10, 11, 12, 55, 77, 87, 88, 89, 95, 6, 7, 8, 9, 55, 56, 76, 78, 86, + 87, 88, 89, 95, 55, 56, 76, 78, 86, 87, 88, 89, 54, 55, 56, 57, 58, 76, + 77, 79, 80, 85, 86, 87, 88, 89, 90, 94, 95, 54, 55, 56, 57, 58, 59, 77, + 78, 80, 81, 84, 85, 86, 89, 90, 91, 94, 54, 57, 58, 59, 78, 79, 81, 82, + 83, 84, 85, 90, 91, 92, 93, 94, 54, 58, 59, 80, 82, 83, 84, 91, 92, 93, + 58, 59, 80, 82, 83, 84, 91, 92, 20, 21, 22, 23, 24, 25, 26, 59, 81, 83, + 91, 92, 93, 20, 21, 22, 23, 17, 18, 19, 20, 21, 22, 23, 81, 82, 84, 91, + 92, 93, 17, 18, 19, 20, 16, 17, 18, 19, 20, 81, 82, 83, 85, 91, 92, 93, + 94, 16, 17, 18, 19, 14, 15, 16, 17, 18, 83, 84, 86, 87, 90, 93, 94, 95, + 14, 15, 16, 17, 11, 12, 13, 14, 15, 16, 76, 77, 85, 87, 88, 89, 94, 95, + 11, 12, 13, 9, 10, 11, 12, 13, 14, 76, 77, 86, 88, 89, 95, 9, 10, 11, + 12, 13, 7, 8, 9, 10, 11, 12, 13, 55, 76, 77, 86, 87, 89, 95, 7, 8, + 9, 55, 56, 76, 77, 78, 79, 86, 87, 88, 90, 95, 55, 56, 76, 77, 78, 79, + 56, 57, 58, 78, 79, 80, 83, 84, 85, 86, 87, 89, 91, 92, 93, 94, 95, 58, + 59, 79, 80, 81, 82, 83, 84, 85, 90, 92, 93, 94, 58, 59, 79, 80, 19, 20, + 21, 22, 23, 24, 25, 59, 81, 82, 83, 84, 91, 93, 19, 20, 21, 18, 19, 79, + 80, 81, 82, 83, 84, 85, 90, 91, 92, 94, 18, 19, 79, 80, 15, 16, 17, 78, + 79, 80, 83, 84, 85, 86, 87, 89, 90, 91, 93, 95, 15, 13, 14, 15, 76, 77, + 78, 85, 86, 87, 88, 89, 90, 94, 13, 14, 15, 76, 34, 35, 36, 38, 39, 40, + 41, 60, 61, 62, 63, 64, 65, 66, 67, 34, 35, 43, 44, 45, 47, 48, 49, 50, + 68, 69, 70, 71, 72, 73, 74, 75, 43, 44}; + reverse_index2_map_[98] = { + 0, 2, 4, 6, 8, 4, 0, 2, 4, 6, 8, 4, 0, 2, 4, 6, 8, 0, 0, 2, 4, 6, 8, 8, 0, + 0, 2, 4, 6, 8, 8, 0, 0, 2, 1, 1, 0, 2, 4, 6, 1, 1, 0, 2, 4, 6, 1, 1, 0, 2, + 4, 3, 2, 1, 0, 2, 4, 6, 3, 2, 1, 0, 2, 4, 6, 3, 2, 1, 6, 3, 3, 1, 0, 2, 4, + 7, 6, 3, 3, 1, 0, 2, 4, 7, 6, 6, 4, 3, 1, 0, 2, 4, 8, 6, 4, 3, 1, 0, 2, 4, + 8, 6, 7, 5, 3, 1, 0, 2, 4, 9, 7, 5, 3, 1, 0, 2, 4, 
9, 7, 6, 5, 3, 1, 0, 2, + 4, 6, 5, 3, 1, 0, 2, 4, 6, 5, 3, 7, 5, 3, 1, 0, 2, 4, 7, 5, 3, 1, 0, 2, 4, + 7, 5, 3, 9, 7, 5, 3, 1, 0, 2, 5, 9, 7, 5, 3, 1, 0, 2, 5, 9, 9, 7, 5, 3, 1, + 0, 2, 5, 8, 9, 7, 5, 3, 1, 0, 2, 5, 7, 5, 3, 1, 0, 2, 5, 9, 9, 7, 5, 3, 1, + 0, 2, 5, 9, 9, 5, 3, 1, 0, 2, 4, 9, 5, 3, 1, 0, 2, 4, 9, 5, 3, 6, 3, 1, 0, + 2, 6, 6, 3, 1, 0, 2, 6, 6, 3, 1, 0, 2, 7, 3, 1, 0, 3, 7, 7, 3, 1, 0, 3, 7, + 7, 3, 1, 0, 3, 6, 3, 1, 1, 3, 6, 6, 3, 1, 1, 3, 6, 6, 3, 1, 1, 3, 7, 3, 1, + 1, 3, 7, 7, 3, 1, 1, 3, 7, 7, 3, 1, 1, 3, 6, 3, 0, 1, 3, 6, 6, 3, 0, 1, 3, + 6, 6, 3, 0, 1, 3, 7, 2, 0, 1, 3, 5, 7, 2, 0, 1, 3, 5, 7, 2, 0, 1, 3, 5, 2, + 0, 1, 3, 5, 5, 2, 0, 1, 3, 5, 5, 2, 0, 1, 3, 4, 2, 0, 1, 3, 5, 8, 4, 2, 0, + 1, 3, 5, 8, 4, 2, 0, 5, 2, 0, 1, 3, 5, 7, 9, 5, 2, 0, 1, 3, 5, 7, 9, 5, 4, + 2, 0, 1, 3, 5, 7, 9, 4, 2, 0, 1, 3, 5, 7, 9, 4, 4, 2, 0, 1, 3, 5, 7, 9, 4, + 2, 0, 1, 3, 5, 7, 9, 4, 4, 2, 0, 1, 3, 5, 7, 4, 2, 0, 1, 3, 5, 7, 4, 2, 0, + 9, 4, 2, 0, 1, 3, 5, 6, 9, 4, 2, 0, 1, 3, 5, 6, 9, 9, 4, 2, 0, 1, 3, 5, 6, + 9, 4, 2, 0, 1, 3, 5, 6, 9, 8, 4, 2, 0, 1, 3, 4, 6, 8, 4, 2, 0, 1, 3, 4, 6, + 8, 6, 4, 2, 0, 1, 3, 3, 5, 6, 4, 2, 0, 1, 3, 3, 5, 6, 6, 4, 2, 0, 1, 2, 3, + 6, 4, 2, 0, 1, 2, 3, 6, 4, 2, 6, 4, 2, 0, 1, 1, 6, 4, 2, 0, 1, 1, 6, 4, 2, + 0, 1, 8, 6, 4, 2, 0, 0, 9, 8, 6, 4, 2, 0, 0, 9, 8, 6, 4, 8, 6, 4, 2, 0, 6, + 8, 6, 4, 2, 0, 6, 8, 6, 4, 2, 0, 2, 4, 5, 8, 3, 1, 6, 2, 4, 5, 8, 3, 1, 6, + 2, 4, 5, 7, 1, 1, 5, 0, 8, 7, 1, 1, 5, 0, 8, 7, 1, 1, 5, 0, 7, 1, 2, 8, 6, + 0, 5, 9, 8, 8, 7, 1, 2, 8, 6, 0, 5, 8, 2, 1, 4, 0, 6, 7, 9, 8, 2, 1, 4, 0, + 6, 7, 9, 8, 1, 0, 5, 5, 7, 1, 0, 5, 5, 7, 1, 0, 5, 5, 7, 1, 0, 4, 0, 2, 2, + 6, 6, 2, 8, 4, 0, 2, 2, 6, 6, 2, 8, 4, 4, 0, 2, 1, 4, 7, 4, 4, 5, 9, 9, 7, + 4, 0, 2, 1, 4, 5, 2, 0, 3, 9, 9, 4, 2, 7, 5, 4, 8, 9, 8, 6, 6, 5, 5, 7, 9, + 0, 0, 3, 3, 2, 6, 7, 5, 7, 9, 0, 0, 3, 3, 2, 5, 0, 6, 7, 2, 5, 0, 6, 7, 2, + 5, 0, 6, 7, 2, 5, 1, 1, 8, 5, 0, 4, 9, 7, 1, 1, 8, 5, 0, 4, 9, 7, 1, 8, 1, + 1, 7, 4, 0, 6, 9, 8, 1, 1, 7, 4, 0, 6, 9, 8, 7, 2, 1, 0, 6, 9, 8, 9, 7, 2, + 1, 0, 6, 9, 8, 9, 7, 8, 5, 4, 2, 2, 1, 6, 8, 5, 4, 2, 2, 1, 6, 8, 5, 4, 9, + 7, 6, 3, 0, 0, 3, 6, 2, 7, 9, 7, 6, 3, 0, 0, 3, 7, 3, 0, 3, 5, 2, 2, 9, 8, + 4, 5, 7, 6, 7, 9, 6, 7, 2, 0, 4, 2, 1, 3, 2, 7, 9, 5, 8, 2, 0, 4, 2, 1, 3, + 0, 4, 3, 1, 5, 2, 6, 8, 0, 4, 3, 1, 5, 2, 6, 8, 0, 5, 6, 5, 5, 1, 5, 8, 8, + 5, 6, 5, 5, 1, 5, 8, 8, 5, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, + 1, 7, 0, 1, 9, 9, 9, 9, 7, 0, 1, 9, 9, 9, 9, 7, 0, 1, 4, 0, 5, 2, 0, 2, 4, + 4, 0, 5, 2, 0, 2, 4, 4, 0, 5, 6, 5, 0, 8, 6, 6, 9, 6, 6, 5, 0, 8, 6, 6, 9, + 6, 6, 3, 2, 0, 2, 7, 7, 5, 7, 8, 3, 2, 0, 2, 7, 7, 5, 7, 2, 0, 2, 1, 1, 2, + 4, 3, 5, 7, 2, 0, 2, 1, 1, 2, 4, 4, 3, 7, 1, 0, 5, 4, 8, 8, 8, 4, 3, 7, 1, + 0, 5, 4, 7, 4, 7, 0, 9, 6, 6, 6, 7, 4, 7, 0, 9, 6, 6, 6, 7, 4, 5, 6, 7, 8, + 2, 5, 4, 1, 9, 6, 1, 9, 4, 5, 6, 7, 8, 9, 3, 4, 6, 2, 3, 1, 2, 9, 7, 4, 0, + 5, 8, 9, 3, 9, 6, 5, 6, 7, 7, 3, 1, 7, 4, 2, 3, 6, 4, 1, 4, 0, 8, 5, 3, 3, + 1, 8, 8, 9, 7, 3, 1, 0, 5, 8, 3, 8, 5, 8, 4, 2, 8, 4, 3, 9, 1, 1, 7, 8, 8, + 4, 2, 8, 4, 3, 9, 6, 5, 9, 7, 9, 6, 0, 0, 3, 5, 2, 9, 6, 5, 9, 7, 9, 3, 4, + 1, 5, 5, 3, 2, 1, 9, 3, 4, 1, 5, 5, 3, 2, 9, 8, 8, 9, 6, 7, 9, 9, 6, 0, 0, + 5, 6, 2, 4, 9, 8, 4, 8, 8, 2, 3, 2, 8, 1, 8, 1, 9, 4, 8, 8, 2, 3, 2, 3, 5, + 8, 8, 1, 3, 9, 0, 3, 7, 8, 5, 0, 5, 3, 5, 8, 9, 6, 5, 6, 8, 6, 1, 4, 7, 6, + 4, 2, 5, 4, 2, 4, 0, 9, 8, 6, 4, 3, 3, 4, 9, 1, 1, 0, 4, 7, 2, 9, 8, 6, 8, + 7, 7, 5, 4, 5, 2, 5, 8, 1, 1, 6, 7, 8, 7, 7, 5, 9, 8, 8, 9, 9, 7, 4, 7, 9, + 5, 0, 0, 1, 6, 3, 9, 8, 9, 5, 5, 2, 4, 3, 2, 3, 1, 9, 5, 5, 2, 
4, 3, 2, 3, + 6, 9, 9, 6, 8, 1, 0, 6, 8, 9, 5, 3, 4, 6, 9, 9, 6, 9, 8, 6, 6, 5, 6, 7, 8, + 4, 2, 0, 8, 7, 9, 8, 6, 6, 1, 5, 2, 7, 5, 3, 2, 0, 3, 1, 5, 2, 7, 5, 3, 2, + 0, 7, 4, 3, 4, 9, 7, 5, 1, 3, 7, 7, 6, 7, 2, 2, 3, 4, 6, 7, 4, 3, 4, 6, 9, + 0, 0, 9, 9, 6, 9, 7, 0, 7, 2, 8, 5, 3, 3, 3, 2, 5, 7, 6, 7, 8, 3, 2, 7, 4, + 4, 8, 5, 1, 6, 2, 3, 5, 0, 2, 3, 5, 1, 6, 2, 3, 5, 0, 2, 7, 6, 6, 6, 7, 8, + 9, 8, 4, 2, 8, 0, 8, 7, 6, 6, 6, 8, 7, 6, 5, 7, 8, 9, 3, 1, 1, 3, 1, 2, 8, + 7, 6, 5, 7, 5, 4, 5, 9, 7, 5, 5, 1, 4, 5, 1, 5, 7, 5, 4, 5, 8, 5, 4, 6, 8, + 8, 2, 2, 8, 4, 9, 0, 9, 8, 5, 4, 6, 9, 8, 4, 4, 6, 8, 5, 8, 2, 5, 5, 4, 6, + 1, 9, 8, 4, 9, 8, 5, 4, 6, 7, 1, 3, 1, 1, 3, 2, 9, 8, 5, 4, 6, 9, 8, 7, 7, + 8, 9, 9, 6, 0, 2, 8, 1, 5, 5, 9, 8, 7, 3, 6, 3, 0, 2, 8, 3, 4, 3, 6, 0, 3, + 6, 3, 0, 2, 8, 8, 6, 8, 1, 0, 1, 9, 6, 3, 6, 9, 6, 6, 9, 7, 1, 8, 6, 5, 6, + 2, 0, 3, 4, 3, 9, 5, 3, 0, 9, 6, 5, 6, 2, 9, 8, 8, 7, 7, 9, 9, 7, 2, 0, 1, + 8, 5, 5, 9, 8, 8, 9, 8, 9, 8, 1, 4, 0, 0, 4, 8, 1, 4, 7, 9, 8, 9, 8, 8, 9, + 9, 6, 4, 7, 7, 4, 0, 4, 7, 9, 1, 9, 6, 6, 8, 8, 9, 9, 4, 1, 8, 5, 0, 0, 4, + 1, 9, 8, 8, 9, 9, 4, 9, 7, 7, 8, 7, 7, 8, 5, 3, 0, 2, 3, 2, 0, 3, 9, 7, 7, + 7, 9, 8, 7, 7, 8, 4, 3, 0, 3, 4, 3, 0, 2, 7, 7}; + max_len_map_[98] = 17; + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PIPNet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + // RGR2RGB + BGR2RGB::Run(mat); + + // Normalize + Normalize::Run(mat, mean_vals_, std_vals_); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool PIPNet::Postprocess( + std::vector &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.at(0).shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.at(0).dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + GenerateLandmarks(infer_result, result, in_h, in_w); + + return true; +} + +bool PIPNet::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h new file mode 100755 index 0000000000..55a4f724c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { +/*! @brief PIPNet model object used when to load a PIPNet model exported by + * PIPNet. + */ +class ULTRAINFER_DECL PIPNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./pipnet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + PIPNet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "PIPNet"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result); + + /** \brief Get the number of landmakrs + * + * \return Integer type, default num_landmarks = 19 + */ + int GetNumLandmarks() { return num_landmarks_; } + /** \brief Get the mean values for normalization + * + * \return Vector of float values, default mean_vals = {0.485f, 0.456f, + * 0.406f} + */ + std::vector GetMeanVals() { return mean_vals_; } + /** \brief Get the std values for normalization + * + * \return Vector of float values, default std_vals = {0.229f, 0.224f, 0.225f} + */ + std::vector GetStdVals() { return std_vals_; } + /** \brief Get the input size of image + * + * \return Vector of int values, default {256, 256} + */ + std::vector GetSize() { return size_; } + /** \brief Set the number of landmarks + * + * \param[in] num_landmarks Integer value which represents number of landmarks + */ + void SetNumLandmarks(const int &num_landmarks); + /** \brief Set the mean values for normalization + * + * \param[in] mean_vals Vector 
of float values whose length is equal to 3 + */ + void SetMeanVals(const std::vector &mean_vals) { + mean_vals_ = mean_vals; + } + /** \brief Set the std values for normalization + * + * \param[in] std_vals Vector of float values whose length is equal to 3 + */ + void SetStdVals(const std::vector &std_vals) { std_vals_ = std_vals; } + /** \brief Set the input size of image + * + * \param[in] size Vector of int values which represents {width, height} of + * image + */ + void SetSize(const std::vector &size) { size_ = size; } + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceAlignmentResult *result, + const std::map> &im_info); + void GenerateLandmarks(std::vector &infer_result, + FaceAlignmentResult *result, float img_height, + float img_width); + std::map num_lms_map_; + std::map max_len_map_; + std::map> reverse_index1_map_; + std::map> reverse_index2_map_; + int num_nb_; + int net_stride_; + // Now PIPNet support num_landmarks in {19, 29, 68, 98} + std::vector supported_num_landmarks_; + // tuple of (width, height), default (256, 256) + std::vector size_; + + // Mean parameters for normalize, size should be the the same as channels, + // default mean_vals = {0.485f, 0.456f, 0.406f} + std::vector mean_vals_; + // Std parameters for normalize, size should be the the same as channels, + // default std_vals = {0.229f, 0.224f, 0.225f} + std::vector std_vals_; + // number of landmarks + int num_landmarks_; +}; + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc new file mode 100755 index 0000000000..cd379c4bf2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
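Since pipnet.h above only declares the C++ interface, a minimal usage sketch may help readers of this patch. It is illustrative only and is not code shipped in this PR: the umbrella header name, the Initialized() check inherited from UltraInferModel, the file paths, and the landmarks field of FaceAlignmentResult are assumptions inferred from the sibling models in this patch.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision.h"  // assumed umbrella header pulling in the facealign models

    int main() {
      // Load an ONNX PIPNet model; params_file stays empty for ONNX models.
      ultrainfer::vision::facealign::PIPNet model("./pipnet.onnx");
      if (!model.Initialized()) {  // Initialized() is assumed from the UltraInferModel base class
        std::cerr << "Failed to initialize PIPNet." << std::endl;
        return -1;
      }
      model.SetNumLandmarks(98);  // supported values: 19, 29, 68, 98 (default 19)

      cv::Mat im = cv::imread("face.jpg");
      ultrainfer::vision::FaceAlignmentResult res;
      if (!model.Predict(&im, &res)) {
        std::cerr << "Prediction failed." << std::endl;
        return -1;
      }
      // res.landmarks is expected to hold num_landmarks (x, y) points scaled back
      // to the original image size.
      std::cout << "Predicted " << res.landmarks.size() << " landmarks." << std::endl;
      return 0;
    }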
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPIPNet(pybind11::module &m) { + pybind11::class_(m, "PIPNet") + .def(pybind11::init()) + .def("predict", + [](vision::facealign::PIPNet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_property("size", &vision::facealign::PIPNet::GetSize, + &vision::facealign::PIPNet::SetSize) + .def_property("mean_vals", &vision::facealign::PIPNet::GetMeanVals, + &vision::facealign::PIPNet::SetMeanVals) + .def_property("std_vals", &vision::facealign::PIPNet::GetStdVals, + &vision::facealign::PIPNet::SetStdVals) + .def_property("num_landmarks", + &vision::facealign::PIPNet::GetNumLandmarks, + &vision::facealign::PIPNet::SetNumLandmarks); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc new file mode 100755 index 0000000000..a1472d0ce4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPFLD(pybind11::module &m); +void BindFaceLandmark1000(pybind11::module &m); +void BindPIPNet(pybind11::module &m); + +void BindFaceAlign(pybind11::module &m) { + auto facedet_module = m.def_submodule("facealign", "Face alignment models."); + BindPFLD(facedet_module); + BindFaceLandmark1000(facedet_module); + BindPIPNet(facedet_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc new file mode 100755 index 0000000000..cd203a1417 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/facedet/contrib/centerface/centerface.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFace::CenterFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool CenterFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool CenterFace::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool CenterFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h new file mode 100755 index 0000000000..bd3dcd8ac4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/facedet/contrib/centerface/postprocessor.h" +#include "ultrainfer/vision/facedet/contrib/centerface/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief CenterFace model object used when to load a CenterFace model exported + * by CenterFace. 
+ */ +class ULTRAINFER_DECL CenterFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./centerface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + CenterFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() { return "centerface"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceDetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of CenterFace + virtual CenterFacePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of CenterFace + virtual CenterFacePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + CenterFacePreprocessor preprocessor_; + CenterFacePostprocessor postprocessor_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc new file mode 100755 index 0000000000..d1a930fa62 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
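As an illustration of the centerface.h API declared above (again, not code contained in this patch): a sketch of single-image and batch prediction, together with the confidence/NMS threshold setters that the CenterFacePostprocessor introduced later in this patch exposes. The umbrella header name, file paths, and the Initialized() check are assumptions.

    #include <iostream>
    #include <vector>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision.h"  // assumed umbrella header

    int main() {
      ultrainfer::vision::facedet::CenterFace model("./centerface.onnx");
      if (!model.Initialized()) {  // assumed from the UltraInferModel base class
        std::cerr << "Failed to initialize CenterFace." << std::endl;
        return -1;
      }
      // Optional tuning through the postprocessor accessor declared above
      // (defaults are conf_threshold 0.5 and nms_threshold 0.3).
      model.GetPostprocessor().SetConfThreshold(0.6f);
      model.GetPostprocessor().SetNMSThreshold(0.3f);

      cv::Mat im = cv::imread("faces.jpg");
      ultrainfer::vision::FaceDetectionResult res;
      if (!model.Predict(im, &res)) {
        std::cerr << "Prediction failed." << std::endl;
        return -1;
      }
      // One box/score per detected face, with 5 landmark points per face by default.
      std::cout << "Detected " << res.boxes.size() << " faces." << std::endl;

      // BatchPredict currently only accepts a batch of size 1.
      std::vector<ultrainfer::vision::FaceDetectionResult> batch_res;
      model.BatchPredict({im}, &batch_res);
      return 0;
    }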
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterFace(pybind11::module &m) { + pybind11::class_( + m, "CenterFacePreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::CenterFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in CenterFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::facedet::CenterFacePreprocessor::GetSize, + &vision::facedet::CenterFacePreprocessor::SetSize); + + pybind11::class_( + m, "CenterFacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::CenterFacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in CenterFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::CenterFacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in CenterFacePostprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::CenterFacePostprocessor::GetConfThreshold, + &vision::facedet::CenterFacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::CenterFacePostprocessor::GetNMSThreshold, + &vision::facedet::CenterFacePostprocessor::SetNMSThreshold); + + pybind11::class_(m, + "CenterFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::CenterFace &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::CenterFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::CenterFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::CenterFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc new file mode 100755 index 0000000000..f4d4637a6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/centerface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFacePostprocessor::CenterFacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.3; + landmarks_per_face_ = 5; +} + +bool CenterFacePostprocessor::Run( + const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info) { + int batch = infer_result[0].shape[0]; + + results->resize(batch); + FDTensor heatmap = infer_result[0]; //(1 1 160 160) + FDTensor scales = infer_result[1]; //(1 2 160 160) + FDTensor offsets = infer_result[2]; //(1 2 160 160) + FDTensor landmarks = infer_result[3]; //(1 10 160 160) + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].landmarks_per_face = landmarks_per_face_; + (*results)[bs].Reserve(heatmap.shape[2]); + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + int fea_h = heatmap.shape[2]; + int fea_w = heatmap.shape[3]; + int spacial_size = fea_w * fea_h; + + float *heatmap_out = static_cast(heatmap.Data()); + + float *scale0 = static_cast(scales.Data()); + float *scale1 = scale0 + spacial_size; + + float *offset0 = static_cast(offsets.Data()); + float *offset1 = offset0 + spacial_size; + float confidence = 0.f; + + std::vector ids; + for (int i = 0; i < fea_h; i++) { + for (int j = 0; j < fea_w; j++) { + if (heatmap_out[i * fea_w + j] > conf_threshold_) { + ids.push_back(i); + ids.push_back(j); + } + } + } + + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale_h = ipt_h / out_h; + float scale_w = ipt_w / out_w; + + for (int i = 0; i < ids.size() / 2; i++) { + int id_h = ids[2 * i]; + int id_w = ids[2 * i + 1]; + int index = id_h * fea_w + id_w; + confidence = heatmap_out[index]; + + float s0 = std::exp(scale0[index]) * 4; + float s1 = std::exp(scale1[index]) * 4; + float o0 = offset0[index]; + float o1 = offset1[index]; + + float x1 = (id_w + o1 + 0.5) * 4 - s1 / 2 > 0.f + ? (id_w + o1 + 0.5) * 4 - s1 / 2 + : 0; + float y1 = (id_h + o0 + 0.5) * 4 - s0 / 2 > 0 + ? (id_h + o0 + 0.5) * 4 - s0 / 2 + : 0; + float x2 = 0, y2 = 0; + x1 = x1 < (float)out_w ? x1 : (float)out_w; + y1 = y1 < (float)out_h ? y1 : (float)out_h; + x2 = x1 + s1 < (float)out_w ? x1 + s1 : (float)out_w; + y2 = y1 + s0 < (float)out_h ? 
y1 + s0 : (float)out_h; + + (*results)[bs].boxes.emplace_back(std::array{x1, y1, x2, y2}); + (*results)[bs].scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face_ > 0) { + // reference: utils/box_utils.py#L241 + for (size_t j = 0; j < landmarks_per_face_; j++) { + float *xmap = (float *)landmarks.Data() + (2 * j + 1) * spacial_size; + float *ymap = (float *)landmarks.Data() + (2 * j) * spacial_size; + float lx = (x1 + xmap[index] * s1) * scale_w; + float ly = (y1 + ymap[index] * s0) * scale_h; + (*results)[bs].landmarks.emplace_back(std::array{lx, ly}); + } + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + (*results)[bs].boxes[i][0] = + std::max((*results)[bs].boxes[i][0] * scale_w, 0.0f); + (*results)[bs].boxes[i][1] = + std::max((*results)[bs].boxes[i][1] * scale_h, 0.0f); + (*results)[bs].boxes[i][2] = + std::max((*results)[bs].boxes[i][2] * scale_w, 0.0f); + (*results)[bs].boxes[i][3] = + std::max((*results)[bs].boxes[i][3] * scale_h, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h new file mode 100755 index 0000000000..03f33f3704 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL CenterFacePostprocessor { +public: + /*! @brief Postprocessor object for CenterFace serials model. 
+ */ + CenterFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.3 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.3 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + int landmarks_per_face_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc new file mode 100755 index 0000000000..9f4b5b1ecc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/centerface/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFacePreprocessor::CenterFacePreprocessor() { size_ = {640, 640}; } + +bool CenterFacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); i++) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +bool CenterFacePreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // centerface's preprocess steps + // 1. Resize + // 2. 
ConvertAndPermute + Resize::Run(mat, size_[0], size_[1]); + std::vector alpha = {1.0f, 1.0f, 1.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h new file mode 100755 index 0000000000..d84d690b1e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL CenterFacePreprocessor { +public: + /** \brief Create a preprocessor instance for CenterFace serials model + */ + CenterFacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc new file mode 100755 index 0000000000..878b1c70d8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc @@ -0,0 +1,293 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/retinaface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +struct RetinaAnchor { + float cx; + float cy; + float s_kx; + float s_ky; +}; + +void GenerateRetinaAnchors(const std::vector &size, + const std::vector &downsample_strides, + const std::vector> &min_sizes, + std::vector *anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides (steps), e.g (8,16,32) + // min_sizes: width and height for each anchor, + // e.g {{16, 32}, {64, 128}, {256, 512}} + int h = size[1]; + int w = size[0]; + std::vector> feature_maps; + for (auto s : downsample_strides) { + feature_maps.push_back( + {static_cast( + std::ceil(static_cast(h) / static_cast(s))), + static_cast( + std::ceil(static_cast(w) / static_cast(s)))}); + } + + (*anchors).clear(); + const size_t num_feature_map = feature_maps.size(); + // reference: layers/functions/prior_box.py#L21 + for (size_t k = 0; k < num_feature_map; ++k) { + auto f_map = feature_maps.at(k); // e.g [640//8,640//8] + auto tmp_min_sizes = min_sizes.at(k); // e.g [8,16] + int f_h = f_map.at(0); + int f_w = f_map.at(1); + for (size_t i = 0; i < f_h; ++i) { + for (size_t j = 0; j < f_w; ++j) { + for (auto min_size : tmp_min_sizes) { + float s_kx = + static_cast(min_size) / static_cast(w); // e.g 16/w + float s_ky = + static_cast(min_size) / static_cast(h); // e.g 16/h + // (x + 0.5) * step / w normalized loc mapping to input width + // (y + 0.5) * step / h normalized loc mapping to input height + float s = static_cast(downsample_strides.at(k)); + float cx = (static_cast(j) + 0.5f) * s / static_cast(w); + float cy = (static_cast(i) + 0.5f) * s / static_cast(h); + (*anchors).emplace_back( + RetinaAnchor{cx, cy, s_kx, s_ky}); // without clip + } + } + } + } +} + +RetinaFace::RetinaFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool RetinaFace::Initialize() { + // parameters for preprocess + size = {640, 640}; + variance = {0.1f, 0.2f}; + downsample_strides = {8, 16, 32}; + min_sizes = {{16, 32}, {64, 128}, {256, 512}}; + landmarks_per_face = 5; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool RetinaFace::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // retinaface's preprocess steps + // 1. Resize + // 2. Convert(opencv style) or Normalize + // 3. HWC->CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Compute `result = mat * alpha + beta` directly by channel + // Reference: detect.py#L94 + std::vector alpha = {1.f, 1.f, 1.f}; + std::vector beta = {-104.f, -117.f, -123.f}; // BGR; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool RetinaFace::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // retinaface has 3 output tensors, boxes & conf & landmarks + FDASSERT( + (infer_result.size() == 3), + "The default number of output tensor must be 3 according to retinaface."); + FDTensor &boxes_tensor = infer_result.at(0); // (1,n,4) + FDTensor &conf_tensor = infer_result.at(1); // (1,n,2) + FDTensor &landmarks_tensor = infer_result.at(2); // (1,n,10) + FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); + if (boxes_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve + result->landmarks_per_face = landmarks_per_face; + result->Reserve(boxes_tensor.shape[1]); + + float *boxes_ptr = static_cast(boxes_tensor.Data()); + float *conf_ptr = static_cast(conf_tensor.Data()); + float *landmarks_ptr = static_cast(landmarks_tensor.Data()); + const size_t num_bboxes = boxes_tensor.shape[1]; // n + // fetch original image shape + auto iter_ipt = im_info.find("input_shape"); + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // generate anchors with dowmsample strides + std::vector anchors; + GenerateRetinaAnchors(size, downsample_strides, min_sizes, &anchors); + + // decode bounding boxes + for (size_t i = 0; i < num_bboxes; ++i) { + float confidence = conf_ptr[2 * i + 1]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float prior_cx = anchors.at(i).cx; + float prior_cy = anchors.at(i).cy; + float prior_s_kx = anchors.at(i).s_kx; + float prior_s_ky = anchors.at(i).s_ky; + + // fetch offsets (dx,dy,dw,dh) + float dx = boxes_ptr[4 * i + 0]; + float dy = boxes_ptr[4 * i + 1]; + float dw = boxes_ptr[4 * i + 2]; + float dh = boxes_ptr[4 * i + 3]; + // reference: Pytorch_Retinaface/utils/box_utils.py + float x = prior_cx + dx * variance[0] * prior_s_kx; + float y = prior_cy + dy * variance[0] * prior_s_ky; + float w = prior_s_kx * std::exp(dw * variance[1]); + float h = prior_s_ky * std::exp(dh * variance[1]); // (0.~1.) + // from (x,y,w,h) to (x1,y1,x2,y2) + float x1 = (x - w / 2.f) * ipt_w; + float y1 = (y - h / 2.f) * ipt_h; + float x2 = (x + w / 2.f) * ipt_w; + float y2 = (y + h / 2.f) * ipt_h; + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face > 0) { + // reference: utils/box_utils.py#L241 + for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { + float ldx = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 0)]; + float ldy = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 1)]; + float lx = (prior_cx + ldx * variance[0] * prior_s_kx) * ipt_w; + float ly = (prior_cy + ldy * variance[0] * prior_s_ky) * ipt_h; + result->landmarks.emplace_back(std::array{lx, ly}); + } + } + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); + result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + return true; +} + +bool 
RetinaFace::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h new file mode 100755 index 0000000000..12817516ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { +/** \brief All object face detection model APIs are defined inside this + * namespace + * + */ +namespace facedet { +/*! @brief RetinaFace model object used when to load a RetinaFace model exported + * by RetinaFace. + */ +class ULTRAINFER_DECL RetinaFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./retinaface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + RetinaFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "Pytorch_Retinaface"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.4 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25f, + float nms_iou_threshold = 0.4f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (640, 640) + */ + std::vector size; + /*! @brief + Argument for image postprocessing step, variance in RetinaFace's + prior-box(anchor) generate process, default (0.1, 0.2) + */ + std::vector variance; + /*! @brief + Argument for image postprocessing step, downsample strides (namely, steps) for + RetinaFace to generate anchors, will take (8,16,32) as default values + */ + std::vector downsample_strides; + /*! @brief + Argument for image postprocessing step, min sizes, width and height for each + anchor, default min_sizes = {{16, 32}, {64, 128}, {256, 512}} + */ + std::vector> min_sizes; + /*! @brief + Argument for image postprocessing step, landmarks_per_face, default 5 in + RetinaFace + */ + int landmarks_per_face; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc new file mode 100755 index 0000000000..dd63bf7430 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindRetinaFace(pybind11::module &m) {
+  pybind11::class_<vision::facedet::RetinaFace, UltraInferModel>(m,
+                                                                 "RetinaFace")
+      .def(pybind11::init<std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("predict",
+           [](vision::facedet::RetinaFace &self, pybind11::array &data,
+              float conf_threshold, float nms_iou_threshold) {
+             auto mat = PyArrayToCvMat(data);
+             vision::FaceDetectionResult res;
+             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+             return res;
+           })
+      .def_readwrite("size", &vision::facedet::RetinaFace::size)
+      .def_readwrite("variance", &vision::facedet::RetinaFace::variance)
+      .def_readwrite("downsample_strides",
+                     &vision::facedet::RetinaFace::downsample_strides)
+      .def_readwrite("min_sizes", &vision::facedet::RetinaFace::min_sizes)
+      .def_readwrite("landmarks_per_face",
+                     &vision::facedet::RetinaFace::landmarks_per_face);
+}
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc
new file mode 100755
index 0000000000..83ec974117
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc
@@ -0,0 +1,375 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
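[Editor's note, before the SCRFD sources that follow: the RetinaFace post-processing above combines a normalized prior (cx, cy, s_kx, s_ky) with regression offsets and the variances {0.1, 0.2}. The standalone sketch below reproduces that arithmetic for the default 640x640 configuration; the offsets (dx, dy, dw, dh) are hypothetical values, not real model output.]

// Standalone sketch of the prior-box decode used by RetinaFace::Postprocess,
// assuming the defaults set in Initialize(): size {640, 640}, strides {8, 16, 32},
// min_sizes {{16, 32}, {64, 128}, {256, 512}} and variance {0.1, 0.2}.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const int w = 640, h = 640;
  const std::vector<int> strides = {8, 16, 32};
  const std::vector<std::vector<int>> min_sizes = {{16, 32}, {64, 128}, {256, 512}};
  // Total number of anchors: sum over strides of ceil(w/s) * ceil(h/s) * anchors_per_cell.
  int total = 0;
  for (std::size_t k = 0; k < strides.size(); ++k) {
    int fw = static_cast<int>(std::ceil(static_cast<float>(w) / strides[k]));
    int fh = static_cast<int>(std::ceil(static_cast<float>(h) / strides[k]));
    total += fw * fh * static_cast<int>(min_sizes[k].size());
  }
  std::printf("total anchors: %d\n", total);  // 16800 for a 640x640 input

  // Decode one box from hypothetical offsets against the first anchor of the
  // stride-8 map (i = j = 0, min_size = 16).
  const float variance[2] = {0.1f, 0.2f};
  float cx = 0.5f * 8 / w, cy = 0.5f * 8 / h, s_kx = 16.f / w, s_ky = 16.f / h;
  float dx = 0.3f, dy = -0.2f, dw = 0.1f, dh = 0.4f;  // hypothetical model output
  float x = cx + dx * variance[0] * s_kx;
  float y = cy + dy * variance[0] * s_ky;
  float bw = s_kx * std::exp(dw * variance[1]);
  float bh = s_ky * std::exp(dh * variance[1]);
  std::printf("box (x1,y1,x2,y2) in pixels: %.2f %.2f %.2f %.2f\n",
              (x - bw / 2) * w, (y - bh / 2) * h, (x + bw / 2) * w, (y + bh / 2) * h);
}

[For a 640x640 input the three stride maps contribute 2 * (80*80 + 40*40 + 20*20) = 16800 anchors, which is the row count expected in the (1, n, 4) boxes tensor.]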
+ +#include "ultrainfer/vision/facedet/contrib/scrfd.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +void SCRFD::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +SCRFD::SCRFD(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool SCRFD::Initialize() { + // parameters for preprocess + use_kps = true; + size = {640, 640}; + padding_value = {0.0, 0.0, 0.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + downsample_strides = {8, 16, 32}; + num_anchors = 2; + landmarks_per_face = 5; + center_points_is_update_ = false; + max_nms = 30000; + // num_outputs = use_kps ? 9 : 6; + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + + return true; +} + +bool SCRFD::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_LINEAR; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // scrfd's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + SCRFD::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + + BGR2RGB::Run(mat); + if (!disable_normalize_) { + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + // Original Repo/tools/scrfd.py: cv2.dnn.blobFromImage(img, 1.0/128, + // input_size, (127.5, 127.5, 127.5), swapRB=True) + std::vector alpha = {1.f / 128.f, 1.f / 128.f, 1.f / 128.f}; + std::vector beta = {-127.5f / 128.f, -127.5f / 128.f, + -127.5f / 128.f}; + Convert::Run(mat, alpha, beta); + } + + if (!disable_permute_) { + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void SCRFD::GeneratePoints() { + if (center_points_is_update_ && !is_dynamic_input_) { + return; + } + // 8, 16, 32 + for (auto local_stride : downsample_strides) { + unsigned int num_grid_w = size[0] / local_stride; + unsigned int num_grid_h = size[1] / local_stride; + // y + for (unsigned int i = 0; i < num_grid_h; ++i) { + // x + for (unsigned int j = 0; j < num_grid_w; ++j) { + // num_anchors, col major + for (unsigned int k = 0; k < num_anchors; ++k) { + SCRFDPoint point; + point.cx = static_cast(j); + point.cy = static_cast(i); + center_points_[local_stride].push_back(point); + } + } + } + } + + center_points_is_update_ = true; +} + +bool SCRFD::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // number of downsample_strides + int fmc = downsample_strides.size(); + // scrfd has 6,9,10,15 output tensors + FDASSERT((infer_result.size() == 9 || infer_result.size() == 6 || + infer_result.size() == 10 || infer_result.size() == 15), + "The default number of output tensor must be 6, 9, 10, or 15 " + "according to scrfd."); + FDASSERT((fmc == 3 || fmc == 5), "The fmc must be 3 or 5"); + FDASSERT((infer_result.at(0).shape[0] == 1), "Only support batch =1 now."); + for (int i = 0; i < fmc; ++i) { + if (infer_result.at(i).dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + } + int total_num_boxes = 0; + // compute the reserve space. 
+ for (int f = 0; f < fmc; ++f) { + total_num_boxes += infer_result.at(f).shape[1]; + }; + GeneratePoints(); + result->Clear(); + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + if (!is_scale_up) { + scale = std::min(scale, 1.0f); + } + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + // must be setup landmarks_per_face before reserve + if (use_kps) { + result->landmarks_per_face = landmarks_per_face; + } else { + // force landmarks_per_face = 0, if use_kps has been set as 'false'. + result->landmarks_per_face = 0; + } + + result->Reserve(total_num_boxes); + unsigned int count = 0; + // loop each stride + for (int f = 0; f < fmc; ++f) { + float *score_ptr = static_cast(infer_result.at(f).Data()); + float *bbox_ptr = static_cast(infer_result.at(f + fmc).Data()); + const unsigned int num_points = infer_result.at(f).shape[1]; + int current_stride = downsample_strides[f]; + auto &stride_points = center_points_[current_stride]; + // loop each anchor + for (unsigned int i = 0; i < num_points; ++i) { + const float cls_conf = score_ptr[i]; + if (cls_conf < conf_threshold) + continue; // filter + auto &point = stride_points.at(i); + const float cx = point.cx; // cx + const float cy = point.cy; // cy + // bbox + const float *offsets = bbox_ptr + i * 4; + float l = offsets[0]; // left + float t = offsets[1]; // top + float r = offsets[2]; // right + float b = offsets[3]; // bottom + + float x1 = ((cx - l) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx - l x1 + float y1 = ((cy - t) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy - t y1 + float x2 = ((cx + r) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx + r x2 + float y2 = ((cy + b) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy + b y2 + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(cls_conf); + if (use_kps) { + float *landmarks_ptr = + static_cast(infer_result.at(f + 2 * fmc).Data()); + // landmarks + const float *kps_offsets = landmarks_ptr + i * (landmarks_per_face * 2); + for (unsigned int j = 0; j < landmarks_per_face * 2; j += 2) { + float kps_l = kps_offsets[j]; + float kps_t = kps_offsets[j + 1]; + float kps_x = ((cx + kps_l) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx + l x + float kps_y = ((cy + kps_t) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy + t y + result->landmarks.emplace_back(std::array{kps_x, kps_y}); + } + } + count += 1; // limit boxes for nms. 
+ if (count > max_nms) { + break; + } + } + } + + // fetch original image shape + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + if (use_kps) { + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); + result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + } + return true; +} + +bool SCRFD::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +void SCRFD::DisableNormalize() { disable_normalize_ = true; } + +void SCRFD::DisablePermute() { disable_permute_ = true; } +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h new file mode 100755 index 0000000000..1b6ae30eac --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
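[Editor's note, before the SCRFD header that follows: SCRFD::Postprocess above decodes left/top/right/bottom distances from each grid cell, scales them by the stride, then undoes the letterbox padding and resize scale. The sketch below illustrates that un-mapping with made-up pad, scale, and offset values for a hypothetical 1280x720 image letterboxed to 640x640.]

// Minimal sketch of SCRFD's distance-to-box decode plus the letterbox
// "un-mapping" done in Postprocess; all numeric values are illustrative.
#include <algorithm>
#include <cstdio>

int main() {
  // A 1280x720 image letterboxed to 640x640: scale = min(640/720, 640/1280) = 0.5,
  // so the resized content is 640x360 and (640 - 360) / 2 = 140 pixels of padding
  // are added on top and bottom.
  float scale = 0.5f, pad_w = 0.f, pad_h = 140.f;
  int stride = 8;
  float cx = 30.f, cy = 40.f;                  // grid-cell coords on the stride-8 map
  float l = 2.f, t = 3.f, r = 2.5f, b = 3.5f;  // hypothetical ltrb distances

  // Decode on the letterboxed image, then undo padding and resize.
  float x1 = ((cx - l) * stride - pad_w) / scale;
  float y1 = ((cy - t) * stride - pad_h) / scale;
  float x2 = ((cx + r) * stride - pad_w) / scale;
  float y2 = ((cy + b) * stride - pad_h) / scale;

  // Clip to the original image, mirroring the final loop in Postprocess.
  float ipt_w = 1280.f, ipt_h = 720.f;
  x1 = std::min(std::max(x1, 0.f), ipt_w - 1.f);
  y1 = std::min(std::max(y1, 0.f), ipt_h - 1.f);
  x2 = std::min(std::max(x2, 0.f), ipt_w - 1.f);
  y2 = std::min(std::max(y2, 0.f), ipt_h - 1.f);
  std::printf("box: %.1f %.1f %.1f %.1f\n", x1, y1, x2, y2);
}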
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief SCRFD model object used when to load a SCRFD model exported by SCRFD. + */ +class ULTRAINFER_DECL SCRFD : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./scrfd.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + SCRFD(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "scrfd"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.4 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25f, + float nms_iou_threshold = 0.4f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (640, 640) + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + /*! @brief + Argument for image postprocessing step, downsample strides (namely, steps) for + SCRFD to generate anchors, will take (8,16,32) as default values + */ + std::vector downsample_strides; + /*! @brief + Argument for image postprocessing step, landmarks_per_face, default 5 in SCRFD + */ + int landmarks_per_face; + /*! @brief + Argument for image postprocessing step, the outputs of onnx file with key + points features or not, default true + */ + bool use_kps; + /*! @brief + Argument for image postprocessing step, the upperbond number of boxes + processed by nms, default 30000 + */ + int max_nms; + /*! @brief + Argument for image postprocessing step, anchor number of each stride, default + 2 + */ + unsigned int num_anchors; + + /// This function will disable normalize and hwc2chw in preprocessing step. + void DisableNormalize(); + + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void GeneratePoints(); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; + + bool center_points_is_update_; + + typedef struct { + float cx; + float cy; + } SCRFDPoint; + + std::unordered_map> center_points_; + + // for recording the switch of normalize + bool disable_normalize_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; +}; +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc new file mode 100755 index 0000000000..26650759a1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindSCRFD(pybind11::module &m) { + // Bind SCRFD + pybind11::class_(m, "SCRFD") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::SCRFD &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("disable_normalize", &vision::facedet::SCRFD::DisableNormalize) + .def("disable_permute", &vision::facedet::SCRFD::DisablePermute) + .def_readwrite("size", &vision::facedet::SCRFD::size) + .def_readwrite("padding_value", &vision::facedet::SCRFD::padding_value) + .def_readwrite("is_mini_pad", &vision::facedet::SCRFD::is_mini_pad) + .def_readwrite("is_no_pad", &vision::facedet::SCRFD::is_no_pad) + .def_readwrite("is_scale_up", &vision::facedet::SCRFD::is_scale_up) + .def_readwrite("stride", &vision::facedet::SCRFD::stride) + .def_readwrite("use_kps", &vision::facedet::SCRFD::use_kps) + .def_readwrite("max_nms", &vision::facedet::SCRFD::max_nms) + .def_readwrite("downsample_strides", + &vision::facedet::SCRFD::downsample_strides) + .def_readwrite("num_anchors", &vision::facedet::SCRFD::num_anchors) + .def_readwrite("landmarks_per_face", + &vision::facedet::SCRFD::landmarks_per_face); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc new file mode 100755 index 0000000000..b4e7e991d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc @@ -0,0 +1,203 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/ultraface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +UltraFace::UltraFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool UltraFace::Initialize() { + // parameters for preprocess + size = {320, 240}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool UltraFace::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // ultraface's preprocess steps + // 1. resize + // 2. BGR->RGB + // 3. 
HWC->CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + BGR2RGB::Run(mat); + // Compute `result = mat * alpha + beta` directly by channel + // Reference: detect_imgs_onnx.py#L73 + std::vector alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; + std::vector beta = {-127.0f * (1.0f / 128.0f), + -127.0f * (1.0f / 128.0f), + -127.0f * (1.0f / 128.0f)}; // RGB; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool UltraFace::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // ultraface has 2 output tensors, scores & boxes + FDASSERT( + (infer_result.size() == 2), + "The default number of output tensor must be 2 according to ultraface."); + FDTensor &scores_tensor = infer_result.at(0); // (1,4420,2) + FDTensor &boxes_tensor = infer_result.at(1); // (1,4420,4) + FDASSERT((scores_tensor.shape[0] == 1), "Only support batch =1 now."); + FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); + if (scores_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (boxes_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve. + // ultraface detector does not detect landmarks by default. 
+ result->landmarks_per_face = 0; + result->Reserve(boxes_tensor.shape[1]); + + float *scores_ptr = static_cast(scores_tensor.Data()); + float *boxes_ptr = static_cast(boxes_tensor.Data()); + const size_t num_bboxes = boxes_tensor.shape[1]; // e.g 4420 + // fetch original image shape + auto iter_ipt = im_info.find("input_shape"); + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // decode bounding boxes + for (size_t i = 0; i < num_bboxes; ++i) { + float confidence = scores_ptr[2 * i + 1]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float x1 = boxes_ptr[4 * i + 0] * ipt_w; + float y1 = boxes_ptr[4 * i + 1] * ipt_h; + float x2 = boxes_ptr[4 * i + 2] * ipt_w; + float y2 = boxes_ptr[4 * i + 3] * ipt_h; + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool UltraFace::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h new file mode 100755 index 0000000000..2ca7d60994 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief UltraFace model object used when to load a UltraFace model exported + * by UltraFace. + */ +class ULTRAINFER_DECL UltraFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./ultraface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + UltraFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { + return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB"; + } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.7 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.3 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.7f, + float nms_iou_threshold = 0.3f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (320, 240) + */ + std::vector size; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc new file mode 100755 index 0000000000..dd6ad88e8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
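[Editor's note on UltraFace::Preprocess above: the per-channel Convert parameters alpha = 1/128 and beta = -127/128 are the usual (pixel - mean) / std normalization folded into a single multiply-add. A minimal check, assuming mean 127 and std 128 as in the reference implementation:]

// Verifies that pixel * (1/128) + (-127/128) equals (pixel - 127) / 128.
#include <cstdio>

int main() {
  const float alpha = 1.0f / 128.0f;
  const float beta = -127.0f / 128.0f;
  const float pixels[] = {0.0f, 127.0f, 255.0f};
  for (float pixel : pixels) {
    float via_convert = pixel * alpha + beta;
    float via_mean_std = (pixel - 127.0f) / 128.0f;
    std::printf("pixel %5.1f -> %+.4f (convert) vs %+.4f (mean/std)\n",
                pixel, via_convert, via_mean_std);
  }
}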
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindUltraFace(pybind11::module &m) { + pybind11::class_(m, "UltraFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::UltraFace &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::facedet::UltraFace::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc new file mode 100755 index 0000000000..3848c131c3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc @@ -0,0 +1,280 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov5face.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +void LetterBox(Mat *mat, std::vector size, std::vector color, + bool _auto, bool scale_fill = false, bool scale_up = true, + int stride = 32) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv5Face::YOLOv5Face(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Face::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad 
= false; + is_scale_up = false; + stride = 32; + landmarks_per_face = 5; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_input_ is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOv5Face::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_LINEAR; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(round(static_cast(mat->Height()) * ratio)); + int resize_w = int(round(static_cast(mat->Width()) * ratio)); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5face's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. HWC->CHW + LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, + stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOv5Face::Postprocess( + FDTensor &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // infer_result: (1,n,16) 16=4+1+10+1 + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve + result->landmarks_per_face = landmarks_per_face; + result->Reserve(infer_result.shape[1]); + + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + float *reg_cls_ptr = data + (i * infer_result.shape[2]); + float obj_conf = reg_cls_ptr[4]; + float cls_conf = reg_cls_ptr[15]; + float confidence = obj_conf * cls_conf; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float x = reg_cls_ptr[0]; + float y = reg_cls_ptr[1]; + float w = reg_cls_ptr[2]; + float h = reg_cls_ptr[3]; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)}); + result->scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face > 0) { + float *landmarks_ptr = reg_cls_ptr + 5; + for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { + result->landmarks.emplace_back( + std::array{landmarks_ptr[j], landmarks_ptr[j + 1]}); + } + } + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + if (!is_scale_up) { + scale = std::min(scale, 1.0f); + } + float pad_h = (out_h - ipt_h * scale) / 2.f; + float pad_w = (out_w - ipt_w * scale) / 2.f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = + std::max((result->landmarks[i][0] - pad_w) / scale, 0.0f); + result->landmarks[i][1] = + std::max((result->landmarks[i][1] - pad_h) / scale, 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Face::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + 
static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h new file mode 100755 index 0000000000..d17f679678 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief YOLOv5Face model object used when to load a YOLOv5Face model exported + * by YOLOv5Face. + */ +class ULTRAINFER_DECL YOLOv5Face : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov5face.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Face(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5-face"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.5 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + /*! @brief + Argument for image postprocessing step, setup the number of landmarks for + per face (if have), default 5 in official yolov5face note that, the outupt + tensor's shape must be: + (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls), default 5 + */ + int landmarks_per_face; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc new file mode 100755 index 0000000000..aead752b28 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Face(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Face") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::YOLOv5Face &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::facedet::YOLOv5Face::size) + .def_readwrite("padding_value", + &vision::facedet::YOLOv5Face::padding_value) + .def_readwrite("is_mini_pad", &vision::facedet::YOLOv5Face::is_mini_pad) + .def_readwrite("is_no_pad", &vision::facedet::YOLOv5Face::is_no_pad) + .def_readwrite("is_scale_up", &vision::facedet::YOLOv5Face::is_scale_up) + .def_readwrite("stride", &vision::facedet::YOLOv5Face::stride) + .def_readwrite("landmarks_per_face", + &vision::facedet::YOLOv5Face::landmarks_per_face); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc new file mode 100755 index 0000000000..3f69724c11 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +Yolov7FacePostprocessor::Yolov7FacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.45; + landmarks_per_face_ = 5; +} + +bool Yolov7FacePostprocessor::Run( + const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info) { + int batch = infer_result[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + // must be setup landmarks_per_face before reserve + (*results)[bs].landmarks_per_face = landmarks_per_face_; + (*results)[bs].Reserve(infer_result[0].shape[1]); + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = + reinterpret_cast(infer_result[0].Data()) + + bs * infer_result[0].shape[1] * infer_result[0].shape[2]; + for (size_t i = 0; i < infer_result[0].shape[1]; ++i) { + int s = i * infer_result[0].shape[2]; + float confidence = data[s + 4]; + const float *reg_cls_ptr = data + s; + const float *class_score = data + s + 5; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + float x = reg_cls_ptr[0]; + float y = reg_cls_ptr[1]; + float w = reg_cls_ptr[2]; + float h = reg_cls_ptr[3]; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)}); + (*results)[bs].scores.push_back(confidence); + + // decode landmarks (default 5 landmarks) + if (landmarks_per_face_ > 0) { + float *landmarks_ptr = const_cast(reg_cls_ptr + 6); + for (size_t j = 0; j < landmarks_per_face_ * 3; j += 3) { + (*results)[bs].landmarks.emplace_back( + std::array{landmarks_ptr[j], landmarks_ptr[j + 1]}); + } + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + // clip box + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + + // scale and clip landmarks + for (size_t i = 0; i < (*results)[bs].landmarks.size(); ++i) { + (*results)[bs].landmarks[i][0] = + std::max(((*results)[bs].landmarks[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].landmarks[i][1] = + std::max(((*results)[bs].landmarks[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].landmarks[i][0] = + std::min((*results)[bs].landmarks[i][0], ipt_w - 1.0f); + (*results)[bs].landmarks[i][1] = + std::min((*results)[bs].landmarks[i][1], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h new file mode 100755 index 0000000000..868002fd5b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h @@ -0,0 +1,76 @@ +// 
Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL Yolov7FacePostprocessor { +public: + /*! @brief Postprocessor object for YOLOv7Face serials model. + */ + Yolov7FacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.45 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set landmarks_per_face, default 5 + void SetLandmarksPerFace(const int &landmarks_per_face) { + landmarks_per_face_ = landmarks_per_face; + } + + /// Get landmarks_per_face, default 5 + int GetLandmarksPerFace() const { return landmarks_per_face_; } + +protected: + float conf_threshold_; + float nms_threshold_; + int landmarks_per_face_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc new file mode 100755 index 0000000000..1462344135 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
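+// Usage note (an illustrative sketch, not part of the documented API): the
+// preprocessor turns a list of FDMat images into one batched NCHW float
+// tensor plus per-image shape info. Assuming `frame` is a cv::Mat decoded
+// elsewhere, a typical call looks like:
+//
+//   Yolov7FacePreprocessor preprocessor;
+//   std::vector<FDMat> mats = {WrapMat(frame)};
+//   std::vector<FDTensor> inputs;
+//   std::vector<std::map<std::string, std::array<float, 2>>> ims_info;
+//   preprocessor.Run(&mats, &inputs, &ims_info);  // inputs[0] holds the batch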
+ +#include "ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +Yolov7FacePreprocessor::Yolov7FacePreprocessor() { + size_ = {640, 640}; + padding_color_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; +} + +bool Yolov7FacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); i++) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +bool Yolov7FacePreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // yolov7-face's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +void Yolov7FacePreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + Resize::Run(mat, resize_w, resize_h); + + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_color_value_); + } +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h new file mode 100755 index 0000000000..245e097d88 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h @@ -0,0 +1,98 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL Yolov7FacePreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv7Face serials model + */ + Yolov7FacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingColorValue(const std::vector &padding_color_value) { + padding_color_value_ = padding_color_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingColorValue() const { + return padding_color_value_; + } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_color_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc new file mode 100755 index 0000000000..5f5508614c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +YOLOv7Face::YOLOv7Face(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv7Face::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv7Face::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv7Face::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h new file mode 100755 index 0000000000..ae268feb6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
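+// Usage sketch (file names below are placeholders, not assets shipped with
+// this patch): the model owns the pre/postprocessors declared in the
+// companion preprocessor/postprocessor headers, so a single Predict() call
+// covers letterbox, inference and NMS:
+//
+//   vision::facedet::YOLOv7Face model("yolov7-face.onnx");
+//   cv::Mat im = cv::imread("face.jpg");
+//   vision::FaceDetectionResult result;
+//   model.Predict(im, &result);  // error handling omitted for brevity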
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h"
+#include "ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h"
+
+namespace ultrainfer {
+
+namespace vision {
+
+namespace facedet {
+/*! @brief YOLOv7Face model object, used to load a face detection model
+ * exported by the YOLOv7Face project.
+ */
+class ULTRAINFER_DECL YOLOv7Face : public UltraInferModel {
+public:
+  /** \brief Set the path of the model file and the runtime configuration.
+   *
+   * \param[in] model_file Path of the model file, e.g. ./yolov7face.onnx
+   * \param[in] params_file Path of the parameter file, e.g. ppyoloe/model.pdiparams;
+   *            ignored if the model format is ONNX
+   * \param[in] custom_option RuntimeOption for inference; the default uses CPU
+   *            and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   */
+  YOLOv7Face(const std::string &model_file, const std::string &params_file = "",
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::ONNX);
+
+  std::string ModelName() { return "yolov7-face"; }
+
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, read by cv::imread(), a 3-D array
+   *            with layout HWC in BGR format
+   * \param[in] result The output detection result, written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat &im, FaceDetectionResult *result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] images The input image list, each element read by cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &images,
+                            std::vector<FaceDetectionResult> *results);
+
+  /// Get preprocessor reference of YOLOv7Face
+  virtual Yolov7FacePreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of YOLOv7Face
+  virtual Yolov7FacePostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  Yolov7FacePreprocessor preprocessor_;
+  Yolov7FacePostprocessor postprocessor_;
+};
+
+} // namespace facedet
+
+} // namespace vision
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
new file mode 100755
index 0000000000..bcbbb1f623
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
@@ -0,0 +1,117 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
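+// Binding note (added for clarity): unlike the YOLOv5Face binding, this file
+// also exposes the two stages as standalone Python classes
+// (Yolov7FacePreprocessor and YOLOv7FacePostprocessor) next to the YOLOv7Face
+// model itself, so preprocessing and postprocessing can be driven
+// independently from Python around a custom runtime.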
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7Face(pybind11::module &m) { + pybind11::class_( + m, "Yolov7FacePreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::Yolov7FacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::facedet::Yolov7FacePreprocessor::GetSize, + &vision::facedet::Yolov7FacePreprocessor::SetSize) + .def_property( + "padding_color_value", + &vision::facedet::Yolov7FacePreprocessor::GetPaddingColorValue, + &vision::facedet::Yolov7FacePreprocessor::SetPaddingColorValue) + .def_property("is_scale_up", + &vision::facedet::Yolov7FacePreprocessor::GetScaleUp, + &vision::facedet::Yolov7FacePreprocessor::SetScaleUp); + + pybind11::class_( + m, "YOLOv7FacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::Yolov7FacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in Yolov7Postprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::Yolov7FacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::Yolov7FacePostprocessor::GetConfThreshold, + &vision::facedet::Yolov7FacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, + &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold) + .def_property( + "landmarks_per_face", + &vision::facedet::Yolov7FacePostprocessor::GetLandmarksPerFace, + &vision::facedet::Yolov7FacePostprocessor::SetLandmarksPerFace); + + pybind11::class_(m, + "YOLOv7Face") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::YOLOv7Face &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::YOLOv7Face &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::YOLOv7Face::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::YOLOv7Face::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc new file mode 100755 index 0000000000..3e3fe95b43 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc @@ -0,0 +1,37 @@ 
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindRetinaFace(pybind11::module &m); +void BindUltraFace(pybind11::module &m); +void BindYOLOv5Face(pybind11::module &m); +void BindYOLOv7Face(pybind11::module &m); +void BindCenterFace(pybind11::module &m); +void BindBlazeFace(pybind11::module &m); +void BindSCRFD(pybind11::module &m); + +void BindFaceDet(pybind11::module &m) { + auto facedet_module = m.def_submodule("facedet", "Face detection models."); + BindRetinaFace(facedet_module); + BindUltraFace(facedet_module); + BindYOLOv5Face(facedet_module); + BindYOLOv7Face(facedet_module); + BindCenterFace(facedet_module); + BindBlazeFace(facedet_module); + BindSCRFD(facedet_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc new file mode 100755 index 0000000000..459a0d85de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFace::BlazeFace(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::OPENVINO, Backend::LITE, Backend::PDINFER}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool BlazeFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool BlazeFace::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!this->BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool BlazeFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = "image"; + reused_input_tensors_[1].name = "scale_factor"; + reused_input_tensors_[2].name = "im_shape"; + + // Some models don't need scale_factor and im_shape as input + while (reused_input_tensors_.size() != NumInputsOfRuntime()) { + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h new file mode 100755 index 0000000000..020d7ff47e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief BlazeFace model object used when to load a BlazeFace model exported + * by BlazeFace. + */ +class ULTRAINFER_DECL BlazeFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of the model file, e.g. ./blazeface.onnx
+   * \param[in] params_file Path of the parameter file, e.g. ppyoloe/model.pdiparams;
+   *            ignored if the model format is ONNX
+   * \param[in] config_file Path of the configuration file for deployment,
+   *            e.g. resnet/infer_cfg.yml
+   * \param[in] custom_option RuntimeOption for inference; the default uses CPU
+   *            and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is PADDLE
+   */
+  BlazeFace(const std::string &model_file, const std::string &params_file = "",
+            const std::string &config_file = "",
+            const RuntimeOption &custom_option = RuntimeOption(),
+            const ModelFormat &model_format = ModelFormat::PADDLE);
+
+  std::string ModelName() { return "blaze-face"; }
+
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, read by cv::imread(), a 3-D array
+   *            with layout HWC in BGR format
+   * \param[in] result The output detection result, written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  bool Predict(const cv::Mat &im, FaceDetectionResult *result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] images The input image list, each element read by cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &images,
+                            std::vector<FaceDetectionResult> *results);
+
+  /// Get preprocessor reference of BlazeFace
+  virtual BlazeFacePreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of BlazeFace
+  virtual BlazeFacePostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  BlazeFacePreprocessor preprocessor_;
+  BlazeFacePostprocessor postprocessor_;
+};
+
+} // namespace facedet
+
+} // namespace vision
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc
new file mode 100755
index 0000000000..5ae913a14b
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc
@@ -0,0 +1,102 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
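+// Binding note (added for clarity): BlazeFace is bound together with its
+// preprocessor and postprocessor; conf_threshold and nms_threshold are
+// exported as properties of BlazeFacePostprocessor, while the model class
+// only exposes predict/batch_predict plus read-only accessors for the two
+// processing stages.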
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindBlazeFace(pybind11::module &m) { + pybind11::class_( + m, "BlazeFacePreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::facedet::BlazeFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in BlazeFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }); + + pybind11::class_( + m, "BlazeFacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::BlazeFacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in BlazeFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::BlazeFacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in BlazePostprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::BlazeFacePostprocessor::GetConfThreshold, + &vision::facedet::BlazeFacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::BlazeFacePostprocessor::GetNMSThreshold, + &vision::facedet::BlazeFacePostprocessor::SetNMSThreshold); + + pybind11::class_(m, "BlazeFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::BlazeFace &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::BlazeFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::BlazeFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::BlazeFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc new file mode 100755 index 0000000000..1ce5e54e02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFacePostprocessor::BlazeFacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.3; +} + +bool BlazeFacePostprocessor::Run( + const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info) { + // Get number of boxes for each input image + std::vector num_boxes(tensors[1].shape[0]); + int total_num_boxes = 0; + if (tensors[1].dtype == FDDataType::INT32) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } else if (tensors[1].dtype == FDDataType::INT64) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + } + } + + // Special case for TensorRT, it has fixed output shape of NMS + // So there's invalid boxes in its' output boxes + int num_output_boxes = static_cast(tensors[0].Shape()[0]); + bool contain_invalid_boxes = false; + if (total_num_boxes != num_output_boxes) { + if (num_output_boxes % num_boxes.size() == 0) { + contain_invalid_boxes = true; + } else { + FDERROR << "Cannot handle the output data for this model, unexpected " + "situation." + << std::endl; + return false; + } + } + + // Get boxes for each input image + results->resize(num_boxes.size()); + + if (tensors[0].shape[0] == 0) { + // No detected boxes + return true; + } + + const auto *box_data = static_cast(tensors[0].CpuData()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + if (ptr[j * 6 + 1] > conf_threshold_) { + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + } + if (contain_invalid_boxes) { + offset += static_cast(num_output_boxes * 6 / num_boxes.size()); + } else { + offset += static_cast(num_boxes[i] * 6); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h new file mode 100755 index 0000000000..fd8c78c83e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
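+// Note on the expected input (added for clarity): Run() consumes the tensors
+// produced by the exported PaddleDetection-style NMS head, where tensors[0]
+// stores one row of six floats per box (score at index 1, x1/y1/x2/y2 at
+// indices 2-5) and tensors[1] stores the number of valid boxes per image;
+// rows whose score does not exceed conf_threshold_ are dropped. A minimal,
+// illustrative configuration:
+//
+//   BlazeFacePostprocessor postprocessor;
+//   postprocessor.SetConfThreshold(0.6f);  // keep only higher-confidence faces
+//   postprocessor.SetNMSThreshold(0.3f);   // same as the default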
+ +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL BlazeFacePostprocessor { +public: + /*! @brief Postprocessor object for BlazeFace serials model. + */ + BlazeFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.3 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.3 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc new file mode 100755 index 0000000000..f13ee64b52 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc @@ -0,0 +1,209 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/pad.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFacePreprocessor::BlazeFacePreprocessor(const std::string &config_file) { + is_scale_ = false; + normalize_mean_ = {123, 117, 104}; + normalize_std_ = {127.502231, 127.502231, 127.502231}; + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleDetPreprocessor."); +} + +bool BlazeFacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." 
+ << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(3); + int batch = static_cast(images->size()); + // Allocate memory for scale_factor + (*outputs)[1].Resize({batch, 2}, FDDataType::FP32); + // Allocate memory for im_shape + (*outputs)[2].Resize({batch, 2}, FDDataType::FP32); + + std::vector max_hw({-1, -1}); + + auto *scale_factor_ptr = + reinterpret_cast((*outputs)[1].MutableData()); + auto *im_shape_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + // Concat all the preprocessed data to a batch tensor + std::vector im_tensors(images->size()); + + for (size_t i = 0; i < images->size(); ++i) { + int origin_w = (*images)[i].Width(); + int origin_h = (*images)[i].Height(); + scale_factor_ptr[2 * i] = 1.0; + scale_factor_ptr[2 * i + 1] = 1.0; + + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(&((*images)[i]))) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[i]->Name() << "." << std::endl; + return false; + } + if (processors_[j]->Name().find("Resize") != std::string::npos) { + scale_factor_ptr[2 * i] = (*images)[i].Height() * 1.0 / origin_h; + scale_factor_ptr[2 * i + 1] = (*images)[i].Width() * 1.0 / origin_w; + } + } + + if ((*images)[i].Height() > max_hw[0]) { + max_hw[0] = (*images)[i].Height(); + } + if ((*images)[i].Width() > max_hw[1]) { + max_hw[1] = (*images)[i].Width(); + } + im_shape_ptr[2 * i] = max_hw[0]; + im_shape_ptr[2 * i + 1] = max_hw[1]; + + if ((*images)[i].Height() < max_hw[0] || (*images)[i].Width() < max_hw[1]) { + // if the size of image less than max_hw, pad to max_hw + FDTensor tensor; + (*images)[i].ShareWithTensor(&tensor); + function::Pad(tensor, &(im_tensors[i]), + {0, 0, max_hw[0] - (*images)[i].Height(), + max_hw[1] - (*images)[i].Width()}, + 0); + } else { + // No need pad + (*images)[i].ShareWithTensor(&(im_tensors[i])); + } + // Reshape to 1xCxHxW + im_tensors[i].ExpandDim(0); + } + + if (im_tensors.size() == 1) { + // If there's only 1 input, no need to concat + // skip memory copy + (*outputs)[0] = std::move(im_tensors[0]); + } else { + // Else concat the im tensor for each input image + // compose a batched input tensor + function::Concat(im_tensors, &((*outputs)[0]), 0); + } + + return true; +} + +bool BlazeFacePreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." 
<< std::endl; + return false; + } + + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h new file mode 100755 index 0000000000..afef5d31e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
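+// Usage note (the config path below is a placeholder): the preprocessor is
+// normally constructed from the PaddleDetection-style inference config that
+// ships with the model; the YAML's Preprocess ops drive the pipeline built in
+// BuildPreprocessPipelineFromConfig(), and Run() then emits three tensors
+// (image, scale_factor and im_shape):
+//
+//   BlazeFacePreprocessor preprocessor("blazeface/infer_cfg.yml");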
+ +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL BlazeFacePreprocessor + : public ultrainfer::vision::detection::PaddleDetPreprocessor { +public: + /** \brief Create a preprocessor instance for BlazeFace serials model + */ + BlazeFacePreprocessor() = default; + + /** \brief Create a preprocessor instance for Blazeface serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppyoloe/infer_cfg.yml + */ + explicit BlazeFacePreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + +private: + bool BuildPreprocessPipelineFromConfig(); + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_; + + std::vector normalize_mean_; + + std::vector normalize_std_; + + std::vector> processors_; + // read config file + std::string config_file_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc new file mode 100755 index 0000000000..7ebfa6f82c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/adaface.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFace::AdaFace(const std::string &model_file, const std::string ¶ms_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool AdaFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool AdaFace::Predict(const cv::Mat &im, FaceRecognitionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + if (!results.empty()) { + *result = std::move(results[0]); + } + return true; +} + +bool AdaFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h new file mode 100755 index 0000000000..b88725a544 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/faceid/contrib/adaface/postprocessor.h" +#include "ultrainfer/vision/faceid/contrib/adaface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +/*! @brief AdaFace model object used when to load a AdaFace model exported by + * AdaFace. + */ +class ULTRAINFER_DECL AdaFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./adaface.onnx + * \param[in] params_file Path of parameter file, e.g adaface/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + AdaFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "insightface_rec"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * FaceRecognitionResult will be writen to this structure \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceRecognitionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output FaceRecognitionResult list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of AdaFace + virtual AdaFacePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of AdaFace + virtual AdaFacePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + AdaFacePreprocessor preprocessor_; + AdaFacePostprocessor postprocessor_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc new file mode 100755 index 0000000000..838fabaa0c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
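For reference, a minimal usage sketch of the AdaFace class declared above. It assumes an exported adaface.onnx, an aligned face crop face_0.jpg, and an umbrella header ultrainfer/vision.h that pulls in the vision API; all three names are placeholders rather than files provided by this patch.

#include <iostream>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header for the vision API

int main() {
  ultrainfer::RuntimeOption option; // default: CPU, first entry of valid_cpu_backends
  ultrainfer::vision::faceid::AdaFace model("adaface.onnx", "", option,
                                            ultrainfer::ModelFormat::ONNX);
  // Ask the postprocessor for unit-length embeddings (optional).
  bool l2_normalize = true;
  model.GetPostprocessor().SetL2Normalize(l2_normalize);

  cv::Mat face = cv::imread("face_0.jpg"); // an aligned face crop
  ultrainfer::vision::FaceRecognitionResult result;
  if (!model.Predict(face, &result)) {
    std::cerr << "AdaFace prediction failed." << std::endl;
    return -1;
  }
  std::cout << "embedding length: " << result.embedding.size() << std::endl;
  return 0;
}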
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindAdaFace(pybind11::module &m) { + pybind11::class_(m, + "AdaFacePreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::AdaFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to preprocess the input data " + "in AdaFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def_property("permute", &vision::faceid::AdaFacePreprocessor::GetPermute, + &vision::faceid::AdaFacePreprocessor::SetPermute) + .def_property("alpha", &vision::faceid::AdaFacePreprocessor::GetAlpha, + &vision::faceid::AdaFacePreprocessor::SetAlpha) + .def_property("beta", &vision::faceid::AdaFacePreprocessor::GetBeta, + &vision::faceid::AdaFacePreprocessor::SetBeta) + .def_property("size", &vision::faceid::AdaFacePreprocessor::GetSize, + &vision::faceid::AdaFacePreprocessor::SetSize); + + pybind11::class_(m, + "AdaFacePostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::AdaFacePostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in AdaFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::faceid::AdaFacePostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in AdaFacePostprocessor."); + } + return results; + }) + .def_property("l2_normalize", + &vision::faceid::AdaFacePostprocessor::GetL2Normalize, + &vision::faceid::AdaFacePostprocessor::SetL2Normalize); + + pybind11::class_(m, "AdaFace") + .def(pybind11::init()) + .def("predict", + [](vision::faceid::AdaFace &self, pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::FaceRecognitionResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::faceid::AdaFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::faceid::AdaFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::faceid::AdaFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc new file mode 100755 index 0000000000..5a559f4bb9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFacePostprocessor::AdaFacePostprocessor() { l2_normalize_ = false; } + +bool AdaFacePostprocessor::Run(std::vector &infer_result, + std::vector *results) { + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (infer_result.size() != 1) { + FDERROR << "The default number of output tensor " + "must be 1 according to insightface." + << std::endl; + } + int batch = infer_result[0].shape[0]; + results->resize(batch); + for (size_t bs = 0; bs < batch; ++bs) { + FDTensor &embedding_tensor = infer_result.at(bs); + FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (embedding_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + (*results)[bs].Clear(); + (*results)[bs].Resize(embedding_tensor.Numel()); + + // Copy the raw embedding vector directly without L2 normalize + // post process. Let the user decide whether to normalize or not. + // Will call utils::L2Normlize() method to perform L2 + // normalize if l2_normalize was set as 'true'. + std::memcpy((*results)[bs].embedding.data(), embedding_tensor.Data(), + embedding_tensor.Nbytes()); + if (l2_normalize_) { + auto norm_embedding = utils::L2Normalize((*results)[bs].embedding); + std::memcpy((*results)[bs].embedding.data(), norm_embedding.data(), + embedding_tensor.Nbytes()); + } + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h new file mode 100755 index 0000000000..c5c38b661f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Postprocessor object for AdaFace serials model. 
+ */ +class ULTRAINFER_DECL AdaFacePostprocessor { +public: + /** \brief Create a postprocessor instance for AdaFace serials model + */ + AdaFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceRecognitionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of FaceRecognitionResult + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_result, + std::vector *results); + + void SetL2Normalize(bool &l2_normalize) { l2_normalize_ = l2_normalize; } + + bool GetL2Normalize() { return l2_normalize_; } + +private: + bool l2_normalize_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc new file mode 100755 index 0000000000..092bdb4873 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFacePreprocessor::AdaFacePreprocessor() { + // parameters for preprocess + size_ = {112, 112}; + alpha_ = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta_ = {-1.f, -1.f, -1.f}; // RGB + permute_ = true; +} + +bool AdaFacePreprocessor::Preprocess(FDMat *mat, FDTensor *output) { + // face recognition model's preprocess steps in insightface + // reference: insightface/recognition/arcface_torch/inference.py + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. HWC2CHW + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (permute_) { + BGR2RGB::Run(mat); + } + + Convert::Run(mat, alpha_, beta_); + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool AdaFacePreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + FDASSERT(images->size() == 1, "Only support batch = 1 now."); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + (*outputs)[0] = std::move(tensors[0]); + return true; +} +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h new file mode 100755 index 0000000000..d4e46bdace --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Preprocessor object for AdaFace serials model. + */ +class ULTRAINFER_DECL AdaFacePreprocessor { +public: + /** \brief Create a preprocessor instance for AdaFace serials model + */ + AdaFacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Get Size + std::vector GetSize() { return size_; } + + /// Set size. + void SetSize(std::vector &size) { size_ = size; } + + /// Get alpha + std::vector GetAlpha() { return alpha_; } + + /// Set alpha. + void SetAlpha(std::vector &alpha) { alpha_ = alpha; } + + /// Get beta + std::vector GetBeta() { return beta_; } + + /// Set beta. + void SetBeta(std::vector &beta) { beta_ = beta; } + + bool GetPermute() { return permute_; } + + /// Set permute. + void SetPermute(bool permute) { permute_ = permute; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + // Argument for image preprocessing step, tuple of (width, height), + // decide the target size after resize, default (112, 112) + std::vector size_; + // Argument for image preprocessing step, alpha values for normalization, + // default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + std::vector alpha_; + // Argument for image preprocessing step, beta values for normalization, + // default beta = {-1.f, -1.f, -1.f} + std::vector beta_; + // Argument for image preprocessing step, whether to swap the B and R channel, + // such as BGR->RGB, default true. + bool permute_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc new file mode 100755 index 0000000000..2ab0be0dba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
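The AdaFace preprocessor defined above resizes to 112x112, swaps BGR to RGB, and applies v * alpha + beta per channel, so the default alpha = 1/127.5 and beta = -1 map the pixel range [0, 255] to [-1, 1]. Below is a standalone sketch of driving it directly; the umbrella header ultrainfer/vision.h, the WrapMat overload for std::vector<cv::Mat>, and the image path are assumptions, not part of this patch.

#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

// Illustrative sketch: run AdaFacePreprocessor on one decoded image and
// produce the 1x3x112x112 float tensor that would be fed to the runtime.
bool PreprocessOneFace(const std::string &image_path,
                       std::vector<ultrainfer::FDTensor> *inputs) {
  ultrainfer::vision::faceid::AdaFacePreprocessor preprocessor;
  // Restating the defaults to make the v * alpha + beta convention explicit.
  std::vector<float> alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
  std::vector<float> beta = {-1.f, -1.f, -1.f};
  preprocessor.SetAlpha(alpha);
  preprocessor.SetBeta(beta);

  cv::Mat im = cv::imread(image_path);
  std::vector<ultrainfer::vision::FDMat> mats =
      ultrainfer::vision::WrapMat({im});
  return preprocessor.Run(&mats, inputs); // batch = 1 only, per the FDASSERT above
}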
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/base.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionBase::InsightFaceRecognitionBase( + const std::string &model_file, const std::string ¶ms_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + valid_rknpu_backends = {Backend::RKNPU2}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +} + +bool InsightFaceRecognitionBase::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool InsightFaceRecognitionBase::Predict(const cv::Mat &im, + FaceRecognitionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool InsightFaceRecognitionBase::BatchPredict( + const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h new file mode 100755 index 0000000000..10d24c69eb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/faceid/contrib/insightface/postprocessor.h" +#include "ultrainfer/vision/faceid/contrib/insightface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +/*! @brief InsightFaceRecognition model object used when to load a + * InsightFaceRecognition model exported by InsightFaceRecognition. + */ +class ULTRAINFER_DECL InsightFaceRecognitionBase : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./arcface.onnx + * \param[in] params_file Path of parameter file, e.g arcface/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + InsightFaceRecognitionBase( + const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "insightface_rec"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * FaceRecognitionResult will be writen to this structure \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceRecognitionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output FaceRecognitionResult list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of InsightFaceRecognition + virtual InsightFaceRecognitionPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of InsightFaceRecognition + virtual InsightFaceRecognitionPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + InsightFaceRecognitionPreprocessor preprocessor_; + InsightFaceRecognitionPostprocessor postprocessor_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc new file mode 100755 index 0000000000..d05faf9585 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
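Both the AdaFace postprocessor above and the InsightFace one later in this patch copy the raw embedding and, when l2_normalize is set, replace it with a unit-length version via utils::L2Normalize. For readers unfamiliar with that step, here is an equivalent standalone sketch of the math (not the library call itself):

#include <cmath>
#include <vector>

// Divide every element of the embedding by its Euclidean norm so the result
// has unit length; cosine similarity then reduces to a plain dot product.
// The small epsilon guards against an all-zero embedding.
void L2NormalizeInPlace(std::vector<float> *embedding) {
  float squared_sum = 0.f;
  for (float v : *embedding) {
    squared_sum += v * v;
  }
  const float norm = std::sqrt(squared_sum) + 1e-12f;
  for (float &v : *embedding) {
    v /= norm;
  }
}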
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindInsightFace(pybind11::module &m) { + pybind11::class_( + m, "InsightFaceRecognitionPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::InsightFaceRecognitionPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "InsightFaceRecognitionPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def( + "disable_normalize", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisableNormalize) + .def("disable_permute", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisablePermute) + .def_property( + "alpha", + &vision::faceid::InsightFaceRecognitionPreprocessor::GetAlpha, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetAlpha) + .def_property( + "beta", &vision::faceid::InsightFaceRecognitionPreprocessor::GetBeta, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetBeta) + .def_property( + "size", &vision::faceid::InsightFaceRecognitionPreprocessor::GetSize, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetSize); + + pybind11::class_( + m, "InsightFaceRecognitionPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def_property( + "l2_normalize", + &vision::faceid::InsightFaceRecognitionPostprocessor::GetL2Normalize, + &vision::faceid::InsightFaceRecognitionPostprocessor::SetL2Normalize); + + pybind11::class_( + m, "InsightFaceRecognitionBase") + .def(pybind11::init()) + .def("predict", + [](vision::faceid::InsightFaceRecognitionBase &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::FaceRecognitionResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::faceid::InsightFaceRecognitionBase &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + 
.def_property_readonly( + "preprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPostprocessor); + + pybind11::class_(m, "ArcFace") + .def(pybind11::init()); + + pybind11::class_(m, "CosFace") + .def(pybind11::init()); + + pybind11::class_(m, "PartialFC") + .def(pybind11::init()); + + pybind11::class_(m, "VPL") + .def(pybind11::init()); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h new file mode 100755 index 0000000000..1dec9259d0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h @@ -0,0 +1,154 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/faceid/contrib/insightface/base.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +class ULTRAINFER_DECL ArcFace : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ArcFace/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ArcFace/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + ArcFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "ArcFace"; } +}; + +class ULTRAINFER_DECL CosFace : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g CosFace/model.pdmodel + * \param[in] params_file Path of parameter file, e.g CosFace/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in 
`valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + CosFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "CosFace"; } +}; +class ULTRAINFER_DECL PartialFC : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PartialFC/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PartialFC/model.pdiparams, if the model format is ONNX, this parameter will + * be ignored \param[in] custom_option RuntimeOption for inference, the + * default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PartialFC(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PartialFC"; } +}; +class ULTRAINFER_DECL VPL : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g VPL/model.pdmodel + * \param[in] params_file Path of parameter file, e.g VPL/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + VPL(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, 
Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "VPL"; } +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc new file mode 100755 index 0000000000..8fae91da57 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionPostprocessor::InsightFaceRecognitionPostprocessor() { + l2_normalize_ = false; +} + +bool InsightFaceRecognitionPostprocessor::Run( + std::vector &infer_result, + std::vector *results) { + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (infer_result.size() != 1) { + FDERROR << "The default number of output tensor " + "must be 1 according to insightface." + << std::endl; + } + int batch = infer_result[0].shape[0]; + results->resize(batch); + for (size_t bs = 0; bs < batch; ++bs) { + FDTensor &embedding_tensor = infer_result.at(bs); + FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (embedding_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + (*results)[bs].Clear(); + (*results)[bs].Resize(embedding_tensor.Numel()); + + // Copy the raw embedding vector directly without L2 normalize + // post process. Let the user decide whether to normalize or not. + // Will call utils::L2Normlize() method to perform L2 + // normalize if l2_normalize was set as 'true'. + std::memcpy((*results)[bs].embedding.data(), embedding_tensor.Data(), + embedding_tensor.Nbytes()); + if (l2_normalize_) { + auto norm_embedding = utils::L2Normalize((*results)[bs].embedding); + std::memcpy((*results)[bs].embedding.data(), norm_embedding.data(), + embedding_tensor.Nbytes()); + } + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h new file mode 100755 index 0000000000..00b310971a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
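With the ArcFace / CosFace / PartialFC / VPL wrappers above, comparing two faces reduces to extracting two embeddings and measuring their cosine similarity. A sketch under the same assumptions as earlier (placeholder paths, assumed ultrainfer/vision.h umbrella header); the similarity is computed by hand here rather than through a library helper.

#include <cmath>
#include <iostream>
#include <vector>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

static float CosineSimilarity(const std::vector<float> &a,
                              const std::vector<float> &b) {
  float dot = 0.f, norm_a = 0.f, norm_b = 0.f;
  for (size_t i = 0; i < a.size(); ++i) {
    dot += a[i] * b[i];
    norm_a += a[i] * a[i];
    norm_b += b[i] * b[i];
  }
  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + 1e-12f);
}

int main() {
  ultrainfer::vision::faceid::ArcFace model("arcface.onnx"); // placeholder path
  cv::Mat face0 = cv::imread("face_0.jpg");
  cv::Mat face1 = cv::imread("face_1.jpg");
  ultrainfer::vision::FaceRecognitionResult r0, r1;
  if (!model.Predict(face0, &r0) || !model.Predict(face1, &r1)) {
    return -1;
  }
  std::cout << "cosine similarity: "
            << CosineSimilarity(r0.embedding, r1.embedding) << std::endl;
  return 0;
}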
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Postprocessor object for InsightFaceRecognition serials model. + */ +class ULTRAINFER_DECL InsightFaceRecognitionPostprocessor { +public: + /** \brief Create a postprocessor instance for InsightFaceRecognition serials + * model + */ + InsightFaceRecognitionPostprocessor(); + + /** \brief Process the result of runtime and fill to FaceRecognitionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of FaceRecognitionResult + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_result, + std::vector *results); + + void SetL2Normalize(bool &l2_normalize) { l2_normalize_ = l2_normalize; } + + bool GetL2Normalize() { return l2_normalize_; } + +private: + bool l2_normalize_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc new file mode 100755 index 0000000000..d5364c9a15 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionPreprocessor::InsightFaceRecognitionPreprocessor() { + // parameters for preprocess + size_ = {112, 112}; + alpha_ = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta_ = {-1.f, -1.f, -1.f}; // RGB +} + +bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat *mat, + FDTensor *output) { + // face recognition model's preprocess steps in insightface + // reference: insightface/recognition/arcface_torch/inference.py + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. 
HWC2CHW + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + if (!disable_permute_) { + BGR2RGB::Run(mat); + } + + if (!disable_normalize_) { + Convert::Run(mat, alpha_, beta_); + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool InsightFaceRecognitionPreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + FDASSERT(images->size() == 1, "Only support batch = 1 now."); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + (*outputs)[0] = std::move(tensors[0]); + return true; +} +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h new file mode 100755 index 0000000000..9986222432 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Preprocessor object for InsightFaceRecognition serials model. + */ +class ULTRAINFER_DECL InsightFaceRecognitionPreprocessor { +public: + /** \brief Create a preprocessor instance for InsightFaceRecognition serials + * model + */ + InsightFaceRecognitionPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Get Size + std::vector GetSize() { return size_; } + + /// Set size. + void SetSize(std::vector &size) { size_ = size; } + + /// Get alpha + std::vector GetAlpha() { return alpha_; } + + /// Set alpha. + void SetAlpha(std::vector &alpha) { alpha_ = alpha; } + + /// Get beta + std::vector GetBeta() { return beta_; } + + /// Set beta. + void SetBeta(std::vector &beta) { beta_ = beta; } + + /// This function will disable normalize and hwc2chw in preprocessing step. 
+ void DisableNormalize() { disable_normalize_ = true; } + + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_permute_ = true; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + // Argument for image preprocessing step, tuple of (width, height), + // decide the target size after resize, default (112, 112) + std::vector size_; + // Argument for image preprocessing step, alpha values for normalization, + // default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + std::vector alpha_; + // Argument for image preprocessing step, beta values for normalization, + // default beta = {-1.f, -1.f, -1.f} + std::vector beta_; + // for recording the switch of normalize + bool disable_normalize_ = false; + // Argument for image preprocessing step, whether to swap the B and R channel, + // such as BGR->RGB, default true. + bool disable_permute_ = false; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc new file mode 100755 index 0000000000..970040baf3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindInsightFace(pybind11::module &m); +void BindAdaFace(pybind11::module &m); +void BindFaceId(pybind11::module &m) { + auto faceid_module = m.def_submodule("faceid", "Face recognition models."); + BindInsightFace(faceid_module); + BindAdaFace(faceid_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc new file mode 100755 index 0000000000..0441b9a20f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
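One note on the DisableNormalize / DisablePermute switches in the InsightFaceRecognitionPreprocessor above: per the implementation, the first skips the Convert / HWC2CHW / Cast steps and the second skips the BGR-to-RGB swap, which matters when those steps are already folded into a converted model (for example an RKNN export). A sketch, with a placeholder model name and the same assumed umbrella header as before:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

int main() {
  // Placeholder: a model whose graph already performs color conversion,
  // normalization and the HWC->CHW transpose.
  ultrainfer::vision::faceid::ArcFace model("arcface_fused_preprocess.onnx");
  model.GetPreprocessor().DisableNormalize();
  model.GetPreprocessor().DisablePermute();

  cv::Mat face = cv::imread("face_0.jpg");
  ultrainfer::vision::FaceRecognitionResult result;
  if (!model.Predict(face, &result)) {
    return -1;
  }
  return 0;
}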
+ +#include "ultrainfer/vision/generation/contrib/animegan.h" +#include "ultrainfer/function/functions.h" + +namespace ultrainfer { +namespace vision { +namespace generation { + +AnimeGAN::AnimeGAN(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool AnimeGAN::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool AnimeGAN::Predict(cv::Mat &img, cv::Mat *result) { + std::vector results; + if (!BatchPredict({img}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool AnimeGAN::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + std::vector processed_data(1); + if (!preprocessor_.Run(fd_images, &(processed_data))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + processed_data[0].name = InputInfoOfRuntime(0).name; + + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + if (!postprocessor_.Run(infer_result, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h new file mode 100755 index 0000000000..3135fe72b8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/generation/contrib/postprocessor.h" +#include "ultrainfer/vision/generation/contrib/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace generation { +/*! @brief AnimeGAN model object is used when load a AnimeGAN model. + */ +class ULTRAINFER_DECL AnimeGAN : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./model.pdmodel + * \param[in] params_file Path of parameter file, e.g ./model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is PADDLE format + */ + AnimeGAN(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "styletransfer/animegan"; } + + /** \brief Predict the style transfer result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output style transfer + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + bool Predict(cv::Mat &img, cv::Mat *result); + + /** \brief Predict the style transfer result for a batch of input images + * + * \param[in] images The list of input images, each element comes from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The list of output style transfer results will be writen to this structure + * \return true if the batch prediction successed, otherwise false + */ + bool BatchPredict(const std::vector &images, + std::vector *results); + + // Get preprocessor reference of AnimeGAN + AnimeGANPreprocessor &GetPreprocessor() { return preprocessor_; } + + // Get postprocessor reference of AnimeGAN + AnimeGANPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + + AnimeGANPreprocessor preprocessor_; + AnimeGANPostprocessor postprocessor_; +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc new file mode 100755 index 0000000000..40fad3b990 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
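A minimal usage sketch for the AnimeGAN class declared above, assuming a Paddle-format export (model.pdmodel / model.pdiparams) and the same assumed umbrella header; all paths are placeholders.

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

int main() {
  ultrainfer::vision::generation::AnimeGAN model("animegan/model.pdmodel",
                                                 "animegan/model.pdiparams");
  cv::Mat input = cv::imread("scenery.jpg");
  cv::Mat stylized;
  if (!model.Predict(input, &stylized)) {
    return -1;
  }
  // The postprocessor returns an 8-bit BGR image, ready for imwrite.
  cv::imwrite("scenery_anime.jpg", stylized);
  return 0;
}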
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindAnimeGAN(pybind11::module &m) { + pybind11::class_(m, "AnimeGAN") + .def(pybind11::init()) + .def("predict", + [](vision::generation::AnimeGAN &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + cv::Mat res; + self.Predict(mat, &res); + auto ret = pybind11::array_t( + {res.rows, res.cols, res.channels()}, res.data); + return ret; + }) + .def("batch_predict", + [](vision::generation::AnimeGAN &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + std::vector> ret; + for (size_t i = 0; i < results.size(); ++i) { + ret.push_back(pybind11::array_t( + {results[i].rows, results[i].cols, results[i].channels()}, + results[i].data)); + } + return ret; + }) + .def_property_readonly("preprocessor", + &vision::generation::AnimeGAN::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::generation::AnimeGAN::GetPostprocessor); + + pybind11::class_( + m, "AnimeGANPreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + pybind11::class_( + m, "AnimeGANPostprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime result " + "in YOLOv5Postprocessor."); + } + return results; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc new file mode 100755 index 0000000000..7d94f15f09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/generation/contrib/postprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace generation { + +bool AnimeGANPostprocessor::Run(std::vector &infer_results, + std::vector *results) { + // 1. Reverse normalization + // 2. 
RGB2BGR + FDTensor &output_tensor = infer_results.at(0); + std::vector shape = output_tensor.Shape(); // n, h, w, c + int size = shape[1] * shape[2] * shape[3]; + results->resize(shape[0]); + float *infer_result_data = reinterpret_cast(output_tensor.Data()); + for (size_t i = 0; i < results->size(); ++i) { + Mat result_mat = Mat::Create(shape[1], shape[2], 3, FDDataType::FP32, + infer_result_data + i * size); + std::vector mean{127.5f, 127.5f, 127.5f}; + std::vector std{127.5f, 127.5f, 127.5f}; + Convert::Run(&result_mat, mean, std); + // tmp data type is float[0-1.0],convert to uint type + auto temp = result_mat.GetOpenCVMat(); + cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3); + temp->convertTo(res, CV_8UC3, 1); + Mat fd_image = WrapMat(res); + BGR2RGB::Run(&fd_image); + res = *(fd_image.GetOpenCVMat()); + res.copyTo(results->at(i)); + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h new file mode 100755 index 0000000000..e51718874a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/function/functions.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +namespace generation { +/*! @brief Postprocessor object for AnimeGAN serials model. + */ +class ULTRAINFER_DECL AnimeGANPostprocessor { +public: + /** \brief Create a postprocessor instance for AnimeGAN serials model + */ + AnimeGANPostprocessor() {} + + /** \brief Process the result of runtime + * + * \param[in] infer_results The inference results from runtime + * \param[in] results The output results of style transfer + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_results, std::vector *results); +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc new file mode 100755 index 0000000000..367f266a6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/generation/contrib/preprocessor.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace generation {
+
+bool AnimeGANPreprocessor::Run(std::vector<FDMat> &images,
+                               std::vector<FDTensor> *outputs) {
+  // 1. BGR2RGB
+  // 2. Convert(opencv style) or Normalize
+  for (size_t i = 0; i < images.size(); ++i) {
+    auto ret = BGR2RGB::Run(&images[i]);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "BGR2RGB"
+              << "." << std::endl;
+      return false;
+    }
+    ret = Cast::Run(&images[i], "float");
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Cast"
+              << "." << std::endl;
+      return false;
+    }
+    std::vector<float> mean{1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
+    std::vector<float> std{-1.f, -1.f, -1.f};
+    ret = Convert::Run(&images[i], mean, std);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Convert"
+              << "." << std::endl;
+      return false;
+    }
+  }
+  outputs->resize(1);
+  // Concat all the preprocessed data to a batch tensor
+  std::vector<FDTensor> tensors(images.size());
+  for (size_t i = 0; i < images.size(); ++i) {
+    images[i].ShareWithTensor(&(tensors[i]));
+    tensors[i].ExpandDim(0);
+  }
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+} // namespace generation
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h
new file mode 100755
index 0000000000..6ba0abcbd1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/function/functions.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace generation {
+/*! @brief Preprocessor object for the AnimeGAN series of models.
+ */
+class ULTRAINFER_DECL AnimeGANPreprocessor {
+public:
+  /** \brief Create a preprocessor instance for the AnimeGAN series of models
+   */
+  AnimeGANPreprocessor() {}
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned
+   * wrapped by FDMat.
\param[in] output The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector &images, std::vector *output); +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc b/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc new file mode 100755 index 0000000000..0ef28f028b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindAnimeGAN(pybind11::module &m); + +void BindGeneration(pybind11::module &m) { + auto generation_module = + m.def_submodule("generation", "image generation submodule"); + BindAnimeGAN(generation_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc new file mode 100755 index 0000000000..7d6187a0fd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/headpose/contrib/fsanet.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace headpose { + +FSANet::FSANet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FSANet::Initialize() { + // parameters for preprocess + size = {64, 64}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool FSANet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Normalize + std::vector alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; + std::vector beta = {-127.5f / 128.0f, -127.5f / 128.0f, + -127.5f / 128.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool FSANet::Postprocess( + FDTensor &infer_result, HeadPoseResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < 3; ++i) { + result->euler_angles.emplace_back(data[i]); + } + + return true; +} + +bool FSANet::Predict(cv::Mat *im, HeadPoseResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace headpose +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h new file mode 100755 index 0000000000..1cc2221c7a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace headpose { +/*! 
@brief FSANet model object used when to load a FSANet model exported by + * FSANet. + */ +class ULTRAINFER_DECL FSANet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./fsanet-var.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FSANet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "FSANet"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, HeadPoseResult *result); + + /// tuple of (width, height), default (64, 64) + std::vector size; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, HeadPoseResult *result, + const std::map> &im_info); +}; + +} // namespace headpose +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc new file mode 100755 index 0000000000..f876af64d5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFSANet(pybind11::module &m) { + pybind11::class_(m, "FSANet") + .def(pybind11::init()) + .def("predict", + [](vision::headpose::FSANet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::HeadPoseResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::headpose::FSANet::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc b/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc new file mode 100755 index 0000000000..e965a8de6e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
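A minimal C++ sketch of the FSANet API defined above (the model and image paths are hypothetical; only the members shown in the header are used):

#include <iostream>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  // params_file defaults to "" and model_format to ONNX, as declared above.
  ultrainfer::vision::headpose::FSANet model("fsanet-var.onnx");
  cv::Mat im = cv::imread("face.jpg"); // hypothetical aligned face crop
  ultrainfer::vision::HeadPoseResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Head pose prediction failed." << std::endl;
    return -1;
  }
  // Postprocess copies three values into euler_angles (see fsanet.cc above).
  for (float angle : result.euler_angles) {
    std::cout << angle << " ";
  }
  std::cout << std::endl;
  return 0;
}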
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindFSANet(pybind11::module &m); + +void BindHeadPose(pybind11::module &m) { + auto headpose_module = m.def_submodule("headpose", "Headpose models."); + BindFSANet(headpose_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc new file mode 100755 index 0000000000..0a3319c0c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTinyPose(pybind11::module &m); + +void BindKeyPointDetection(pybind11::module &m) { + auto keypointdetection_module = m.def_submodule( + "keypointdetection", "Image object keypoint detection models."); + BindPPTinyPose(keypointdetection_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc new file mode 100755 index 0000000000..fc2625b446 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc @@ -0,0 +1,283 @@ +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif +#include "ultrainfer/vision.h" + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +PPTinyPose::PPTinyPose(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PPTinyPose::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + + return true; +} + +bool PPTinyPose::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + std::string arch = cfg["arch"].as(); + if (arch != "HRNet" && arch != "HigherHRNet") { + FDERROR << "Require the arch of model is HRNet or HigherHRNet, but arch " + << "defined in " + << "config file is " << arch << "." << std::endl; + return false; + } + + processors_.push_back(std::make_shared()); + + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = op["is_scale"].as(); + processors_.push_back(std::make_shared(mean, std, is_scale)); + } + } else if (op_name == "Permute") { + if (!disable_permute_) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } else if (op_name == "TopDownEvalAffine") { + auto trainsize = op["trainsize"].as>(); + int height = trainsize[1]; + int width = trainsize[0]; + cv::Mat trans_matrix(2, 3, CV_64FC1); + processors_.push_back( + std::make_shared(trans_matrix, width, height, 1)); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + return true; +} + +bool PPTinyPose::Preprocess(Mat *mat, std::vector *outputs) { + for (size_t i = 0; i < processors_.size(); ++i) { + if (processors_[i]->Name().compare("WarpAffine") == 0) { + auto processor = dynamic_cast(processors_[i].get()); + float origin_width = static_cast(mat->Width()); + float origin_height = static_cast(mat->Height()); + std::vector center = {origin_width / 2.0f, origin_height / 2.0f}; + std::vector scale = {origin_width, origin_height}; + int resize_width = -1; + int resize_height = -1; + std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); + cv::Mat trans_matrix(2, 3, CV_64FC1); + GetAffineTransform(center, scale, 0, {resize_width, resize_height}, + &trans_matrix, 0); + if (!(processor->SetTransformMatrix(trans_matrix))) { + FDERROR << "Failed to set transform matrix of " + << processors_[i]->Name() << " processor." << std::endl; + } + } + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." 
<< std::endl; + return false; + } + } + + outputs->resize(1); + (*outputs)[0].name = InputInfoOfRuntime(0).name; + mat->ShareWithTensor(&((*outputs)[0])); + + // reshape to [1, c, h, w] + (*outputs)[0].ExpandDim(0); + + return true; +} + +bool PPTinyPose::Postprocess(std::vector &infer_result, + KeyPointDetectionResult *result, + const std::vector ¢er, + const std::vector &scale) { + FDASSERT(infer_result[0].shape[0] == 1, + "Only support batch = 1 in UltraInfer now."); + result->Clear(); + + if (infer_result.size() == 1) { + FDTensor result_copy = infer_result[0]; + result_copy.Reshape({result_copy.shape[0], result_copy.shape[1], + result_copy.shape[2] * result_copy.shape[3]}); + infer_result.resize(2); + function::ArgMax(result_copy, &infer_result[1], -1); + } + + // Calculate output length + int outdata_size = + std::accumulate(infer_result[0].shape.begin(), + infer_result[0].shape.end(), 1, std::multiplies()); + int idxdata_size = + std::accumulate(infer_result[1].shape.begin(), + infer_result[1].shape.end(), 1, std::multiplies()); + + if (outdata_size < 6) { + FDWARNING << "PPTinyPose No object detected." << std::endl; + } + float *out_data = static_cast(infer_result[0].Data()); + void *idx_data = infer_result[1].Data(); + int idx_dtype = infer_result[1].dtype; + std::vector out_data_shape(infer_result[0].shape.begin(), + infer_result[0].shape.end()); + std::vector idx_data_shape(infer_result[1].shape.begin(), + infer_result[1].shape.end()); + std::vector preds(out_data_shape[1] * 3, 0); + std::vector heatmap(out_data, out_data + outdata_size); + std::vector idxout(idxdata_size); + if (idx_dtype == FDDataType::INT32) { + std::copy(static_cast(idx_data), + static_cast(idx_data) + idxdata_size, idxout.begin()); + } else if (idx_dtype == FDDataType::INT64) { + std::copy(static_cast(idx_data), + static_cast(idx_data) + idxdata_size, idxout.begin()); + } else { + FDERROR << "Only support process inference result with INT32/INT64 data " + "type, but now it's " + << idx_dtype << "." << std::endl; + } + GetFinalPredictions(heatmap, out_data_shape, idxout, center, scale, &preds, + this->use_dark); + result->Reserve(outdata_size); + result->num_joints = out_data_shape[1]; + result->keypoints.clear(); + for (int i = 0; i < out_data_shape[1]; i++) { + result->keypoints.push_back({preds[i * 3 + 1], preds[i * 3 + 2]}); + result->scores.push_back(preds[i * 3]); + } + return true; +} + +bool PPTinyPose::Predict(cv::Mat *im, KeyPointDetectionResult *result) { + std::vector center = {round(im->cols / 2.0f), round(im->rows / 2.0f)}; + std::vector scale = {static_cast(im->cols), + static_cast(im->rows)}; + Mat mat(*im); + std::vector processed_data; + if (!Preprocess(&mat, &processed_data)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + std::vector infer_result; + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + if (!Postprocess(infer_result, result, center, scale)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." 
+ << std::endl; + return false; + } + + return true; +} + +bool PPTinyPose::Predict(cv::Mat *im, KeyPointDetectionResult *result, + const DetectionResult &detection_result) { + std::vector crop_imgs; + std::vector> center_bs; + std::vector> scale_bs; + int crop_imgs_num = 0; + int box_num = detection_result.boxes.size(); + for (int i = 0; i < box_num; i++) { + auto box = detection_result.boxes[i]; + auto label_id = detection_result.label_ids[i]; + int channel = im->channels(); + cv::Mat cv_crop_img(0, 0, CV_32SC(channel)); + Mat crop_img(cv_crop_img); + std::vector rect(box.begin(), box.end()); + std::vector center; + std::vector scale; + if (label_id == 0) { + Mat mat(*im); + utils::CropImageByBox(mat, &crop_img, rect, ¢er, &scale); + center_bs.emplace_back(center); + scale_bs.emplace_back(scale); + crop_imgs.emplace_back(crop_img); + crop_imgs_num += 1; + } + } + for (int i = 0; i < crop_imgs_num; i++) { + std::vector processed_data; + if (!Preprocess(&crop_imgs[i], &processed_data)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result; + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + KeyPointDetectionResult one_cropimg_result; + if (!Postprocess(infer_result, &one_cropimg_result, center_bs[i], + scale_bs[i])) { + FDERROR << "Failed to postprocess while using model:" << ModelName() + << "." << std::endl; + return false; + } + if (result->num_joints == -1) { + result->num_joints = one_cropimg_result.num_joints; + } + std::copy(one_cropimg_result.keypoints.begin(), + one_cropimg_result.keypoints.end(), + std::back_inserter(result->keypoints)); + std::copy(one_cropimg_result.scores.begin(), + one_cropimg_result.scores.end(), + std::back_inserter(result->scores)); + } + + return true; +} + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h new file mode 100755 index 0000000000..2bd6f91c3a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h @@ -0,0 +1,116 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h" + +namespace ultrainfer { +namespace vision { +/** \brief All keypoint detection model APIs are defined inside this namespace + * + */ +namespace keypointdetection { + +/*! 
@brief PPTinyPose model object, used to load a PPTinyPose model exported
+ * by PaddleDetection
+ */
+class ULTRAINFER_DECL PPTinyPose : public UltraInferModel {
+public:
+  /** \brief Set path of model file and configuration file, and the
+   * configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g pptinypose/model.pdmodel
+   * \param[in] params_file Path of parameter file, e.g
+   * pptinypose/model.pdiparams; if the model format is ONNX, this parameter
+   * will be ignored
+   * \param[in] config_file Path of configuration file for deployment, e.g
+   * pptinypose/infer_cfg.yml
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   * cpu, and choose the backend defined in `valid_cpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is
+   * Paddle format
+   */
+  PPTinyPose(const std::string &model_file, const std::string &params_file,
+             const std::string &config_file,
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::PADDLE);
+
+  /// Get model's name
+  std::string ModelName() const { return "PaddleDetection/PPTinyPose"; }
+
+  /** \brief Predict the keypoint detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \return true if the keypoint prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, KeyPointDetectionResult *result);
+
+  /** \brief Predict the keypoint detection result with a given detection
+   * result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \param[in] detection_result The structure that stores the pedestrian
+   * detection results, which are used to crop the image for multi-person
+   * keypoint detection
+   * \return true if the keypoint prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, KeyPointDetectionResult *result,
+               const DetectionResult &detection_result);
+
+  /** \brief Whether to use Distribution-Aware Coordinate Representation for
+   * Human Pose Estimation (DARK for short) in postprocess, default is true
+   */
+  bool use_dark = true;
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() {
+    disable_normalize_ = true;
+    BuildPreprocessPipelineFromConfig();
+  }
+
+  /// This function will disable hwc2chw in preprocessing step.
+ void DisablePermute() { + disable_permute_ = true; + BuildPreprocessPipelineFromConfig(); + } + +protected: + bool Initialize(); + /// Build the preprocess pipeline from the loaded model + bool BuildPreprocessPipelineFromConfig(); + /// Preprocess an input image, and set the preprocessed results to `outputs` + bool Preprocess(Mat *mat, std::vector *outputs); + + /// Postprocess the inferenced results, and set the final result to `result` + bool Postprocess(std::vector &infer_result, + KeyPointDetectionResult *result, + const std::vector ¢er, + const std::vector &scale); + +private: + std::vector> processors_; + std::string config_file_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; +}; +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc new file mode 100755 index 0000000000..51891509fe --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPTinyPose(pybind11::module &m) { + pybind11::class_( + m, "PPTinyPose") + .def(pybind11::init()) + .def("predict", + [](vision::keypointdetection::PPTinyPose &self, + pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res); + return res; + }) + .def("predict", + [](vision::keypointdetection::PPTinyPose &self, + pybind11::array &data, + vision::DetectionResult &detection_result) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res, detection_result); + return res; + }) + .def("disable_normalize", + [](vision::keypointdetection::PPTinyPose &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::keypointdetection::PPTinyPose &self) { + self.DisablePermute(); + }) + .def_readwrite("use_dark", + &vision::keypointdetection::PPTinyPose::use_dark); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc new file mode 100755 index 0000000000..50a708f4cf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
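A minimal single-person usage sketch of the PPTinyPose class above; for multi-person images the second Predict overload that takes a DetectionResult would be used instead. Paths are hypothetical:

#include <iostream>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  // Hypothetical exported PaddleDetection model directory.
  ultrainfer::vision::keypointdetection::PPTinyPose model(
      "pptinypose/model.pdmodel", "pptinypose/model.pdiparams",
      "pptinypose/infer_cfg.yml");
  model.use_dark = true; // DARK decoding, on by default
  cv::Mat im = cv::imread("person.jpg");
  ultrainfer::vision::KeyPointDetectionResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Keypoint prediction failed." << std::endl;
    return -1;
  }
  // keypoints holds num_joints (x, y) pairs; scores the per-joint confidences.
  std::cout << "num_joints: " << result.num_joints << std::endl;
  return 0;
}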
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h" +#define PI 3.1415926535 +#define HALF_CIRCLE_DEGREE 180 + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +cv::Point2f Get3dPoint(const cv::Point2f &a, const cv::Point2f &b) { + cv::Point2f direct{a.x - b.x, a.y - b.y}; + return cv::Point2f(a.x - direct.y, a.y + direct.x); +} + +std::vector GetDir(const float src_point_x, const float src_point_y, + const float rot_rad) { + float sn = sin(rot_rad); + float cs = cos(rot_rad); + std::vector src_result{0.0, 0.0}; + src_result[0] = src_point_x * cs - src_point_y * sn; + src_result[1] = src_point_x * sn + src_point_y * cs; + return src_result; +} + +void AffineTransform(const float pt_x, const float pt_y, const cv::Mat &trans, + std::vector *preds, const int p) { + double new1[3] = {pt_x, pt_y, 1.0}; + cv::Mat new_pt(3, 1, trans.type(), new1); + cv::Mat w = trans * new_pt; + (*preds)[p * 3 + 1] = static_cast(w.at(0, 0)); + (*preds)[p * 3 + 2] = static_cast(w.at(1, 0)); +} + +void GetAffineTransform(const std::vector ¢er, + const std::vector &scale, const float rot, + const std::vector &output_size, cv::Mat *trans, + const int inv) { + float src_w = scale[0]; + float dst_w = static_cast(output_size[0]); + float dst_h = static_cast(output_size[1]); + float rot_rad = rot * PI / HALF_CIRCLE_DEGREE; + std::vector src_dir = GetDir(-0.5 * src_w, 0, rot_rad); + std::vector dst_dir{-0.5f * dst_w, 0.0}; + cv::Point2f srcPoint2f[3], dstPoint2f[3]; + srcPoint2f[0] = cv::Point2f(center[0], center[1]); + srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]); + srcPoint2f[2] = Get3dPoint(srcPoint2f[0], srcPoint2f[1]); + + dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5); + dstPoint2f[1] = + cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]); + dstPoint2f[2] = Get3dPoint(dstPoint2f[0], dstPoint2f[1]); + if (inv == 0) { + (*trans) = cv::getAffineTransform(srcPoint2f, dstPoint2f); + } else { + (*trans) = cv::getAffineTransform(dstPoint2f, srcPoint2f); + } +} + +void TransformPreds(std::vector &coords, + const std::vector ¢er, + const std::vector &scale, + const std::vector &output_size, + const std::vector &dim, + std::vector *target_coords) { + cv::Mat trans(2, 3, CV_64FC1); + GetAffineTransform(center, scale, 0, output_size, &trans, 1); + for (int p = 0; p < dim[1]; ++p) { + AffineTransform(coords[p * 2], coords[p * 2 + 1], trans, target_coords, p); + } +} + +void GetFinalPredictions(const std::vector &heatmap, + const std::vector &dim, + const std::vector &idxout, + const std::vector ¢er, + const std::vector scale, + std::vector *preds, const bool DARK) { + std::vector coords(dim[1] * 2); + + int heatmap_height = dim[2]; + int heatmap_width = dim[3]; + for (int j = 0; j < dim[1]; ++j) { + int index = j * dim[2] * dim[3]; + int idx = idxout[j]; + (*preds)[j * 3] = heatmap[index + idx]; + coords[j * 2] = idx % heatmap_width; + coords[j * 2 + 1] = idx / heatmap_width; + int px = int(coords[j * 2] + 0.5); + int py = int(coords[j * 2 + 1] + 0.5); + if (DARK && px > 1 && px < heatmap_width - 2) { + 
utils::DarkParse(heatmap, dim, &coords, px, py, index, j); + } else { + if (px > 0 && px < heatmap_width - 1) { + float diff_x = heatmap[index + py * dim[3] + px + 1] - + heatmap[index + py * dim[3] + px - 1]; + coords[j * 2] += diff_x > 0 ? 1 : -1 * 0.25; + } + if (py > 0 && py < heatmap_height - 1) { + float diff_y = heatmap[index + (py + 1) * dim[3] + px] - + heatmap[index + (py - 1) * dim[3] + px]; + coords[j * 2 + 1] += diff_y > 0 ? 1 : -1 * 0.25; + } + } + } + std::vector img_size{heatmap_width, heatmap_height}; + TransformPreds(coords, center, scale, img_size, dim, preds); +} + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h new file mode 100755 index 0000000000..5db2da1517 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +cv::Point2f Get3dPoint(const cv::Point2f &a, const cv::Point2f &b); + +std::vector GetDir(const float src_point_x, const float src_point_y, + const float rot_rad); + +void GetAffineTransform(const std::vector ¢er, + const std::vector &scale, const float rot, + const std::vector &output_size, cv::Mat *trans, + const int inv); + +void AffineTransform(const float pt_x, const float pt_y, const cv::Mat &trans, + std::vector *preds, const int p); + +void TransformPreds(std::vector &coords, + const std::vector ¢er, + const std::vector &scale, + const std::vector &output_size, + const std::vector &dim, + std::vector *target_coords); + +void GetFinalPredictions(const std::vector &heatmap, + const std::vector &dim, + const std::vector &idxout, + const std::vector ¢er, + const std::vector scale, + std::vector *preds, const bool DARK); + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc new file mode 100755 index 0000000000..3ef17008ee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc @@ -0,0 +1,155 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
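The affine helpers declared above are used in two directions: Preprocess builds the crop-to-network transform with inv == 0, and TransformPreds maps decoded heatmap coordinates back to the original crop with inv == 1. A small illustrative sketch with made-up crop geometry:

#include <vector>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h"

int main() {
  std::vector<float> center = {150.0f, 200.0f}; // crop center (x, y)
  std::vector<float> scale = {300.0f, 400.0f};  // crop width and height
  cv::Mat forward(2, 3, CV_64FC1);
  cv::Mat backward(2, 3, CV_64FC1);
  // inv == 0: original crop -> network input of size 196 x 256 (w x h).
  ultrainfer::vision::keypointdetection::GetAffineTransform(
      center, scale, 0.0f, {196, 256}, &forward, 0);
  // inv == 1: network/heatmap space -> original crop, used when decoding.
  ultrainfer::vision::keypointdetection::GetAffineTransform(
      center, scale, 0.0f, {196, 256}, &backward, 1);
  return 0;
}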
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/matting/contrib/modnet.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { + +MODNet::MODNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool MODNet::Initialize() { + // parameters for preprocess + size = {256, 256}; + alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta = {-1.f, -1.f, -1.f}; // RGB + swap_rb = true; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool MODNet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. HWC2CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (swap_rb) { + BGR2RGB::Run(mat); + } + + Convert::Run(mat, alpha, beta); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool MODNet::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 1), + "The default number of output tensor must be 1 according to " + "modnet."); + FDTensor &alpha_tensor = infer_result.at(0); // (1, 1, h, w) + FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch =1 now."); + if (alpha_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_ipt = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + int out_h = iter_out->second[0]; + int out_w = iter_out->second[1]; + int ipt_h = iter_ipt->second[0]; + int ipt_w = iter_ipt->second[1]; + + float *alpha_ptr = static_cast(alpha_tensor.Data()); + // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); + // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr); // ref-only, zero copy. 
+ if ((out_h != ipt_h) || (out_w != ipt_w)) { + Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); + } + + result->Clear(); + // note: must be setup shape before Resize + result->contain_foreground = false; + result->shape = {static_cast(ipt_h), static_cast(ipt_w)}; + int numel = ipt_h * ipt_w; + int nbytes = numel * sizeof(float); + result->Resize(numel); + std::memcpy(result->alpha.data(), alpha_resized.Data(), nbytes); + return true; +} + +bool MODNet::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h new file mode 100755 index 0000000000..c1008a1e92 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { +/*! @brief MODNet model object used when to load a MODNet model exported by + * MODNet. + */ +class ULTRAINFER_DECL MODNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./modnet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + MODNet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "matting/MODNet"; } + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (256, 256) + */ + std::vector size; + /*! @brief + Argument for image preprocessing step, parameters for normalization, size + should be the the same as channels, default alpha = {1.f / 127.5f, 1.f / + 127.5f, 1.f / 127.5f} + */ + std::vector alpha; + /*! @brief + Argument for image preprocessing step, parameters for normalization, size + should be the the same as channels, default beta = {-1.f, -1.f, -1.f} + */ + std::vector beta; + /*! @brief + Argument for image preprocessing step, whether to swap the B and R channel, + such as BGR->RGB, default true. + */ + bool swap_rb; + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output matting result + * will be writen to this structure \return true if the prediction successed, + * otherwise false + */ + bool Predict(cv::Mat *im, MattingResult *result); + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, MattingResult *result, + const std::map> &im_info); +}; + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc new file mode 100755 index 0000000000..0432929c00 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindMODNet(pybind11::module &m) { + // Bind MODNet + pybind11::class_(m, "MODNet") + .def(pybind11::init()) + .def("predict", + [](vision::matting::MODNet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::matting::MODNet::size) + .def_readwrite("alpha", &vision::matting::MODNet::alpha) + .def_readwrite("beta", &vision::matting::MODNet::beta) + .def_readwrite("swap_rb", &vision::matting::MODNet::swap_rb); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc new file mode 100755 index 0000000000..d8c4ee4c79 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
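A minimal C++ usage sketch of the MODNet class above (model and image paths are hypothetical; only members declared in the header are touched):

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  ultrainfer::vision::matting::MODNet model("modnet.onnx"); // hypothetical path
  cv::Mat im = cv::imread("portrait.jpg");
  ultrainfer::vision::MattingResult result;
  if (!model.Predict(&im, &result)) {
    return -1;
  }
  // result.alpha is a float matte with result.shape == {h, w}, already resized
  // back to the input resolution by the postprocess step above.
  return 0;
}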
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/matting/contrib/rvm.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { + +RobustVideoMatting::RobustVideoMatting(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool RobustVideoMatting::Initialize() { + // parameters for preprocess + size = {1080, 1920}; + + video_mode = true; + + swap_rb = true; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool RobustVideoMatting::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + // Convert_and_permute(swap_rb=true) + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, swap_rb); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool RobustVideoMatting::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 6), + "The default number of output tensor must be 6 according to " + "RobustVideoMatting."); + FDTensor &fgr = infer_result.at(0); // fgr (1, 3, h, w) 0.~1. + FDTensor &alpha = infer_result.at(1); // alpha (1, 1, h, w) 0.~1. + FDASSERT((fgr.shape[0] == 1), "Only support batch = 1 now."); + FDASSERT((alpha.shape[0] == 1), "Only support batch = 1 now."); + if (fgr.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (alpha.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // update context + if (video_mode) { + for (size_t i = 0; i < 4; ++i) { + FDTensor &rki = infer_result.at(i + 2); + dynamic_inputs_dims_[i] = rki.shape; + dynamic_inputs_datas_[i].resize(rki.Numel()); + memcpy(dynamic_inputs_datas_[i].data(), rki.Data(), + rki.Numel() * FDDataTypeSize(rki.dtype)); + } + } + + auto iter_in = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + FDASSERT(iter_out != im_info.end() && iter_in != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + int out_h = iter_out->second[0]; + int out_w = iter_out->second[1]; + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + // for alpha + float *alpha_ptr = static_cast(alpha.Data()); + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr); // ref-only, zero copy. + if ((out_h != in_h) || (out_w != in_w)) { + Resize::Run(&alpha_resized, in_w, in_h, -1, -1); + } + + // for foreground + float *fgr_ptr = static_cast(fgr.Data()); + Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + fgr_ptr); // ref-only, zero copy. + if ((out_h != in_h) || (out_w != in_w)) { + Resize::Run(&fgr_resized, in_w, in_h, -1, -1); + } + + result->contain_foreground = true; + // if contain_foreground == true, shape must set to (h, w, c) + result->shape = {static_cast(in_h), static_cast(in_w), 3}; + int numel = in_h * in_w; + int nbytes = numel * sizeof(float); + result->Resize(numel); + memcpy(result->alpha.data(), alpha_resized.Data(), nbytes); + memcpy(result->foreground.data(), fgr_resized.Data(), nbytes); + return true; +} + +bool RobustVideoMatting::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + int inputs_nums = NumInputsOfRuntime(); + std::vector input_tensors(inputs_nums); + std::map> im_info; + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + // convert vector to FDTensor + for (size_t i = 1; i < inputs_nums; ++i) { + input_tensors[i].SetExternalData(dynamic_inputs_dims_[i - 1], + FDDataType::FP32, + dynamic_inputs_datas_[i - 1].data()); + input_tensors[i].device = Device::CPU; + } + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + for (size_t i = 0; i < inputs_nums; ++i) { + input_tensors[i].name = InputInfoOfRuntime(i).name; + } + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h new file mode 100755 index 0000000000..ac9a2fde8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+
+namespace vision {
+/** \brief All image/video matting model APIs are defined inside this namespace
+ *
+ */
+namespace matting {
+
+/*! @brief RobustVideoMatting model object, used to load a RobustVideoMatting
+ * model exported by RobustVideoMatting
+ */
+class ULTRAINFER_DECL RobustVideoMatting : public UltraInferModel {
+public:
+  /** \brief Set path of model file and configuration file, and the
+   * configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g rvm/rvm_mobilenetv3_fp32.onnx
+   * \param[in] params_file Path of parameter file; if the model format is
+   * ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   * cpu, and choose the backend defined in `valid_cpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   * format
+   */
+  RobustVideoMatting(const std::string &model_file,
+                     const std::string &params_file = "",
+                     const RuntimeOption &custom_option = RuntimeOption(),
+                     const ModelFormat &model_format = ModelFormat::ONNX);
+
+  /// Get model's name
+  std::string ModelName() const { return "matting/RobustVideoMatting"; }
+
+  /** \brief Predict the matting result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output matting result will be written to this
+   * structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, MattingResult *result);
+
+  /// Preprocess image size, the default is (1080, 1920)
+  std::vector<int> size;
+
+  /// Whether to enable video mode; if the inputs are unrelated still images,
+  /// set it to false, the default is true // NOLINT
+  bool video_mode;
+
+  /// Whether to convert to RGB. Set to false if you have converted YUV format
+  /// images to RGB outside the model, default true // NOLINT
+  bool swap_rb;
+
+private:
+  bool Initialize();
+  /// Preprocess an input image, and set the preprocessed results to `outputs`
+  bool Preprocess(Mat *mat, FDTensor *output,
+                  std::map<std::string, std::array<int, 2>> *im_info);
+
+  /// Postprocess the inferenced results, and set the final result to `result`
+  bool Postprocess(std::vector<FDTensor> &infer_result, MattingResult *result,
+                   const std::map<std::string, std::array<int, 2>> &im_info);
+
+  /// Initial data for the dynamic inputs
+  std::vector<std::vector<float>> dynamic_inputs_datas_ = {
+      {0.0f},  // r1i
+      {0.0f},  // r2i
+      {0.0f},  // r3i
+      {0.0f},  // r4i
+      {0.25f}, // downsample_ratio
+  };
+
+  /// Initial dims for the dynamic inputs
+  std::vector<std::vector<int64_t>> dynamic_inputs_dims_ = {
+      {1, 1, 1, 1}, // r1i
+      {1, 1, 1, 1}, // r2i
+      {1, 1, 1, 1}, // r3i
+      {1, 1, 1, 1}, // r4i
+      {1},          // downsample_ratio
+  };
+};
+
+} // namespace matting
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc
new file mode 100755
index 0000000000..a54f7779cf
--- /dev/null
+++
b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindRobustVideoMatting(pybind11::module &m) { + // Bind RobustVideoMatting + pybind11::class_( + m, "RobustVideoMatting") + .def(pybind11::init()) + .def( + "predict", + [](vision::matting::RobustVideoMatting &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::matting::RobustVideoMatting::size) + .def_readwrite("video_mode", + &vision::matting::RobustVideoMatting::video_mode) + .def_readwrite("swap_rb", &vision::matting::RobustVideoMatting::swap_rb); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc new file mode 100755 index 0000000000..5986a7b594 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindMODNet(pybind11::module &m); +void BindRobustVideoMatting(pybind11::module &m); +void BindPPMatting(pybind11::module &m); + +void BindMatting(pybind11::module &m) { + auto matting_module = + m.def_submodule("matting", "Image/Video matting models."); + BindMODNet(matting_module); + BindRobustVideoMatting(matting_module); + BindPPMatting(matting_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc new file mode 100755 index 0000000000..8a234a31be --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc @@ -0,0 +1,234 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
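A minimal C++ usage sketch for the RobustVideoMatting class bound above; the include path follows this patch's layout, while the model and image file names are placeholders to substitute with local files.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/matting/contrib/rvm.h"

    int main() {
      namespace matting = ultrainfer::vision::matting;
      // Default RuntimeOption runs on CPU with a backend from valid_cpu_backends.
      matting::RobustVideoMatting model("rvm_mobilenetv3_fp32.onnx");
      // For independent images (no temporal context), disable video mode so the
      // recurrent states r1i..r4i are not carried across calls.
      model.video_mode = false;
      cv::Mat frame = cv::imread("input.jpg");
      ultrainfer::vision::MattingResult result;
      if (!model.Predict(&frame, &result)) {
        std::cerr << "RobustVideoMatting prediction failed." << std::endl;
        return 1;
      }
      // result.alpha holds one float per pixel; result.foreground is filled as
      // well, since Postprocess() sets contain_foreground = true.
      std::cout << "alpha elements: " << result.alpha.size() << std::endl;
      return 0;
    }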
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/matting/ppmatting/ppmatting.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace matting { + +PPMatting::PPMatting(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::ORT, Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PPMatting::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPMatting::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + processors_.push_back(std::make_shared()); + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + FDASSERT((cfg["Deploy"]["input_shape"]), + "The yaml file should include input_shape parameters"); + // input_shape + // b c h w + auto input_shape = cfg["Deploy"]["input_shape"].as>(); + FDASSERT(input_shape.size() == 4, + "The input_shape in yaml file need to be 4-dimensions, but now its " + "dimension is %zu.", + input_shape.size()); + + is_fixed_input_shape_ = false; + if (input_shape[2] > 0 && input_shape[3] > 0) { + is_fixed_input_shape_ = true; + } + if (input_shape[2] < 0 || input_shape[3] < 0) { + FDWARNING << "Detected dynamic input shape of your model, only Paddle " + "Inference / OpenVINO support this model now." + << std::endl; + } + if (cfg["Deploy"]["transforms"]) { + auto preprocess_cfg = cfg["Deploy"]["transforms"]; + int long_size = -1; + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + if (op["type"].as() == "LimitShort") { + int max_short = op["max_short"] ? op["max_short"].as() : -1; + int min_short = op["min_short"] ? 
op["min_short"].as() : -1; + if (is_fixed_input_shape_) { + // if the input shape is fixed, will resize by scale, and the max + // shape will not exceed input_shape + long_size = max_short; + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + processors_.push_back( + std::make_shared(max_short, min_short)); + } + } else if (op["type"].as() == "ResizeToIntMult") { + if (is_fixed_input_shape_) { + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + int mult_int = op["mult_int"] ? op["mult_int"].as() : 32; + processors_.push_back(std::make_shared(mult_int)); + } + } else if (op["type"].as() == "Normalize") { + std::vector mean = {0.5, 0.5, 0.5}; + std::vector std = {0.5, 0.5, 0.5}; + if (op["mean"]) { + mean = op["mean"].as>(); + } + if (op["std"]) { + std = op["std"].as>(); + } + processors_.push_back(std::make_shared(mean, std)); + } else if (op["type"].as() == "ResizeByShort") { + long_size = op["short_size"].as(); + if (is_fixed_input_shape_) { + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + processors_.push_back(std::make_shared(long_size)); + } + } + } + // the default padding value is {127.5,127.5,127.5} so after normalizing, + // ((127.5/255)-0.5)/0.5 = 0.0 + std::vector value = {0.0, 0.0, 0.0}; + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(input_shape[3], input_shape[2], value)); + processors_.push_back(std::make_shared()); + } + + return true; +} + +bool PPMatting::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + (*im_info)["input_shape"] = {mat->Height(), mat->Width()}; + for (size_t i = 0; i < processors_.size(); ++i) { + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); + output->name = InputInfoOfRuntime(0).name; + return true; +} + +bool PPMatting::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 1), + "The default number of output tensor must be 1 "); + FDTensor &alpha_tensor = infer_result.at(0); // (1, 1, h, w) + FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (alpha_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + std::vector dim{0, 2, 3, 1}; + function::Transpose(alpha_tensor, &alpha_tensor, dim); + alpha_tensor.Squeeze(0); + Mat mat = Mat::Create(alpha_tensor); + + auto iter_ipt = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + if (is_fixed_input_shape_) { + double scale_h = static_cast(iter_out->second[0]) / + static_cast(iter_ipt->second[0]); + double scale_w = static_cast(iter_out->second[1]) / + static_cast(iter_ipt->second[1]); + double actual_scale = std::min(scale_h, scale_w); + + int size_before_pad_h = round(actual_scale * iter_ipt->second[0]); + int size_before_pad_w = round(actual_scale * iter_ipt->second[1]); + + Crop::Run(&mat, 0, 0, size_before_pad_w, size_before_pad_h); + } + + Resize::Run(&mat, iter_ipt->second[1], iter_ipt->second[0], -1.0f, -1.0f, 1, + false, ProcLib::OPENCV); + + result->Clear(); + // note: must be setup shape before Resize + result->contain_foreground = false; + result->shape = {iter_ipt->second[0], iter_ipt->second[1]}; + int numel = iter_ipt->second[0] * iter_ipt->second[1]; + int nbytes = numel * sizeof(float); + result->Resize(numel); + std::memcpy(result->alpha.data(), mat.Data(), nbytes); + return true; +} + +bool PPMatting::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + std::vector processed_data(1); + + std::map> im_info; + + if (!Preprocess(&mat, &(processed_data[0]), &im_info)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + if (!Postprocess(infer_result, result, im_info)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h new file mode 100755 index 0000000000..ebd45a8248 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +/** \brief All object matting model APIs are defined inside this namespace + * + */ +namespace matting { +/*! @brief PPMatting model object used when to load a PPMatting model exported + * by PPMatting. 
+ */ +class ULTRAINFER_DECL PPMatting : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PPMatting-512/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PPMatting-512/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g PPMatting-512/infer_cfg.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPMatting(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "PaddleMatting"; } + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output matting result + * will be writen to this structure \return true if the prediction successed, + * otherwise false + */ + virtual bool Predict(cv::Mat *im, MattingResult *result); + +private: + bool Initialize(); + + bool BuildPreprocessPipelineFromConfig(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, MattingResult *result, + const std::map> &im_info); + + std::vector> processors_; + std::string config_file_; + bool is_fixed_input_shape_; +}; + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc new file mode 100755 index 0000000000..6a41147230 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPMatting(pybind11::module &m) { + pybind11::class_(m, "PPMatting") + .def(pybind11::init()) + .def("predict", + [](vision::matting::PPMatting &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc new file mode 100755 index 0000000000..1636646a54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
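A minimal C++ sketch of calling the PPMatting class declared above; the PPMatting-512 paths come from the doc comment and stand in for any locally exported Paddle model.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/matting/ppmatting/ppmatting.h"

    int main() {
      namespace matting = ultrainfer::vision::matting;
      // Paddle-format model: model file, params file and the deploy config
      // are all required by the constructor.
      matting::PPMatting model("PPMatting-512/model.pdmodel",
                               "PPMatting-512/model.pdiparams",
                               "PPMatting-512/infer_cfg.yml");
      cv::Mat image = cv::imread("portrait.jpg");
      ultrainfer::vision::MattingResult result;
      if (!model.Predict(&image, &result)) {
        std::cerr << "PPMatting prediction failed." << std::endl;
        return 1;
      }
      // PPMatting only produces an alpha matte (contain_foreground == false),
      // shaped to the original input height and width.
      std::cout << "alpha shape: " << result.shape[0] << " x " << result.shape[1]
                << std::endl;
      return 0;
    }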
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPOCRModel(pybind11::module &m); +void BindPPOCRv4(pybind11::module &m); +void BindPPOCRv3(pybind11::module &m); +void BindPPOCRv2(pybind11::module &m); +void BindPPStructureV2Table(pybind11::module &m); + +void BindOcr(pybind11::module &m) { + auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models"); + BindPPOCRModel(ocr_module); + BindPPOCRv4(ocr_module); + BindPPOCRv3(ocr_module); + BindPPOCRv2(ocr_module); + BindPPStructureV2Table(ocr_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc new file mode 100755 index 0000000000..649aa330bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc @@ -0,0 +1,128 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/classifier.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +Classifier::Classifier() {} +Classifier::Classifier(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool Classifier::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + + return true; +} + +std::unique_ptr Classifier::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(Classifier(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool Classifier::Predict(const cv::Mat &img, int32_t *cls_label, + float *cls_score) { + std::vector cls_labels(1); + std::vector cls_scores(1); + bool success = BatchPredict({img}, &cls_labels, &cls_scores); + if (!success) { + return success; + } + *cls_label = cls_labels[0]; + *cls_score = cls_scores[0]; + return true; +} + +bool Classifier::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + ocr_result->cls_labels.resize(1); + ocr_result->cls_scores.resize(1); + if (!Predict(img, &(ocr_result->cls_labels[0]), + &(ocr_result->cls_scores[0]))) { + return false; + } + return true; +} + +bool Classifier::BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result) { + return BatchPredict(images, &(ocr_result->cls_labels), + &(ocr_result->cls_scores)); +} + +bool Classifier::BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores) { + return BatchPredict(images, cls_labels, cls_scores, 0, images.size()); +} + +bool Classifier::BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores, + size_t start_index, size_t end_index) { + size_t total_size = images.size(); + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, + start_index, total_size)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h new file mode 100755 index 0000000000..d54e3dc378 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
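The single-image Predict overload defined above simply wraps BatchPredict over one element; a minimal call sequence might look like the following sketch (model and image paths are placeholders).

    #include <cstdint>
    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/classifier.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::Classifier cls("ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel",
                          "ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams");
      cv::Mat crop = cv::imread("text_line.jpg");
      int32_t label = 0;
      float score = 0.0f;
      if (!cls.Predict(crop, &label, &score)) {
        std::cerr << "Classification failed." << std::endl;
        return 1;
      }
      // Label 1 with a score above the postprocessor threshold (default 0.9)
      // is the usual signal that the text line should be rotated 180 degrees.
      std::cout << "label: " << label << ", score: " << score << std::endl;
      return 0;
    }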
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/cls_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/cls_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief Classifier object is used to load the classification model provided + * by PaddleOCR. + */ +class ULTRAINFER_DECL Classifier : public UltraInferModel { +public: + Classifier(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. \param[in] params_file Path + * of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored. \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends`. \param[in] model_format + * Model format of the loaded model, default is Paddle format. + */ + Classifier(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new Classifier with less memory usage when multiple + * instances of the same model are created + * + * \return new Classifier* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_cls"; } + + /** \brief Predict the input image and get OCR classification model + * cls_result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] cls_label The label result of + * cls model will be written in to this param. \param[in] cls_score The score + * result of cls model will be written in to this param. \return true if the + * prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, int32_t *cls_label, + float *cls_score); + + /** \brief Predict the input image and get OCR recognition model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * recognition model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR classification model + * result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * classification model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR classification model + * cls_result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. 
\param[in] cls_labels The label + * results of cls model will be written in to this vector. \param[in] + * cls_scores The score results of cls model will be written in to this + * vector. \return true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores); + virtual bool BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores, size_t start_index, + size_t end_index); + + /// Get preprocessor reference of ClassifierPreprocessor + virtual ClassifierPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of ClassifierPostprocessor + virtual ClassifierPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + ClassifierPreprocessor preprocessor_; + ClassifierPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc new file mode 100755 index 0000000000..9f50d4b6b5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/cls_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool SingleBatchPostprocessor(const float *out_data, const size_t &length, + int *cls_label, float *cls_score) { + + *cls_label = std::distance(&out_data[0], + std::max_element(&out_data[0], &out_data[length])); + + *cls_score = float(*std::max_element(&out_data[0], &out_data[length])); + return true; +} + +bool ClassifierPostprocessor::Run(const std::vector &tensors, + std::vector *cls_labels, + std::vector *cls_scores) { + size_t total_size = tensors[0].shape[0]; + return Run(tensors, cls_labels, cls_scores, 0, total_size); +} + +bool ClassifierPostprocessor::Run(const std::vector &tensors, + std::vector *cls_labels, + std::vector *cls_scores, + size_t start_index, size_t total_size) { + // Classifier have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For Classifier, the output tensor shape = [batch,2] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + + if (batch <= 0) { + FDERROR << "The infer outputTensor.shape[0] <=0, wrong infer result." + << std::endl; + return false; + } + if (start_index < 0 || total_size <= 0) { + FDERROR << "start_index or total_size error. Correct is: 0 <= start_index " + "< total_size" + << std::endl; + return false; + } + if ((start_index + batch) > total_size) { + FDERROR << "start_index or total_size error. 
Correct is: start_index + " + "batch(outputTensor.shape[0]) <= total_size" + << std::endl; + return false; + } + + cls_labels->resize(total_size); + cls_scores->resize(total_size); + const float *tensor_data = reinterpret_cast(tensor.Data()); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data + i_batch * length, length, + &cls_labels->at(i_batch + start_index), + &cls_scores->at(i_batch + start_index)); + } + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h new file mode 100755 index 0000000000..6991ad04d7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for Classifier serials model. + */ +class ULTRAINFER_DECL ClassifierPostprocessor { +public: + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] cls_labels The output label results of classification model + * \param[in] cls_scores The output score results of classification model + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *cls_labels, std::vector *cls_scores); + + bool Run(const std::vector &tensors, + std::vector *cls_labels, std::vector *cls_scores, + size_t start_index, size_t total_size); + + /// Set threshold for the classification postprocess, default is 0.9 + void SetClsThresh(float cls_thresh) { cls_thresh_ = cls_thresh; } + + /// Get threshold value of the classification postprocess. + float GetClsThresh() const { return cls_thresh_; } + +private: + float cls_thresh_ = 0.9; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc new file mode 100755 index 0000000000..078249629f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
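Stripped of the tensor plumbing, SingleBatchPostprocessor reduces each row of the [batch, 2] output to an argmax label plus its score; the per-row logic can be checked in isolation with toy values.

    #include <algorithm>
    #include <iostream>
    #include <iterator>

    int main() {
      // One row of classifier output: scores for the 0- and 180-degree classes.
      const float out_data[2] = {0.12f, 0.88f};
      const size_t length = 2;
      // Argmax index becomes the label, the maximum value becomes the score.
      int cls_label = static_cast<int>(std::distance(
          out_data, std::max_element(out_data, out_data + length)));
      float cls_score = *std::max_element(out_data, out_data + length);
      std::cout << "label: " << cls_label << ", score: " << cls_score
                << std::endl;  // label: 1, score: 0.88
      return 0;
    }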
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/cls_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +ClassifierPreprocessor::ClassifierPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + normalize_op_ = + std::make_shared(std::vector({0.5f, 0.5f, 0.5f}), + std::vector({0.5f, 0.5f, 0.5f}), true); + hwc2chw_op_ = std::make_shared(); +} + +void ClassifierPreprocessor::OcrClassifierResizeImage( + FDMat *mat, const std::vector &cls_image_shape) { + int img_c = cls_image_shape[0]; + int img_h = cls_image_shape[1]; + int img_w = cls_image_shape[2]; + + float ratio = float(mat->Width()) / float(mat->Height()); + + int resize_w; + if (ceilf(img_h * ratio) > img_w) + resize_w = img_w; + else + resize_w = int(ceilf(img_h * ratio)); + + resize_op_->SetWidthAndHeight(resize_w, img_h); + (*resize_op_)(mat); +} + +bool ClassifierPreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index) { + if (images->size() == 0 || start_index < 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + mats[i - start_index] = images->at(i); + } + return Run(&mats, outputs); +} + +bool ClassifierPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + OcrClassifierResizeImage(mat, cls_image_shape_); + if (!disable_normalize_) { + (*normalize_op_)(mat); + } + std::vector value = {0, 0, 0}; + if (mat->Width() < cls_image_shape_[2]) { + pad_op_->SetPaddingSize(0, 0, 0, cls_image_shape_[2] - mat->Width()); + (*pad_op_)(mat); + } + if (!disable_permute_) { + (*hwc2chw_op_)(mat); + } + } + // Only have 1 output tensor. + outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h new file mode 100755 index 0000000000..f24468db8c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
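OcrClassifierResizeImage above keeps the source aspect ratio while capping the resized width at cls_image_shape[2]; narrower results are right-padded in Apply(). The width selection can be verified in isolation, assuming the default {3, 48, 192} shape.

    #include <cmath>
    #include <iostream>

    // Mirrors the width selection in OcrClassifierResizeImage for a target
    // cls_image_shape of {3, 48, 192}.
    int ClsResizeWidth(int src_w, int src_h, int img_h = 48, int img_w = 192) {
      float ratio = static_cast<float>(src_w) / static_cast<float>(src_h);
      if (std::ceil(img_h * ratio) > img_w) return img_w;
      return static_cast<int>(std::ceil(img_h * ratio));
    }

    int main() {
      std::cout << ClsResizeWidth(320, 32) << std::endl;  // 192 (width capped)
      std::cout << ClsResizeWidth(100, 32) << std::endl;  // 150 (padded to 192 later)
      return 0;
    }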
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for Classifier serials model. + */ +class ULTRAINFER_DECL ClassifierPreprocessor : public ProcessorManager { +public: + ClassifierPreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_op_ = std::make_shared(mean, std, is_scale); + } + + /// Set cls_image_shape for the classification preprocess + void SetClsImageShape(const std::vector &cls_image_shape) { + cls_image_shape_ = cls_image_shape; + } + /// Get cls_image_shape for the classification preprocess + std::vector GetClsImageShape() const { return cls_image_shape_; } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_normalize_ = true; } + +private: + void OcrClassifierResizeImage(FDMat *mat, + const std::vector &cls_image_shape); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::vector cls_image_shape_ = {3, 48, 192}; + + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_op_; + std::shared_ptr hwc2chw_op_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc new file mode 100755 index 0000000000..5d6598a5fa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +DBCURVEDetector::DBCURVEDetector() {} +DBCURVEDetector::DBCURVEDetector(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool DBCURVEDetector::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr DBCURVEDetector::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(DBCURVEDetector(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool DBCURVEDetector::Predict(const cv::Mat &img, + std::vector> *boxes_result) { + std::vector>> det_results; + if (!BatchPredict({img}, &det_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + return true; +} + +bool DBCURVEDetector::Predict(const cv::Mat &img, + vision::OCRCURVEResult *ocr_result) { + if (!Predict(img, &(ocr_result->boxes))) { + return false; + } + return true; +} + +bool DBCURVEDetector::BatchPredict( + const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + if (!BatchPredict(images, &det_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].boxes = std::move(det_results[i]); + } + return true; +} + +bool DBCURVEDetector::BatchPredict( + const std::vector &images, + std::vector>> *det_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h new file mode 100755 index 0000000000..999430eb8e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h" +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBCURVEDetector object is used to load the detection model provided + * by PaddleOCR. + */ +class ULTRAINFER_DECL DBCURVEDetector : public UltraInferModel { +public: + DBCURVEDetector(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + DBCURVEDetector(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new DBCURVEDetector with less memory usage when multiple + * instances of the same model are created + * + * \return new DBCURVEDetector* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_det"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. 
+ */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRCURVEResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of DBCURVEDetectorPreprocessor + virtual DBDetectorPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBCURVEDetectorPostprocessor + virtual DBCURVEDetectorPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + DBDetectorPreprocessor preprocessor_; + DBCURVEDetectorPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc new file mode 100755 index 0000000000..3082c55942 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
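A minimal C++ sketch of running the DBCURVEDetector declared above on a single image via the OCRCURVEResult overload; the model and image paths are placeholders and the default RuntimeOption keeps inference on CPU.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::DBCURVEDetector det("det_infer/model.pdmodel",
                               "det_infer/model.pdiparams");
      cv::Mat image = cv::imread("doc.jpg");
      ultrainfer::vision::OCRCURVEResult result;
      if (!det.Predict(image, &result)) {
        std::cerr << "Curved text detection failed." << std::endl;
        return 1;
      }
      // result.boxes holds one polygon per detected curved text region.
      std::cout << "detected regions: " << result.boxes.size() << std::endl;
      return 0;
    }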
+ +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +DBDetector::DBDetector() {} +DBDetector::DBDetector(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool DBDetector::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr DBDetector::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(DBDetector(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool DBDetector::Predict(const cv::Mat &img, + std::vector> *boxes_result) { + std::vector>> det_results; + if (!BatchPredict({img}, &det_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + return true; +} + +bool DBDetector::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + if (!Predict(img, &(ocr_result->boxes))) { + return false; + } + return true; +} + +bool DBDetector::BatchPredict(const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + if (!BatchPredict(images, &det_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].boxes = std::move(det_results[i]); + } + return true; +} + +bool DBDetector::BatchPredict( + const std::vector &images, + std::vector>> *det_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h new file mode 100755 index 0000000000..8f69f8f717 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBDetector object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL DBDetector : public UltraInferModel { +public: + DBDetector(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + DBDetector(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new DBDetector with less memory usage when multiple + * instances of the same model are created + * + * \return new DBDetector* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_det"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. 
\return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of DBDetectorPreprocessor + virtual DBDetectorPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual DBDetectorPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + DBDetectorPreprocessor preprocessor_; + DBDetectorPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc new file mode 100755 index 0000000000..18a87b8172 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
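DBDetector follows the same call pattern, and its postprocess thresholds can be adjusted through GetPostprocessor() before prediction; a sketch with placeholder paths:

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/dbdetector.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::DBDetector det("ch_PP-OCRv3_det_infer/model.pdmodel",
                          "ch_PP-OCRv3_det_infer/model.pdiparams");
      // Loosen the box score threshold a little (the default is 0.6).
      det.GetPostprocessor().SetDetDBBoxThresh(0.5);
      cv::Mat image = cv::imread("doc.jpg");
      ultrainfer::vision::OCRResult result;
      if (!det.Predict(image, &result)) {
        std::cerr << "Text detection failed." << std::endl;
        return 1;
      }
      // Each detected box is 8 ints: four (x, y) corner points.
      std::cout << "detected boxes: " << result.boxes.size() << std::endl;
      return 0;
    }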
+ +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool DBDetectorPostprocessor::SingleBatchPostprocessor( + const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result) { + int n = n2 * n3; + + // prepare bitmap + std::vector pred(n, 0.0); + std::vector cbuf(n, ' '); + + for (int i = 0; i < n; i++) { + pred[i] = float(out_data[i]); + cbuf[i] = (unsigned char)((out_data[i]) * 255); + } + cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data()); + cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data()); + + const double threshold = det_db_thresh_ * 255; + const double maxvalue = 255; + cv::Mat bit_map; + cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); + if (use_dilation_) { + cv::Mat dila_ele = + cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); + cv::dilate(bit_map, bit_map, dila_ele); + } + + std::vector>> boxes; + + boxes = util_post_processor_.BoxesFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + + boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info); + + // boxes to boxes_result + for (int i = 0; i < boxes.size(); i++) { + std::array new_box; + int k = 0; + for (auto &vec : boxes[i]) { + for (auto &e : vec) { + new_box[k++] = e; + } + } + boxes_result->emplace_back(new_box); + } + + return true; +} + +bool DBDetectorPostprocessor::Run( + const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info) { + // DBDetector have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For DBDetector, the output tensor shape = [batch, 1, ?, ?] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + const float *tensor_data = reinterpret_cast(tensor.Data()); + + results->resize(batch); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data, tensor.shape[2], tensor.shape[3], + batch_det_img_info[i_batch], + &results->at(i_batch)); + tensor_data = tensor_data + length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h new file mode 100755 index 0000000000..6583a51bad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! 
@brief Postprocessor object for the DBDetector series of models.
+ */
+class ULTRAINFER_DECL DBDetectorPostprocessor {
+public:
+  /** \brief Process the result of runtime and fill the results structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output result of the detector
+   * \param[in] batch_det_img_info The detector preprocess result
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector &tensors,
+           std::vector>> *results,
+           const std::vector> &batch_det_img_info);
+
+  /// Set det_db_thresh for the detection postprocess, default is 0.3
+  void SetDetDBThresh(double det_db_thresh) { det_db_thresh_ = det_db_thresh; }
+  /// Get det_db_thresh of the detection postprocess
+  double GetDetDBThresh() const { return det_db_thresh_; }
+
+  /// Set det_db_box_thresh for the detection postprocess, default is 0.6
+  void SetDetDBBoxThresh(double det_db_box_thresh) {
+    det_db_box_thresh_ = det_db_box_thresh;
+  }
+  /// Get det_db_box_thresh of the detection postprocess
+  double GetDetDBBoxThresh() const { return det_db_box_thresh_; }
+
+  /// Set det_db_unclip_ratio for the detection postprocess, default is 1.5
+  void SetDetDBUnclipRatio(double det_db_unclip_ratio) {
+    det_db_unclip_ratio_ = det_db_unclip_ratio;
+  }
+  /// Get det_db_unclip_ratio of the detection postprocess
+  double GetDetDBUnclipRatio() const { return det_db_unclip_ratio_; }
+
+  /// Set det_db_score_mode for the detection postprocess, default is 'slow'
+  void SetDetDBScoreMode(const std::string &det_db_score_mode) {
+    det_db_score_mode_ = det_db_score_mode;
+  }
+  /// Get det_db_score_mode of the detection postprocess
+  std::string GetDetDBScoreMode() const { return det_db_score_mode_; }
+
+  /// Set use_dilation for the detection postprocess, default is false
+  void SetUseDilation(int use_dilation) { use_dilation_ = use_dilation; }
+  /// Get use_dilation of the detection postprocess
+  int GetUseDilation() const { return use_dilation_; }
+
+private:
+  double det_db_thresh_ = 0.3;
+  double det_db_box_thresh_ = 0.6;
+  double det_db_unclip_ratio_ = 1.5;
+  std::string det_db_score_mode_ = "slow";
+  bool use_dilation_ = false;
+  PostProcessor util_post_processor_;
+  bool SingleBatchPostprocessor(const float *out_data, int n2, int n3,
+                                const std::array &det_img_info,
+                                std::vector> *boxes_result);
+};
+
+} // namespace ocr
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc
new file mode 100755
index 0000000000..ab06c59a08
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc
@@ -0,0 +1,103 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
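Editor's note: before moving on to the curved-text variant below, here is a small hedged sketch of how the DB decode knobs declared above are typically tuned through a DBDetector's postprocessor. It only uses setters declared in this patch; the concrete values are examples, not recommendations.

#include "ultrainfer/vision/ocr/ppocr/dbdetector.h"

// Tuning sketch for the DB postprocess (values are illustrative).
void ConfigureDetector(ultrainfer::vision::ocr::DBDetector *detector) {
  auto &post = detector->GetPostprocessor();
  // Pixel threshold used to binarize the probability map (see det_postprocessor.cc).
  post.SetDetDBThresh(0.3);
  // Score threshold passed to BoxesFromBitmap to drop low-scoring candidate boxes.
  post.SetDetDBBoxThresh(0.5);
  // Unclip ratio controls how much each kept region is expanded before cropping.
  post.SetDetDBUnclipRatio(1.6);
  // "slow" or "fast" scoring mode, forwarded to the box extraction step.
  post.SetDetDBScoreMode("fast");
  // Dilate the binarized map before extracting boxes (off by default).
  post.SetUseDilation(1);
}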
+ +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool DBCURVEDetectorPostprocessor::SingleBatchPostprocessor( + const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result) { + int n = n2 * n3; + + // prepare bitmap + std::vector pred(n, 0.0); + std::vector cbuf(n, ' '); + + for (int i = 0; i < n; i++) { + pred[i] = float(out_data[i]); + cbuf[i] = (unsigned char)((out_data[i]) * 255); + } + cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data()); + cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data()); + + const double threshold = det_db_thresh_ * 255; + const double maxvalue = 255; + cv::Mat bit_map; + cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); + if (use_dilation_) { + cv::Mat dila_ele = + cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); + cv::dilate(bit_map, bit_map, dila_ele); + } + + std::vector>> boxes; + + if (det_db_box_type_ == "bbox") { + boxes = util_post_processor_.BoxesFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info); + } else { + boxes = util_post_processor_.PolygonFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + boxes = util_post_processor_.FilterCURVETagDetRes(boxes, det_img_info); + } + + // boxes to boxes_result + for (int i = 0; i < boxes.size(); i++) { + std::vector new_box; + for (auto &vec : boxes[i]) { + for (auto &e : vec) { + new_box.push_back(e); + } + } + boxes_result->emplace_back(new_box); + } + + return true; +} + +bool DBCURVEDetectorPostprocessor::Run( + const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info) { + // DBCURVEDetector have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For DBCURVEDetector, the output tensor shape = [batch, 1, ?, ?] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + const float *tensor_data = reinterpret_cast(tensor.Data()); + + results->resize(batch); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data, tensor.shape[2], tensor.shape[3], + batch_det_img_info[i_batch], + &results->at(i_batch)); + tensor_data = tensor_data + length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h new file mode 100755 index 0000000000..bd4e29b471 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for DBCURVEDetector serials model. + */ +class ULTRAINFER_DECL DBCURVEDetectorPostprocessor { +public: + /** \brief Process the result of runtime and fill to results structure + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output result of detector + * \param[in] batch_det_img_info The detector_preprocess result + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info); + + /// Set det_db_thresh for the detection postprocess, default is 0.3 + void SetDetDBThresh(double det_db_thresh) { det_db_thresh_ = det_db_thresh; } + /// Get det_db_thresh of the detection postprocess + double GetDetDBThresh() const { return det_db_thresh_; } + + /// Set det_db_box_thresh for the detection postprocess, default is 0.6 + void SetDetDBBoxThresh(double det_db_box_thresh) { + det_db_box_thresh_ = det_db_box_thresh; + } + /// Get det_db_box_thresh of the detection postprocess + double GetDetDBBoxThresh() const { return det_db_box_thresh_; } + + /// Set det_db_unclip_ratio for the detection postprocess, default is 1.5 + void SetDetDBUnclipRatio(double det_db_unclip_ratio) { + det_db_unclip_ratio_ = det_db_unclip_ratio; + } + /// Get det_db_unclip_ratio_ of the detection postprocess + double GetDetDBUnclipRatio() const { return det_db_unclip_ratio_; } + + void SetDetDBScoreMode(const std::string &det_db_score_mode) { + det_db_score_mode_ = det_db_score_mode; + } + + void SetDetDBBoxType(const std::string &det_db_box_type) { + det_db_box_type_ = det_db_box_type; + } + std::string GetDetDBScoreMode() const { return det_db_score_mode_; } + + std::string GetDetDBBoxType() const { return det_db_box_type_; } + + /// Set use_dilation for the detection postprocess, default is fasle + void SetUseDilation(int use_dilation) { use_dilation_ = use_dilation; } + /// Get use_dilation of the detection postprocess + int GetUseDilation() const { return use_dilation_; } + +private: + double det_db_thresh_ = 0.3; + double det_db_box_thresh_ = 0.6; + double det_db_unclip_ratio_ = 1.5; + std::string det_db_box_type_ = "bbox"; + std::string det_db_score_mode_ = "slow"; + bool use_dilation_ = false; + PostProcessor util_post_processor_; + bool SingleBatchPostprocessor(const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc new file mode 100755 index 0000000000..e41c2eff18 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::array +DBDetectorPreprocessor::OcrDetectorGetInfo(FDMat *img, int max_size_len) { + int w = img->Width(); + int h = img->Height(); + if (static_shape_infer_) { + return {w, h, det_image_shape_[2], det_image_shape_[1]}; + } + + float ratio = 1.f; + int max_wh = w >= h ? w : h; + if (max_wh > max_size_len) { + if (h > w) { + ratio = float(max_size_len) / float(h); + } else { + ratio = float(max_size_len) / float(w); + } + } + int resize_h = int(float(h) * ratio); + int resize_w = int(float(w) * ratio); + resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32); + resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32); + + return {w, h, resize_w, resize_h}; + /* + *ratio_h = float(resize_h) / float(h); + *ratio_w = float(resize_w) / float(w); + */ +} + +DBDetectorPreprocessor::DBDetectorPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + normalize_permute_op_ = std::make_shared( + std::vector({0.485f, 0.456f, 0.406f}), + std::vector({0.229f, 0.224f, 0.225f}), true); +} + +bool DBDetectorPreprocessor::ResizeImage(FDMat *img, int resize_w, int resize_h, + int max_resize_w, int max_resize_h) { + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(img); + + pad_op_->SetPaddingSize(0, max_resize_h - resize_h, 0, + max_resize_w - resize_w); + (*pad_op_)(img); + return true; +} + +bool DBDetectorPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + int max_resize_w = 0; + int max_resize_h = 0; + batch_det_img_info_.clear(); + batch_det_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + batch_det_img_info_[i] = OcrDetectorGetInfo(mat, max_side_len_); + max_resize_w = std::max(max_resize_w, batch_det_img_info_[i][2]); + max_resize_h = std::max(max_resize_h, batch_det_img_info_[i][3]); + } + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3], + max_resize_w, max_resize_h); + } + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h new file mode 100755 index 0000000000..f2b419232b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/manager.h"
+#include "ultrainfer/vision/common/processors/normalize_and_permute.h"
+#include "ultrainfer/vision/common/processors/pad.h"
+#include "ultrainfer/vision/common/processors/resize.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace ocr {
+/*! @brief Preprocessor object for the DBDetector series of models.
+ */
+class ULTRAINFER_DECL DBDetectorPreprocessor : public ProcessorManager {
+public:
+  DBDetectorPreprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] image_batch The input image batch
+   * \param[in] outputs The output tensors which will be fed into runtime
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs);
+
+  /// Set max_side_len for the detection preprocess, default is 960
+  void SetMaxSideLen(int max_side_len) { max_side_len_ = max_side_len; }
+
+  /// Get max_side_len of the detection preprocess
+  int GetMaxSideLen() const { return max_side_len_; }
+
+  /// Set preprocess normalize parameters, please call this API to customize
+  /// the normalize parameters, otherwise it will use the default normalize
+  /// parameters.
+  void SetNormalize(const std::vector &mean,
+                    const std::vector &std, bool is_scale) {
+    normalize_permute_op_ =
+        std::make_shared(mean, std, is_scale);
+  }
+
+  /// Get the image info of the last batch, return a list of array
+  /// {image width, image height, resize width, resize height}
+  const std::vector> *GetBatchImgInfo() {
+    return &batch_det_img_info_;
+  }
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+  /// Set det_image_shape for the detection preprocess.
+  /// This API is usually used when you retrain the model.
+  /// Generally, you do not need to use it.
+  void SetDetImageShape(const std::vector &det_image_shape) {
+    det_image_shape_ = det_image_shape;
+  }
+  /// Get det_image_shape of the detection preprocess
+  std::vector GetDetImageShape() const { return det_image_shape_; }
+
+  /// Set whether static_shape_infer is true or not. When deploying PP-OCR
+  /// on hardware which cannot support dynamic input shapes very well,
+  /// like Huawei Ascend, static_shape_infer needs to be true.
+ void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + +private: + bool ResizeImage(FDMat *img, int resize_w, int resize_h, int max_resize_w, + int max_resize_h); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + int max_side_len_ = 960; + std::vector> batch_det_img_info_; + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_permute_op_; + std::vector det_image_shape_ = {3, 960, 960}; + bool static_shape_infer_ = false; + std::array OcrDetectorGetInfo(FDMat *img, int max_size_len); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc new file mode 100755 index 0000000000..11cac3366f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -0,0 +1,748 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
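Editor's note: before the Python bindings that follow, here is a small self-contained sketch of the resize rule used by DBDetectorPreprocessor::OcrDetectorGetInfo above (cap the longest side at max_side_len, then round both sides to multiples of 32). It mirrors only the dynamic-shape branch; the sample dimensions are arbitrary.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>

// Returns {orig_w, orig_h, resize_w, resize_h}, following the same arithmetic
// as the dynamic-shape branch of OcrDetectorGetInfo.
std::array<int, 4> GetDetResizeInfo(int w, int h, int max_size_len) {
  float ratio = 1.f;
  int max_wh = std::max(w, h);
  if (max_wh > max_size_len) {
    ratio = static_cast<float>(max_size_len) / static_cast<float>(max_wh);
  }
  int resize_h = static_cast<int>(h * ratio);
  int resize_w = static_cast<int>(w * ratio);
  // Round to the nearest multiple of 32, with a floor of 32.
  resize_h = std::max(static_cast<int>(std::round(resize_h / 32.f)) * 32, 32);
  resize_w = std::max(static_cast<int>(std::round(resize_w / 32.f)) * 32, 32);
  return {w, h, resize_w, resize_h};
}

int main() {
  // A 2000x1000 page with the default max_side_len of 960 is scaled by
  // 960/2000 = 0.48 to 960x480; both sides are already multiples of 32.
  std::array<int, 4> info = GetDetResizeInfo(2000, 1000, 960);
  std::printf("resize to %dx%d\n", info[2], info[3]);  // prints "resize to 960x480"
  return 0;
}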
+#include + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPOCRModel(pybind11::module &m) { + m.def("sort_boxes", [](std::vector> &boxes) { + vision::ocr::SortBoxes(&boxes); + return boxes; + }); + + // UVDoc + pybind11::class_( + m, "UVDocPreprocessor") + .def(pybind11::init<>()) + .def("set_normalize", + [](vision::ocr::UVDocPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::UVDocPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "UVDocPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def( + "disable_normalize", + [](vision::ocr::UVDocPreprocessor &self) { self.DisableNormalize(); }) + .def("disable_permute", + [](vision::ocr::UVDocPreprocessor &self) { self.DisablePermute(); }); + + pybind11::class_(m, "UVDocPostprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::ocr::UVDocPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to preprocess the input data in " + "UVDocPostprocessor."); + } + for (size_t i = 0; i < results.size(); ++i) { + results[i].StopSharing(); + } + return results; + }); + + pybind11::class_(m, + "UVDocWarpper") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::UVDocWarpper::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::UVDocWarpper::GetPostprocessor) + .def("clone", + [](vision::ocr::UVDocWarpper &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::UVDocWarpper &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + FDTensor res; + self.Predict(mat, &res); + res.StopSharing(); + return res; + }) + .def("batch_predict", [](vision::ocr::UVDocWarpper &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + for (size_t i = 0; i < results.size(); ++i) { + results[i].StopSharing(); + } + return results; + // std::vector results; + // self.BatchPredict(images, &results); + // std::vector> ret; + // for(size_t i = 0; i < results.size(); ++i){ + // ret.push_back(pybind11::array_t( + // {results[i].rows, results[i].cols, results[i].channels()}, + // results[i].data)); + // } + // return ret; + }); + + // DBDetector + pybind11::class_(m, "DBDetectorPreprocessor") + .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer, + &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer) + .def_property("max_side_len", + &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, + &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) + .def("set_normalize", + [](vision::ocr::DBDetectorPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::DBDetectorPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + 
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + self.Run(&images, &outputs); + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return std::make_pair(outputs, *batch_det_img_info); + }) + .def("disable_normalize", + [](vision::ocr::DBDetectorPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "DBDetectorPostprocessor") + .def(pybind11::init<>()) + .def_property("det_db_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) + .def_property("det_db_box_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) + .def_property("det_db_unclip_ratio", + &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, + &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property("det_db_score_mode", + &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, + &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) + .def_property("use_dilation", + &vision::ocr::DBDetectorPostprocessor::GetUseDilation, + &vision::ocr::DBDetectorPostprocessor::SetUseDilation) + + .def("run", + [](vision::ocr::DBDetectorPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> results; + + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }) + .def( + "run", [](vision::ocr::DBDetectorPostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector>> results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error("Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }); + + pybind11::class_( + m, "DBCURVEDetectorPostprocessor") + .def(pybind11::init<>()) + .def_property("det_db_thresh", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBThresh, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBThresh) + .def_property( + "det_db_box_thresh", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxThresh, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxThresh) + .def_property( + "det_db_unclip_ratio", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBUnclipRatio, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property( + "det_db_score_mode", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBScoreMode, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBScoreMode) + .def_property("det_db_box_type", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxType, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxType) + .def_property("use_dilation", + &vision::ocr::DBCURVEDetectorPostprocessor::GetUseDilation, + &vision::ocr::DBCURVEDetectorPostprocessor::SetUseDilation) + + .def("run", + [](vision::ocr::DBCURVEDetectorPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> results; + + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + 
"DBCURVEDetectorPostprocessor."); + } + return results; + }) + .def( + "run", [](vision::ocr::DBCURVEDetectorPostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector>> results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error("Failed to preprocess the input data in " + "DBCURVEDetectorPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "DBDetector") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::DBDetector::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::DBDetector::GetPostprocessor) + .def("predict", + [](vision::ocr::DBDetector &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::DBDetector &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + pybind11::class_( + m, "DBCURVEDetector") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::DBCURVEDetector::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::DBCURVEDetector::GetPostprocessor) + .def("predict", + [](vision::ocr::DBCURVEDetector &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRCURVEResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::DBCURVEDetector &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + // Classifier + pybind11::class_(m, "ClassifierPreprocessor") + .def(pybind11::init<>()) + .def_property("cls_image_shape", + &vision::ocr::ClassifierPreprocessor::GetClsImageShape, + &vision::ocr::ClassifierPreprocessor::SetClsImageShape) + .def("set_normalize", + [](vision::ocr::ClassifierPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::ClassifierPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::ClassifierPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::ClassifierPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "ClassifierPostprocessor") + .def(pybind11::init<>()) + .def_property("cls_thresh", + &vision::ocr::ClassifierPostprocessor::GetClsThresh, + &vision::ocr::ClassifierPostprocessor::SetClsThresh) + .def("run", + [](vision::ocr::ClassifierPostprocessor &self, + std::vector &inputs) { + std::vector cls_labels; + 
std::vector cls_scores; + if (!self.Run(inputs, &cls_labels, &cls_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPostprocessor."); + } + return std::make_pair(cls_labels, cls_scores); + }) + .def("run", [](vision::ocr::ClassifierPostprocessor &self, + std::vector &input_array) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector cls_labels; + std::vector cls_scores; + if (!self.Run(inputs, &cls_labels, &cls_scores)) { + throw std::runtime_error("Failed to preprocess the input data in " + "ClassifierPostprocessor."); + } + return std::make_pair(cls_labels, cls_scores); + }); + + pybind11::class_(m, "Classifier") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::Classifier::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Classifier::GetPostprocessor) + .def("predict", + [](vision::ocr::Classifier &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::Classifier &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + vision::OCRResult ocr_result; + self.BatchPredict(images, &ocr_result); + return ocr_result; + }); + + // Recognizer + pybind11::class_(m, "RecognizerPreprocessor") + .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, + &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) + .def_property("rec_image_shape", + &vision::ocr::RecognizerPreprocessor::GetRecImageShape, + &vision::ocr::RecognizerPreprocessor::SetRecImageShape) + .def("set_normalize", + [](vision::ocr::RecognizerPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::RecognizerPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::RecognizerPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::RecognizerPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "RecognizerPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::ocr::RecognizerPostprocessor &self, + std::vector &inputs) { + std::vector texts; + std::vector rec_scores; + if (!self.Run(inputs, &texts, &rec_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPostprocessor."); + } + return std::make_pair(texts, rec_scores); + }) + .def("run", [](vision::ocr::RecognizerPostprocessor &self, + std::vector &input_array) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector texts; + std::vector rec_scores; + if (!self.Run(inputs, &texts, &rec_scores)) { + throw std::runtime_error("Failed to preprocess the input data in " + "RecognizerPostprocessor."); + } + 
return std::make_pair(texts, rec_scores); + }); + + pybind11::class_(m, "Recognizer") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::Recognizer::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Recognizer::GetPostprocessor) + .def("clone", [](vision::ocr::Recognizer &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::Recognizer &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::Recognizer &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + vision::OCRResult ocr_result; + self.BatchPredict(images, &ocr_result); + return ocr_result; + }); + + // Table + pybind11::class_(m, "StructureV2TablePreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::ocr::StructureV2TablePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to preprocess the input data in " + "StructureV2TablePreprocessor."); + } + + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + + return std::make_pair(outputs, *batch_det_img_info); + }); + + pybind11::class_( + m, "StructureV2TablePostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }); + + pybind11::class_( + m, "StructureV2Table") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::StructureV2Table::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::StructureV2Table::GetPostprocessor) + .def("clone", + [](vision::ocr::StructureV2Table &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::StructureV2Table &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::StructureV2Table &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + // Layout + pybind11::class_(m, 
"StructureV2LayoutPreprocessor") + .def(pybind11::init<>()) + .def_property( + "static_shape_infer", + &vision::ocr::StructureV2LayoutPreprocessor::GetStaticShapeInfer, + &vision::ocr::StructureV2LayoutPreprocessor::SetStaticShapeInfer) + .def_property( + "layout_image_shape", + &vision::ocr::StructureV2LayoutPreprocessor::GetLayoutImageShape, + &vision::ocr::StructureV2LayoutPreprocessor::SetLayoutImageShape) + .def("set_normalize", + [](vision::ocr::StructureV2LayoutPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::StructureV2LayoutPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "StructureV2LayoutPreprocessor."); + } + + auto batch_layout_img_info = self.GetBatchLayoutImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + + return std::make_pair(outputs, *batch_layout_img_info); + }) + .def("disable_normalize", + [](vision::ocr::StructureV2LayoutPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::ocr::StructureV2LayoutPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "StructureV2LayoutPostprocessor") + .def(pybind11::init<>()) + .def_property( + "score_threshold", + &vision::ocr::StructureV2LayoutPostprocessor::GetScoreThreshold, + &vision::ocr::StructureV2LayoutPostprocessor::SetScoreThreshold) + .def_property( + "nms_threshold", + &vision::ocr::StructureV2LayoutPostprocessor::GetNMSThreshold, + &vision::ocr::StructureV2LayoutPostprocessor::SetNMSThreshold) + .def_property("num_class", + &vision::ocr::StructureV2LayoutPostprocessor::GetNumClass, + &vision::ocr::StructureV2LayoutPostprocessor::SetNumClass) + .def_property("fpn_stride", + &vision::ocr::StructureV2LayoutPostprocessor::GetFPNStride, + &vision::ocr::StructureV2LayoutPostprocessor::SetFPNStride) + .def_property("reg_max", + &vision::ocr::StructureV2LayoutPostprocessor::GetRegMax, + &vision::ocr::StructureV2LayoutPostprocessor::SetRegMax) + .def("run", + [](vision::ocr::StructureV2LayoutPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_layout_img_info) { + std::vector results; + + if (!self.Run(inputs, &results, batch_layout_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2LayoutPostprocessor."); + } + return results; + }); + + pybind11::class_( + m, "StructureV2Layout") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::StructureV2Layout::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::StructureV2Layout::GetPostprocessor) + .def("clone", + [](vision::ocr::StructureV2Layout &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::StructureV2Layout &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult result; + self.Predict(mat, &result); + return result; + }) + .def("batch_predict", [](vision::ocr::StructureV2Layout &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return 
results; + }); + + pybind11::class_(m, "StructureV2SERViLayoutXLMModel") + .def(pybind11::init()) + .def("clone", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + pybind11::array &data) { + throw std::runtime_error( + "StructureV2SERViLayoutXLMModel do not support predict."); + }) + .def( + "batch_predict", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + std::vector &data) { + throw std::runtime_error( + "StructureV2SERViLayoutXLMModel do not support batch_predict."); + }) + .def("infer", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + std::map &data) { + std::vector inputs(data.size()); + int index = 0; + for (auto iter = data.begin(); iter != data.end(); ++iter) { + std::vector data_shape; + data_shape.insert(data_shape.begin(), iter->second.shape(), + iter->second.shape() + iter->second.ndim()); + auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype()); + + inputs[index].Resize(data_shape, dtype); + memcpy(inputs[index].MutableData(), iter->second.mutable_data(), + iter->second.nbytes()); + inputs[index].name = iter->first; + index += 1; + } + + std::vector outputs(self.NumOutputsOfRuntime()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back( + pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; + }) + .def("get_input_info", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, int &index) { + return self.InputInfoOfRuntime(index); + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc new file mode 100755 index 0000000000..f260decc2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc @@ -0,0 +1,147 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
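Editor's note: the bindings that follow expose the PP-OCR pipelines to Python. For orientation, this hedged sketch shows how the detector, classifier, and recognizer are composed into the C++ pipeline that those bindings wrap (declared later in this patch in ppocr_v2.h/ppocr_v3.h). The model paths, the dictionary file name, and the Classifier/Recognizer constructor signatures are assumptions, not confirmed by this diff.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h"  // pulls in dbdetector/classifier/recognizer

int main() {
  ultrainfer::RuntimeOption option;  // default backend selection

  // Hypothetical exported PP-OCRv3 model directories.
  ultrainfer::vision::ocr::DBDetector det(
      "./ch_PP-OCRv3_det_infer/model.pdmodel",
      "./ch_PP-OCRv3_det_infer/model.pdiparams", option);
  ultrainfer::vision::ocr::Classifier cls(
      "./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel",
      "./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams", option);
  ultrainfer::vision::ocr::Recognizer rec(
      "./ch_PP-OCRv3_rec_infer/model.pdmodel",
      "./ch_PP-OCRv3_rec_infer/model.pdiparams",
      "./ppocr_keys_v1.txt",  // assumed label dictionary file
      option);

  // The pipeline holds non-owning pointers; the classifier is optional.
  ultrainfer::pipeline::PPOCRv3 ocr(&det, &cls, &rec);
  ocr.SetRecBatchSize(6);

  cv::Mat img = cv::imread("doc_image.jpg");
  ultrainfer::vision::OCRResult result;
  if (!ocr.Predict(img, &result)) {
    std::cerr << "OCR pipeline failed." << std::endl;
    return -1;
  }
  for (size_t i = 0; i < result.text.size(); ++i) {
    std::cout << result.text[i] << " (" << result.rec_scores[i] << ")\n";
  }
  return 0;
}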
+#include + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPOCRv4(pybind11::module &m) { + // PPOCRv4 + pybind11::class_(m, "PPOCRv4") + + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv4::GetClsBatchSize, + &pipeline::PPOCRv4::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv4::GetRecBatchSize, + &pipeline::PPOCRv4::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv4 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv4 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv4 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} +void BindPPOCRv3(pybind11::module &m) { + // PPOCRv3 + pybind11::class_(m, "PPOCRv3") + + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, + &pipeline::PPOCRv3::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, + &pipeline::PPOCRv3::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv3 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv3 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv3 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} + +void BindPPOCRv2(pybind11::module &m) { + // PPOCRv2 + pybind11::class_(m, "PPOCRv2") + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, + &pipeline::PPOCRv2::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, + &pipeline::PPOCRv2::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv2 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv2 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv2 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} + +void BindPPStructureV2Table(pybind11::module &m) { + // PPStructureV2Table + pybind11::class_( + m, "PPStructureV2Table") + .def(pybind11::init()) + .def_property("rec_batch_size", + &pipeline::PPStructureV2Table::GetRecBatchSize, + &pipeline::PPStructureV2Table::SetRecBatchSize) + .def("clone", + [](pipeline::PPStructureV2Table &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPStructureV2Table &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", [](pipeline::PPStructureV2Table &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, 
&results); + return results; + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc new file mode 100755 index 0000000000..cd49d1075b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc @@ -0,0 +1,186 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace pipeline { +PPOCRv2::PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : detector_(det_model), classifier_(cls_model), recognizer_(rec_model) { + Initialized(); + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 32; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); +} + +PPOCRv2::PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : detector_(det_model), recognizer_(rec_model) { + Initialized(); + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 32; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); +} + +bool PPOCRv2::SetClsBatchSize(int cls_batch_size) { + if (cls_batch_size < -1 || cls_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." << std::endl; + return false; + } + cls_batch_size_ = cls_batch_size; + return true; +} + +int PPOCRv2::GetClsBatchSize() { return cls_batch_size_; } + +bool PPOCRv2::SetRecBatchSize(int rec_batch_size) { + if (rec_batch_size < -1 || rec_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." 
<< std::endl; + return false; + } + rec_batch_size_ = rec_batch_size; + return true; +} + +int PPOCRv2::GetRecBatchSize() { return rec_batch_size_; } + +bool PPOCRv2::Initialized() const { + + if (detector_ != nullptr && !detector_->Initialized()) { + return false; + } + + if (classifier_ != nullptr && !classifier_->Initialized()) { + return false; + } + + if (recognizer_ != nullptr && !recognizer_->Initialized()) { + return false; + } + return true; +} + +std::unique_ptr PPOCRv2::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv2(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; +} + +bool PPOCRv2::Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result) { + return Predict(*img, result); +} + +bool PPOCRv2::Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result) { + std::vector batch_result(1); + bool success = BatchPredict({img}, &batch_result); + if (!success) { + return success; + } + *result = std::move(batch_result[0]); + return true; +}; + +bool PPOCRv2::BatchPredict( + const std::vector &images, + std::vector *batch_result) { + batch_result->clear(); + batch_result->resize(images.size()); + std::vector>> batch_boxes(images.size()); + + if (!detector_->BatchPredict(images, &batch_boxes)) { + FDERROR << "There's error while detecting image in PPOCR." << std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + vision::ocr::SortBoxes(&(batch_boxes[i_batch])); + (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; + } + + for (int i_batch = 0; i_batch < images.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + // Get croped images by detection result + const std::vector> &boxes = ocr_result.boxes; + const cv::Mat &img = images[i_batch]; + std::vector image_list; + if (boxes.size() == 0) { + image_list.emplace_back(img); + } else { + image_list.resize(boxes.size()); + for (size_t i_box = 0; i_box < boxes.size(); ++i_box) { + image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]); + } + } + std::vector *cls_labels_ptr = &ocr_result.cls_labels; + std::vector *cls_scores_ptr = &ocr_result.cls_scores; + + std::vector *text_ptr = &ocr_result.text; + std::vector *rec_scores_ptr = &ocr_result.rec_scores; + + if (nullptr != classifier_) { + for (size_t start_index = 0; start_index < image_list.size(); + start_index += cls_batch_size_) { + size_t end_index = + std::min(start_index + cls_batch_size_, image_list.size()); + if (!classifier_->BatchPredict(image_list, cls_labels_ptr, + cls_scores_ptr, start_index, + end_index)) { + FDERROR << "There's error while recognizing image in PPOCR." 
+ << std::endl; + return false; + } else { + for (size_t i_img = start_index; i_img < end_index; ++i_img) { + if (cls_labels_ptr->at(i_img) % 2 == 1 && + cls_scores_ptr->at(i_img) > + classifier_->GetPostprocessor().GetClsThresh()) { + cv::rotate(image_list[i_img], image_list[i_img], 1); + } + } + } + } + } + + std::vector width_list; + for (int i = 0; i < image_list.size(); i++) { + width_list.push_back(float(image_list[i].cols) / image_list[i].rows); + } + std::vector indices = vision::ocr::ArgSort(width_list); + + for (size_t start_index = 0; start_index < image_list.size(); + start_index += rec_batch_size_) { + size_t end_index = + std::min(start_index + rec_batch_size_, image_list.size()); + if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr, + start_index, end_index, indices)) { + FDERROR << "There's error while recognizing image in PPOCR." + << std::endl; + return false; + } + } + } + return true; +} + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h new file mode 100755 index 0000000000..30de63d98f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/classifier.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv2 is used to load PP-OCRv2 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv2 : public UltraInferModel { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv2_rec_infer + */ + PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model); + + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv2_rec_infer + */ + PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model); + + /** \brief Clone a new PPOCRv2 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv2* type unique pointer + */ + std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR result. + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format. \param[in] result The output OCR result will + * be writen to this structure. \return true if the prediction successed, + * otherwise false. + */ + virtual bool Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result); + virtual bool Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result); + /** \brief BatchPredict the input image and get OCR result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] batch_result The output + * list of OCR result will be writen to this structure. \return true if the + * prediction successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector *batch_result); + + bool Initialized() const override; + bool SetClsBatchSize(int cls_batch_size); + int GetClsBatchSize(); + bool SetRecBatchSize(int rec_batch_size); + int GetRecBatchSize(); + +protected: + ultrainfer::vision::ocr::DBDetector *detector_ = nullptr; + ultrainfer::vision::ocr::Classifier *classifier_ = nullptr; + ultrainfer::vision::ocr::Recognizer *recognizer_ = nullptr; + +private: + int cls_batch_size_ = 1; + int rec_batch_size_ = 6; +}; + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv2 PPOCRSystemv2; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h new file mode 100755 index 0000000000..5e7ff217d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv3 is used to load PP-OCRv3 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv3 : public PPOCRv2 { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv3_rec_infer + */ + PPOCRv3(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv2(det_model, cls_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv3_rec_infer + */ + PPOCRv3(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv2(det_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + + /** \brief Clone a new PPOCRv3 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv3* type unique pointer + */ + std::unique_ptr Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv3(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; + } +}; + +} // namespace pipeline + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv3 PPOCRSystemv3; +} // namespace ocrsystem +} // namespace application + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h new file mode 100755 index 0000000000..08867b5aa7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv4 is used to load PP-OCRv4 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv4 : public PPOCRv3 { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv3(det_model, cls_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv3(det_model, rec_model) { + // The only difference between v2 and v4 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + + /** \brief Clone a new PPOCRv4 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv4* type unique pointer + */ + std::unique_ptr Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv4(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; + } +}; + +} // namespace pipeline + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv4 PPOCRSystemv4; +} // namespace ocrsystem +} // namespace application + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h new file mode 100755 index 0000000000..bffa2e96d4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
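For reference, a minimal usage sketch of the PP-OCRv2/v3/v4 pipelines declared above. Only the pipeline APIs visible in these headers are relied on; the DBDetector and Classifier constructors are assumed to mirror the Recognizer constructor added later in this patch, and the umbrella header name and model paths are illustrative assumptions, not part of this change.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header; otherwise include the individual ppocr headers

int main() {
  namespace uocr = ultrainfer::vision::ocr;
  ultrainfer::RuntimeOption option;  // defaults to the CPU backends

  // Illustrative model directories; substitute your exported PaddleOCR models.
  uocr::DBDetector det("ch_PP-OCRv4_det_infer/inference.pdmodel",
                       "ch_PP-OCRv4_det_infer/inference.pdiparams", option);
  uocr::Classifier cls("ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel",
                       "ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams", option);
  uocr::Recognizer rec("ch_PP-OCRv4_rec_infer/inference.pdmodel",
                       "ch_PP-OCRv4_rec_infer/inference.pdiparams",
                       "ppocr_keys_v1.txt", option);

  // The classifier is optional; the two-argument constructor skips it.
  ultrainfer::pipeline::PPOCRv4 ppocr(&det, &cls, &rec);
  ppocr.SetClsBatchSize(1);
  ppocr.SetRecBatchSize(6);

  cv::Mat im = cv::imread("demo.jpg");
  ultrainfer::vision::OCRResult result;
  if (!ppocr.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  for (size_t i = 0; i < result.text.size(); ++i) {
    std::cout << result.text[i] << " (" << result.rec_scores[i] << ")" << std::endl;
  }
  return 0;
}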
+ +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" + +namespace ultrainfer { + +namespace pipeline { +typedef ultrainfer::vision::ocr::StructureV2Layout PPStructureV2Layout; + +namespace application { +namespace ocrsystem { + +// TODO(qiuyanjun): This pipeline may not need +typedef pipeline::PPStructureV2Layout PPStructureV2LayoutSystem; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc new file mode 100755 index 0000000000..d163878322 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc @@ -0,0 +1,233 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace pipeline { +PPStructureV2Table::PPStructureV2Table( + ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model, + ultrainfer::vision::ocr::StructureV2Table *table_model) + : detector_(det_model), recognizer_(rec_model), table_(table_model) { + Initialized(); +} + +bool PPStructureV2Table::SetRecBatchSize(int rec_batch_size) { + if (rec_batch_size < -1 || rec_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." 
<< std::endl; + return false; + } + rec_batch_size_ = rec_batch_size; + return true; +} + +int PPStructureV2Table::GetRecBatchSize() { return rec_batch_size_; } + +bool PPStructureV2Table::Initialized() const { + if (detector_ != nullptr && !detector_->Initialized()) { + return false; + } + + if (recognizer_ != nullptr && !recognizer_->Initialized()) { + return false; + } + + if (table_ != nullptr && !table_->Initialized()) { + return false; + } + return true; +} + +std::unique_ptr PPStructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPStructureV2Table(*this)); + clone_model->detector_ = detector_->Clone().release(); + clone_model->recognizer_ = recognizer_->Clone().release(); + clone_model->table_ = table_->Clone().release(); + return clone_model; +} + +bool PPStructureV2Table::Predict(cv::Mat *img, + ultrainfer::vision::OCRResult *result) { + return Predict(*img, result); +} + +bool PPStructureV2Table::Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result) { + std::vector batch_result(1); + bool success = BatchPredict({img}, &batch_result); + if (!success) { + return success; + } + *result = std::move(batch_result[0]); + return true; +}; + +bool PPStructureV2Table::BatchPredict( + const std::vector &images, + std::vector *batch_result) { + batch_result->clear(); + batch_result->resize(images.size()); + std::vector>> batch_boxes(images.size()); + + if (!detector_->BatchPredict(images, &batch_boxes)) { + FDERROR << "There's error while detecting image in PPOCR." << std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + vision::ocr::SortBoxes(&(batch_boxes[i_batch])); + (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; + } + + for (int i_batch = 0; i_batch < images.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + // Get croped images by detection result + const std::vector> &boxes = ocr_result.boxes; + const cv::Mat &img = images[i_batch]; + std::vector image_list; + if (boxes.size() == 0) { + image_list.emplace_back(img); + } else { + image_list.resize(boxes.size()); + for (size_t i_box = 0; i_box < boxes.size(); ++i_box) { + image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]); + } + } + std::vector *cls_labels_ptr = &ocr_result.cls_labels; + std::vector *cls_scores_ptr = &ocr_result.cls_scores; + + std::vector *text_ptr = &ocr_result.text; + std::vector *rec_scores_ptr = &ocr_result.rec_scores; + + std::vector width_list; + for (int i = 0; i < image_list.size(); i++) { + width_list.push_back(float(image_list[i].cols) / image_list[i].rows); + } + std::vector indices = vision::ocr::ArgSort(width_list); + + for (size_t start_index = 0; start_index < image_list.size(); + start_index += rec_batch_size_) { + size_t end_index = + std::min(start_index + rec_batch_size_, image_list.size()); + if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr, + start_index, end_index, indices)) { + FDERROR << "There's error while recognizing image in PPOCR." + << std::endl; + return false; + } + } + } + + if (!table_->BatchPredict(images, batch_result)) { + FDERROR << "There's error while recognizing tables in images." 
<< std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + std::vector> matched(ocr_result.table_boxes.size(), + std::vector()); + + std::vector ocr_box; + std::vector structure_box; + for (int i = 0; i < ocr_result.boxes.size(); i++) { + ocr_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.boxes[i]); + ocr_box[0] -= 1; + ocr_box[1] -= 1; + ocr_box[2] += 1; + ocr_box[3] += 1; + + std::vector> dis_list(ocr_result.table_boxes.size(), + std::vector(3, 100000.0)); + + for (int j = 0; j < ocr_result.table_boxes.size(); j++) { + structure_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.table_boxes[j]); + dis_list[j][0] = vision::ocr::Dis(ocr_box, structure_box); + dis_list[j][1] = 1 - vision::ocr::Iou(ocr_box, structure_box); + dis_list[j][2] = j; + } + + // find min dis idx + std::sort(dis_list.begin(), dis_list.end(), vision::ocr::ComparisonDis); + matched[dis_list[0][2]].push_back(ocr_result.text[i]); + } + + // get pred html + std::string html_str = ""; + int td_tag_idx = 0; + auto structure_html_tags = ocr_result.table_structure; + for (int i = 0; i < structure_html_tags.size(); i++) { + if (structure_html_tags[i].find("") != std::string::npos) { + if (structure_html_tags[i].find("") != std::string::npos) { + html_str += ""; + } + if (matched[td_tag_idx].size() > 0) { + bool b_with = false; + if (matched[td_tag_idx][0].find("") != std::string::npos && + matched[td_tag_idx].size() > 1) { + b_with = true; + html_str += ""; + } + for (int j = 0; j < matched[td_tag_idx].size(); j++) { + std::string content = matched[td_tag_idx][j]; + if (matched[td_tag_idx].size() > 1) { + // remove blank, and + if (content.length() > 0 && content.at(0) == ' ') { + content = content.substr(0); + } + if (content.length() > 2 && content.substr(0, 3) == "") { + content = content.substr(3); + } + if (content.length() > 4 && + content.substr(content.length() - 4) == "") { + content = content.substr(0, content.length() - 4); + } + if (content.empty()) { + continue; + } + // add blank + if (j != matched[td_tag_idx].size() - 1 && + content.at(content.length() - 1) != ' ') { + content += ' '; + } + } + html_str += content; + } + if (b_with) { + html_str += ""; + } + } + if (structure_html_tags[i].find("") != std::string::npos) { + html_str += ""; + } else { + html_str += structure_html_tags[i]; + } + td_tag_idx += 1; + } else { + html_str += structure_html_tags[i]; + } + } + (*batch_result)[i_batch].table_html = html_str; + } + + return true; +} + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h new file mode 100755 index 0000000000..9cc6f7fb88 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
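The table pipeline above merges the det/rec OCR branch with the SLANet structure branch and writes the reconstructed HTML into result.table_html. A hedged usage sketch, written as a hypothetical helper so the component-model constructors (which are not part of this hunk) stay out of scope:

#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

// Hypothetical helper: det, rec and table are constructed elsewhere.
bool RecognizeTable(ultrainfer::vision::ocr::DBDetector *det,
                    ultrainfer::vision::ocr::Recognizer *rec,
                    ultrainfer::vision::ocr::StructureV2Table *table,
                    const std::string &image_path) {
  ultrainfer::pipeline::PPStructureV2Table pipe(det, rec, table);
  pipe.SetRecBatchSize(6);

  cv::Mat im = cv::imread(image_path);
  ultrainfer::vision::OCRResult result;
  if (!pipe.Predict(im, &result)) {
    return false;
  }
  // Cell texts come from the det/rec branch, the table layout from the
  // structure branch; both are merged into the HTML string.
  std::cout << result.table_html << std::endl;
  return true;
}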
+ +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPStructureV2Table is used to load PP-OCRv2 series models provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL PPStructureV2Table : public UltraInferModel { +public: + /** \brief Set up the detection model path, recognition model path and table + * model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv2_rec_infer + * \param[in] table_model Path of table recognition model, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer + */ + PPStructureV2Table(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model, + ultrainfer::vision::ocr::StructureV2Table *table_model); + + /** \brief Clone a new PPStructureV2Table with less memory usage when multiple + * instances of the same model are created + * + * \return new PPStructureV2Table* type unique pointer + */ + std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR result. + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format. \param[in] result The output OCR result will + * be writen to this structure. \return true if the prediction successed, + * otherwise false. + */ + virtual bool Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result); + virtual bool Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result); + /** \brief BatchPredict the input image and get OCR result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] batch_result The output + * list of OCR result will be writen to this structure. \return true if the + * prediction successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector *batch_result); + + bool Initialized() const override; + bool SetRecBatchSize(int rec_batch_size); + int GetRecBatchSize(); + +protected: + ultrainfer::vision::ocr::DBDetector *detector_ = nullptr; + ultrainfer::vision::ocr::Recognizer *recognizer_ = nullptr; + ultrainfer::vision::ocr::StructureV2Table *table_ = nullptr; + +private: + int rec_batch_size_ = 6; +}; + +namespace application { +namespace ocrsystem { +typedef pipeline::PPStructureV2Table PPStructureV2TableSystem; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc new file mode 100755 index 0000000000..8790a5deee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc @@ -0,0 +1,150 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/rec_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::vector ReadDict(const std::string &path) { + std::ifstream in(path); + FDASSERT(in, "Cannot open file %s to read.", path.c_str()); + std::string line; + std::vector m_vec; + while (getline(in, line)) { + m_vec.push_back(line); + } + m_vec.insert(m_vec.begin(), "#"); // blank char for ctc + m_vec.push_back(" "); + return m_vec; +} + +RecognizerPostprocessor::RecognizerPostprocessor() { initialized_ = false; } + +RecognizerPostprocessor::RecognizerPostprocessor( + const std::string &label_path) { + // init label_lsit + label_list_ = ReadDict(label_path); + initialized_ = true; +} + +bool RecognizerPostprocessor::SingleBatchPostprocessor( + const float *out_data, const std::vector &output_shape, + std::string *text, float *rec_score) { + std::string &str_res = *text; + float &score = *rec_score; + score = 0.f; + int argmax_idx; + int last_index = 0; + int count = 0; + float max_value = 0.0f; + + for (int n = 0; n < output_shape[1]; n++) { + argmax_idx = int( + std::distance(&out_data[n * output_shape[2]], + std::max_element(&out_data[n * output_shape[2]], + &out_data[(n + 1) * output_shape[2]]))); + + max_value = float(*std::max_element(&out_data[n * output_shape[2]], + &out_data[(n + 1) * output_shape[2]])); + + if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { + score += max_value; + count += 1; + if (argmax_idx > label_list_.size()) { + FDERROR << "The output index: " << argmax_idx + << " is larger than the size of label_list: " + << label_list_.size() << ". Please check the label file!" + << std::endl; + return false; + } + str_res += label_list_[argmax_idx]; + } + last_index = argmax_idx; + } + score /= (count + 1e-6); + if (count == 0 || std::isnan(score)) { + score = 0.f; + } + return true; +} + +bool RecognizerPostprocessor::Run(const std::vector &tensors, + std::vector *texts, + std::vector *rec_scores) { + // Recognizer have only 1 output tensor. + // For Recognizer, the output tensor shape = [batch, ?, 6625] + size_t total_size = tensors[0].shape[0]; + return Run(tensors, texts, rec_scores, 0, total_size, {}); +} + +bool RecognizerPostprocessor::Run(const std::vector &tensors, + std::vector *texts, + std::vector *rec_scores, + size_t start_index, size_t total_size, + const std::vector &indices) { + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." << std::endl; + return false; + } + + // Recognizer have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + // For Recognizer, the output tensor shape = [batch, ?, 6625] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + + if (batch <= 0) { + FDERROR << "The infer outputTensor.shape[0] <=0, wrong infer result." 
+ << std::endl; + return false; + } + if (start_index < 0 || total_size <= 0) { + FDERROR << "start_index or total_size error. Correct is: 0 <= start_index " + "< total_size" + << std::endl; + return false; + } + if ((start_index + batch) > total_size) { + FDERROR << "start_index or total_size error. Correct is: start_index + " + "batch(outputTensor.shape[0]) <= total_size" + << std::endl; + return false; + } + texts->resize(total_size); + rec_scores->resize(total_size); + + const float *tensor_data = reinterpret_cast(tensor.Data()); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + size_t real_index = i_batch + start_index; + if (indices.size() != 0) { + real_index = indices[i_batch + start_index]; + } + if (!SingleBatchPostprocessor(tensor_data + i_batch * length, tensor.shape, + &texts->at(real_index), + &rec_scores->at(real_index))) { + return false; + } + } + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h new file mode 100755 index 0000000000..9b21a61837 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for Recognizer serials model. 
+ */ +class ULTRAINFER_DECL RecognizerPostprocessor { +public: + RecognizerPostprocessor(); + /** \brief Create a postprocessor instance for Recognizer serials model + * + * \param[in] label_path The path of label_dict + */ + explicit RecognizerPostprocessor(const std::string &label_path); + + /** \brief Process the result of runtime and fill to RecognizerResult + * + * \param[in] tensors The inference result from runtime + * \param[in] texts The output text results of recognizer + * \param[in] rec_scores The output score results of recognizer + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *texts, std::vector *rec_scores); + + bool Run(const std::vector &tensors, + std::vector *texts, std::vector *rec_scores, + size_t start_index, size_t total_size, + const std::vector &indices); + +private: + bool SingleBatchPostprocessor(const float *out_data, + const std::vector &output_shape, + std::string *text, float *rec_score); + bool initialized_ = false; + std::vector label_list_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc new file mode 100755 index 0000000000..4c49887caf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
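The recognizer postprocessor above is a greedy CTC decode: per time step it takes the argmax over the dictionary, drops the blank (index 0, the "#" prepended by ReadDict) and consecutive repeats, and averages the kept probabilities. A minimal standalone sketch of the same rule (a hypothetical helper mirroring SingleBatchPostprocessor, not part of this patch):

#include <cmath>
#include <string>
#include <utility>
#include <vector>

// probs has shape [T, C]; labels[0] is the CTC blank.
std::pair<std::string, float> CtcGreedyDecode(
    const std::vector<std::vector<float>> &probs,
    const std::vector<std::string> &labels) {
  std::string text;
  float score = 0.f;
  int count = 0;
  int last_index = 0;
  for (size_t t = 0; t < probs.size(); ++t) {
    int argmax_idx = 0;
    for (size_t c = 1; c < probs[t].size(); ++c) {
      if (probs[t][c] > probs[t][argmax_idx]) argmax_idx = static_cast<int>(c);
    }
    // Keep the character only if it is not the blank and not a repeat of the
    // previous time step.
    if (argmax_idx > 0 && !(t > 0 && argmax_idx == last_index)) {
      score += probs[t][argmax_idx];
      ++count;
      text += labels[argmax_idx];
    }
    last_index = argmax_idx;
  }
  score /= (count + 1e-6f);
  if (count == 0 || std::isnan(score)) score = 0.f;
  return {text, score};
}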
+ +#include "ultrainfer/vision/ocr/ppocr/rec_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +RecognizerPreprocessor::RecognizerPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {127, 127, 127}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + std::vector mean = {0.5f, 0.5f, 0.5f}; + std::vector std = {0.5f, 0.5f, 0.5f}; + normalize_permute_op_ = + std::make_shared(mean, std, true); + normalize_op_ = std::make_shared(mean, std, true); + hwc2chw_op_ = std::make_shared(); + cast_op_ = std::make_shared("float"); +} + +void RecognizerPreprocessor::OcrRecognizerResizeImage( + FDMat *mat, float max_wh_ratio, const std::vector &rec_image_shape, + bool static_shape_infer) { + int img_h, img_w; + img_h = rec_image_shape[1]; + img_w = rec_image_shape[2]; + + if (!static_shape_infer) { + img_w = int(img_h * max_wh_ratio); + float ratio = float(mat->Width()) / float(mat->Height()); + + int resize_w; + if (ceilf(img_h * ratio) > img_w) { + resize_w = img_w; + } else { + resize_w = int(ceilf(img_h * ratio)); + } + resize_op_->SetWidthAndHeight(resize_w, img_h); + (*resize_op_)(mat); + pad_op_->SetPaddingSize(0, 0, 0, int(img_w - mat->Width())); + (*pad_op_)(mat); + } else { + if (mat->Width() >= img_w) { + // Reszie W to 320 + resize_op_->SetWidthAndHeight(img_w, img_h); + (*resize_op_)(mat); + } else { + resize_op_->SetWidthAndHeight(mat->Width(), img_h); + (*resize_op_)(mat); + // Pad to 320 + pad_op_->SetPaddingSize(0, 0, 0, int(img_w - mat->Width())); + (*pad_op_)(mat); + } + } +} + +bool RecognizerPreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + size_t real_index = i; + if (indices.size() != 0) { + real_index = indices[i]; + } + mats[i - start_index] = images->at(real_index); + } + return Run(&mats, outputs); +} + +bool RecognizerPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + int img_h = rec_image_shape_[1]; + int img_w = rec_image_shape_[2]; + float max_wh_ratio = img_w * 1.0 / img_h; + float ori_wh_ratio; + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + ori_wh_ratio = mat->Width() * 1.0 / mat->Height(); + max_wh_ratio = std::max(max_wh_ratio, ori_wh_ratio); + } + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, + static_shape_infer_); + } + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } else { + if (!disable_normalize_) { + (*normalize_op_)(image_batch); + } + if (!disable_permute_) { + (*hwc2chw_op_)(image_batch); + (*cast_op_)(image_batch); + } + } + + // Only have 1 output Tensor. 
+ outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h new file mode 100755 index 0000000000..58fd675468 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL RecognizerPreprocessor : public ProcessorManager { +public: + RecognizerPreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set static_shape_infer is true or not. When deploy PP-OCR + /// on hardware which can not support dynamic input shape very well, + /// like Huawei Ascned, static_shape_infer needs to to be true. + void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. 
+  void SetNormalize(const std::vector<float> &mean,
+                    const std::vector<float> &std, bool is_scale) {
+    normalize_permute_op_ =
+        std::make_shared<NormalizeAndPermute>(mean, std, is_scale);
+    normalize_op_ = std::make_shared<Normalize>(mean, std, is_scale);
+  }
+
+  /// Set rec_image_shape for the recognition preprocess
+  void SetRecImageShape(const std::vector<int> &rec_image_shape) {
+    rec_image_shape_ = rec_image_shape;
+  }
+  /// Get rec_image_shape for the recognition preprocess
+  std::vector<int> GetRecImageShape() { return rec_image_shape_; }
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+private:
+  void OcrRecognizerResizeImage(FDMat *mat, float max_wh_ratio,
+                                const std::vector<int> &rec_image_shape,
+                                bool static_shape_infer);
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
+  std::vector<int> rec_image_shape_ = {3, 48, 320};
+  bool static_shape_infer_ = false;
+  std::shared_ptr<Resize> resize_op_;
+  std::shared_ptr<Pad> pad_op_;
+  std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
+  std::shared_ptr<Normalize> normalize_op_;
+  std::shared_ptr<HWC2CHW> hwc2chw_op_;
+  std::shared_ptr<Cast> cast_op_;
+};
+
+} // namespace ocr
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc
new file mode 100755
index 0000000000..47dcdc283e
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
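To make the recognizer preprocessor's dynamic resize concrete: every crop in a batch is scaled to the model height (48 by default), then right-padded with value 127 up to one common width derived from the widest aspect ratio in the batch; with static_shape_infer enabled the width is pinned to rec_image_shape_[2] instead (useful on hardware such as Ascend). A small arithmetic sketch of the common-width computation, mirroring Apply() and OcrRecognizerResizeImage above (a hypothetical helper, not part of this patch):

#include <algorithm>
#include <utility>
#include <vector>

// crop_wh holds (width, height) for each crop of one batch;
// rec_image_shape is {C, H, W}, e.g. {3, 48, 320}.
int BatchTargetWidth(const std::vector<std::pair<int, int>> &crop_wh,
                     const std::vector<int> &rec_image_shape) {
  const int img_h = rec_image_shape[1];
  float max_wh_ratio =
      static_cast<float>(rec_image_shape[2]) / static_cast<float>(img_h);
  for (const auto &wh : crop_wh) {
    max_wh_ratio = std::max(
        max_wh_ratio,
        static_cast<float>(wh.first) / static_cast<float>(wh.second));
  }
  // Every crop is resized to height img_h (keeping its aspect ratio) and
  // right-padded up to this common width before batching.
  return static_cast<int>(img_h * max_wh_ratio);
}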
+ +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +Recognizer::Recognizer() {} + +Recognizer::Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &label_path, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : postprocessor_(label_path) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool Recognizer::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + + return true; +} + +std::unique_ptr Recognizer::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(Recognizer(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool Recognizer::Predict(const cv::Mat &img, std::string *text, + float *rec_score) { + std::vector texts(1); + std::vector rec_scores(1); + bool success = BatchPredict({img}, &texts, &rec_scores); + if (!success) { + return success; + } + *text = std::move(texts[0]); + *rec_score = rec_scores[0]; + return true; +} + +bool Recognizer::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + ocr_result->text.resize(1); + ocr_result->rec_scores.resize(1); + if (!Predict(img, &(ocr_result->text[0]), &(ocr_result->rec_scores[0]))) { + return false; + } + return true; +} + +bool Recognizer::BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores) { + return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); +} + +bool Recognizer::BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result) { + return BatchPredict(images, &(ocr_result->text), &(ocr_result->rec_scores)); +} + +bool Recognizer::BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores, + size_t start_index, size_t end_index, + const std::vector &indices) { + size_t total_size = images.size(); + if (indices.size() != 0 && indices.size() != total_size) { + FDERROR << "indices.size() should be 0 or images.size()." << std::endl; + return false; + } + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index, indices)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, + start_index, total_size, indices)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+ << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h new file mode 100755 index 0000000000..e156647665 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/rec_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/rec_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief Recognizer object is used to load the recognition model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL Recognizer : public UltraInferModel { +public: + Recognizer(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_rec_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] label_path Path + * of label file used by OCR recognition model. e.g ./ppocr_keys_v1.txt + * \param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends`. \param[in] + * model_format Model format of the loaded model, default is Paddle format. + */ + Recognizer(const std::string &model_file, const std::string ¶ms_file = "", + const std::string &label_path = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_rec"; } + + /** \brief Clone a new Recognizer with less memory usage when multiple + * instances of the same model are created + * + * \return new Recognizer* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR recognition model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] text The text result of rec + * model will be written into this parameter. \param[in] rec_score The sccore + * result of rec model will be written into this parameter. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, std::string *text, float *rec_score); + + /** \brief Predict the input image and get OCR recognition model result. 
+ * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * recognition model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR recognition model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_result The output + * of OCR recognition model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR recognition model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] texts The list of text + * results of rec model will be written into this vector. \param[in] + * rec_scores The list of sccore result of rec model will be written into this + * vector. \return true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores); + + virtual bool BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores, size_t start_index, + size_t end_index, const std::vector &indices); + + /// Get preprocessor reference of DBDetectorPreprocessor + virtual RecognizerPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual RecognizerPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + RecognizerPreprocessor preprocessor_; + RecognizerPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc new file mode 100755 index 0000000000..a16be5aaa2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
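The Clone() entry points above are meant for running several instances of the same recognizer (for example, one per worker thread) while sharing the loaded weights, instead of reloading the model. A hedged sketch, assuming a recognizer has already been constructed as in the pipeline example earlier; the worker function itself is hypothetical:

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

// Each worker clones the shared recognizer and decodes its own batch of
// pre-cropped text lines.
void RecognizeOnWorker(const ultrainfer::vision::ocr::Recognizer &rec,
                       const std::vector<cv::Mat> &crops) {
  std::unique_ptr<ultrainfer::vision::ocr::Recognizer> local = rec.Clone();
  std::vector<std::string> texts;
  std::vector<float> scores;
  if (local->BatchPredict(crops, &texts, &scores)) {
    for (size_t i = 0; i < texts.size(); ++i) {
      std::cout << texts[i] << " (" << scores[i] << ")" << std::endl;
    }
  }
}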
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2Layout::StructureV2Layout() {} +StructureV2Layout::StructureV2Layout(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool StructureV2Layout::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr StructureV2Layout::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(StructureV2Layout(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2Layout::Predict(cv::Mat *im, DetectionResult *result) { + return Predict(*im, result); +} + +bool StructureV2Layout::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool StructureV2Layout::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_layout_img_info = preprocessor_.GetBatchLayoutImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, + *batch_layout_img_info)) { + FDERROR << "Failed to postprocess the inference results." << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h new file mode 100755 index 0000000000..132cd183f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { +/*! @brief StructureV2Layout object is used to load the PP-StructureV2-Layout + * detection model. + */ +class ULTRAINFER_DECL StructureV2Layout : public UltraInferModel { +public: + StructureV2Layout(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./picodet_lcnet_x1_0_fgd_layout_cdla_infer/model.pdmodel. \param[in] + * params_file Path of parameter file, e.g + * ./picodet_lcnet_x1_0_fgd_layout_cdla_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + StructureV2Layout(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2Layout with less memory usage when multiple + * instances of the same model are created + * + * \return newStructureV2Layout* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "pp-structurev2-layout"; } + + /** \brief DEPRECATED Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result); + + /** \brief Predict the detection result for an input image + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, DetectionResult *result); + + /** \brief Predict the detection result for an input image list + * \param[in] im The input image list, all the elements come from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The output detection result list \return true if the prediction successed, + * otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference ofStructureV2LayoutPreprocessor + virtual StructureV2LayoutPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference ofStructureV2LayoutPostprocessor + virtual StructureV2LayoutPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + StructureV2LayoutPreprocessor preprocessor_; + StructureV2LayoutPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // 
namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc new file mode 100755 index 0000000000..b5a3385821 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc @@ -0,0 +1,174 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool StructureV2LayoutPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector> &batch_layout_img_info) { + // A StructureV2Layout has 8 output tensors on which it then runs + // a GFL regression (namely, DisPred2Box), reference: + // PaddleOCR/blob/release/2.6/deploy/cpp_infer/src/postprocess_op.cpp#L511 + int tensor_size = tensors.size(); + FDASSERT(tensor_size == 8, + "StructureV2Layout should has 8 output tensors," + "but got %d now!", + tensor_size) + FDASSERT((tensor_size / 2) == fpn_stride_.size(), + "found (tensor_size / 2) != fpn_stride_.size() !") + // TODO(qiuyanjun): may need to reorder the tensors according to + // fpn_stride_ and the shape of output tensors. + size_t batch = tensors[0].Shape()[0]; // [batch, ...] 
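+  // Tensor ordering assumed here (see the FDASSERTs and TODO above): the first
+  // fpn_stride_.size() tensors are per-level class score maps with shape
+  // [batch, H*W, num_class_], and the following fpn_stride_.size() tensors are
+  // the matching GFL box-distribution maps with shape [batch, H*W, 4 * reg_max_],
+  // which is why reg_max_ is recovered from Shape()[2] / 4 below.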
+ + results->resize(batch); + SetRegMax(tensors[fpn_stride_.size()].Shape()[2] / 4); + for (int batch_idx = 0; batch_idx < batch; ++batch_idx) { + std::vector single_batch_tensors(8); + SetSingleBatchExternalData(tensors, single_batch_tensors, batch_idx); + SingleBatchPostprocessor(single_batch_tensors, + batch_layout_img_info[batch_idx], + &results->at(batch_idx)); + } + return true; +} + +void StructureV2LayoutPostprocessor::SetSingleBatchExternalData( + const std::vector &tensors, + std::vector &single_batch_tensors, size_t batch_idx) { + single_batch_tensors.resize(tensors.size()); + for (int j = 0; j < tensors.size(); ++j) { + auto j_shape = tensors[j].Shape(); + j_shape[0] = 1; // process b=1 per loop + size_t j_step = + accumulate(j_shape.begin(), j_shape.end(), 1, std::multiplies()); + const float *j_data_ptr = + reinterpret_cast(tensors[j].Data()); + const float *j_start_ptr = j_data_ptr + j_step * batch_idx; + single_batch_tensors[j].SetExternalData( + j_shape, tensors[j].Dtype(), + const_cast(reinterpret_cast(j_start_ptr)), + tensors[j].device, tensors[j].device_id); + } +} + +bool StructureV2LayoutPostprocessor::SingleBatchPostprocessor( + const std::vector &single_batch_tensors, + const std::array &layout_img_info, DetectionResult *result) { + FDASSERT(single_batch_tensors.size() == 8, + "StructureV2Layout should has 8 output tensors," + "but got %d now!", + static_cast(single_batch_tensors.size())) + // layout_img_info: {image width, image height, resize width, resize height} + int img_w = layout_img_info[0]; + int img_h = layout_img_info[1]; + int in_w = layout_img_info[2]; + int in_h = layout_img_info[3]; + float scale_factor_w = static_cast(in_w) / static_cast(img_w); + float scale_factor_h = static_cast(in_h) / static_cast(img_h); + + std::vector bbox_results; + bbox_results.resize(num_class_); // tmp result for each class + + // decode score, label, box + for (int i = 0; i < fpn_stride_.size(); ++i) { + int feature_h = std::ceil(static_cast(in_h) / fpn_stride_[i]); + int feature_w = std::ceil(static_cast(in_w) / fpn_stride_[i]); + const FDTensor &prob_tensor = single_batch_tensors[i]; + const FDTensor &bbox_tensor = single_batch_tensors[i + fpn_stride_.size()]; + const float *prob_data = + reinterpret_cast(prob_tensor.Data()); + const float *bbox_data = + reinterpret_cast(bbox_tensor.Data()); + for (int idx = 0; idx < feature_h * feature_w; ++idx) { + // score and label + float score = 0.f; + int label = 0; + for (int j = 0; j < num_class_; ++j) { + if (prob_data[idx * num_class_ + j] > score) { + score = prob_data[idx * num_class_ + j]; + label = j; + } + } + // bbox + if (score > score_threshold_) { + int row = idx / feature_w; + int col = idx % feature_w; + std::vector bbox_pred(bbox_data + idx * 4 * reg_max_, + bbox_data + (idx + 1) * 4 * reg_max_); + bbox_results[label].boxes.push_back(DisPred2Bbox( + bbox_pred, col, row, fpn_stride_[i], in_w, in_h, reg_max_)); + bbox_results[label].scores.push_back(score); + bbox_results[label].label_ids.push_back(label); + } + } + } + + result->Clear(); + // nms for per class, i in [0~num_class-1] + for (int i = 0; i < bbox_results.size(); ++i) { + if (bbox_results[i].boxes.size() <= 0) { + continue; + } + vision::utils::NMS(&bbox_results[i], nms_threshold_); + // fill output results + for (int j = 0; j < bbox_results[i].boxes.size(); ++j) { + result->scores.push_back(bbox_results[i].scores[j]); + result->label_ids.push_back(bbox_results[i].label_ids[j]); + result->boxes.push_back({ + bbox_results[i].boxes[j][0] / scale_factor_w, 
+ bbox_results[i].boxes[j][1] / scale_factor_h, + bbox_results[i].boxes[j][2] / scale_factor_w, + bbox_results[i].boxes[j][3] / scale_factor_h, + }); + } + } + return true; +} + +std::array StructureV2LayoutPostprocessor::DisPred2Bbox( + const std::vector &bbox_pred, int x, int y, int stride, int resize_w, + int resize_h, int reg_max) { + float ct_x = (static_cast(x) + 0.5f) * static_cast(stride); + float ct_y = (static_cast(y) + 0.5f) * static_cast(stride); + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + std::vector bbox_pred_i(bbox_pred.begin() + i * reg_max, + bbox_pred.begin() + (i + 1) * reg_max); + std::vector dis_after_sm = ocr::Softmax(bbox_pred_i); + float dis = 0.0f; + for (int j = 0; j < reg_max; j++) { + dis += static_cast(j) * dis_after_sm[j]; + } + dis *= static_cast(stride); + dis_pred[i] = dis; + } + + float xmin = std::max(ct_x - dis_pred[0], 0.0f); + float ymin = std::max(ct_y - dis_pred[1], 0.0f); + float xmax = std::min(ct_x + dis_pred[2], static_cast(resize_w)); + float ymax = std::min(ct_y + dis_pred[3], static_cast(resize_h)); + + return {xmin, ymin, xmax, ymax}; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h new file mode 100755 index 0000000000..421fbea3f0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +/*! @brief Postprocessor object for PaddleDet serials model. 
+ */ +class ULTRAINFER_DECL StructureV2LayoutPostprocessor { +public: + StructureV2LayoutPostprocessor() {} + /** \brief Process the result of runtime and fill to batch DetectionResult + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output result of layout detection + * \param[in] batch_layout_img_info The image info of input images, + * {{image width, image height, resize width, resize height},...} + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results, + const std::vector> &batch_layout_img_info); + + /// Set score_threshold_ for layout detection postprocess, default is 0.4 + void SetScoreThreshold(float score_threshold) { + score_threshold_ = score_threshold; + } + /// Set nms_threshold_ for layout detection postprocess, default is 0.5 + void SetNMSThreshold(float nms_threshold) { nms_threshold_ = nms_threshold; } + /// Set num_class_ for layout detection postprocess, default is 5 + void SetNumClass(int num_class) { num_class_ = num_class; } + /// Set fpn_stride_ for layout detection postprocess, default is {8, 16, 32, + /// 64} + void SetFPNStride(const std::vector &fpn_stride) { + fpn_stride_ = fpn_stride; + } + /// Set reg_max_ for layout detection postprocess, default is 8 + void SetRegMax(int reg_max) { reg_max_ = reg_max; } // should private ? + /// Get score_threshold_ of layout detection postprocess, default is 0.4 + float GetScoreThreshold() const { return score_threshold_; } + /// Get nms_threshold_ of layout detection postprocess, default is 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + /// Get num_class_ of layout detection postprocess, default is 5 + int GetNumClass() const { return num_class_; } + /// Get fpn_stride_ of layout detection postprocess, default is {8, 16, 32, + /// 64} + std::vector GetFPNStride() const { return fpn_stride_; } + /// Get reg_max_ of layout detection postprocess, default is 8 + int GetRegMax() const { return reg_max_; } + +private: + std::array DisPred2Bbox(const std::vector &bbox_pred, int x, + int y, int stride, int resize_w, + int resize_h, int reg_max); + bool + SingleBatchPostprocessor(const std::vector &single_batch_tensors, + const std::array &layout_img_info, + DetectionResult *result); + void SetSingleBatchExternalData(const std::vector &tensors, + std::vector &single_batch_tensors, + size_t batch_idx); + + std::vector fpn_stride_ = {8, 16, 32, 64}; + float score_threshold_ = 0.4; + float nms_threshold_ = 0.5; + int num_class_ = 5; + int reg_max_ = 8; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc new file mode 100755 index 0000000000..4aed3e1c89 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2LayoutPreprocessor::StructureV2LayoutPreprocessor() { + // default width(608) and height(900) + resize_op_ = + std::make_shared(layout_image_shape_[2], layout_image_shape_[1]); + normalize_permute_op_ = std::make_shared( + std::vector({0.485f, 0.456f, 0.406f}), + std::vector({0.229f, 0.224f, 0.225f}), true); +} + +std::array StructureV2LayoutPreprocessor::GetLayoutImgInfo(FDMat *img) { + if (static_shape_infer_) { + return {img->Width(), img->Height(), layout_image_shape_[2], + layout_image_shape_[1]}; + } else { + FDASSERT(false, "not support dynamic shape inference now!") + } + return {img->Width(), img->Height(), layout_image_shape_[2], + layout_image_shape_[1]}; +} + +bool StructureV2LayoutPreprocessor::ResizeLayoutImage(FDMat *img, int resize_w, + int resize_h) { + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(img); + return true; +} + +bool StructureV2LayoutPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + batch_layout_img_info_.clear(); + batch_layout_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + batch_layout_img_info_[i] = GetLayoutImgInfo(mat); + ResizeLayoutImage(mat, batch_layout_img_info_[i][2], + batch_layout_img_info_[i][3]); + } + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h new file mode 100755 index 0000000000..1288f5cbe8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h @@ -0,0 +1,90 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for DBDetector serials model. 
+ */ +class ULTRAINFER_DECL StructureV2LayoutPreprocessor : public ProcessorManager { +public: + StructureV2LayoutPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_permute_op_ = + std::make_shared(mean, std, is_scale); + } + + /// Get the image info of the last batch, return a list of array + /// {image width, image height, resize width, resize height} + const std::vector> *GetBatchLayoutImgInfo() { + return &batch_layout_img_info_; + } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_normalize_ = true; } + /// Set image_shape for the detection preprocess. + /// This api is usually used when you retrain the model. + /// Generally, you do not need to use it. + void SetLayoutImageShape(const std::vector &image_shape) { + layout_image_shape_ = image_shape; + } + /// Get cls_image_shape for the classification preprocess + std::vector GetLayoutImageShape() const { return layout_image_shape_; } + /// Set static_shape_infer is true or not. When deploy PP-StructureV2 + /// on hardware which can not support dynamic input shape very well, + /// like Huawei Ascned, static_shape_infer needs to to be true. + void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + +private: + bool ResizeLayoutImage(FDMat *img, int resize_w, int resize_h); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::vector> batch_layout_img_info_; + std::shared_ptr resize_op_; + std::shared_ptr normalize_permute_op_; + std::vector layout_image_shape_ = {3, 800, 608}; // c,h,w + // default true for pp-structurev2-layout model, backbone picodet. + bool static_shape_infer_ = true; + std::array GetLayoutImgInfo(FDMat *img); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc new file mode 100755 index 0000000000..47c3109cb9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2SERViLayoutXLMModel::StructureV2SERViLayoutXLMModel( + const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + valid_horizon_backends = {Backend::HORIZONNPU}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr +StructureV2SERViLayoutXLMModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique( + StructureV2SERViLayoutXLMModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2SERViLayoutXLMModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h new file mode 100755 index 0000000000..480acd748b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
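Clone() above reuses the already-initialized runtime through CloneRuntime(), so a second handle to the same weights is much cheaper than constructing the model a second time. A minimal usage sketch, assuming a local ser_vi_layoutxlm export; the paths and the Initialized() check (mirroring the `initialized` flag set in the constructor) are illustrative:

    #include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h"

    int main() {
      ultrainfer::vision::ocr::StructureV2SERViLayoutXLMModel model(
          "ser_vi_layoutxlm/model.pdmodel", "ser_vi_layoutxlm/model.pdiparams",
          "ser_vi_layoutxlm/infer_cfg.yml");
      if (!model.Initialized()) return -1;  // assumed accessor for the `initialized` flag
      auto worker = model.Clone();          // second instance sharing the loaded runtime
      return 0;
    }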
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { +/** \brief All classification model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief StructureV2SERViLayoutXLM model object used when to load a + * StructureV2SERViLayoutXLM model exported by StructureV2SERViLayoutXLMModel + * repository + */ +class ULTRAINFER_DECL StructureV2SERViLayoutXLMModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ser_vi_layoutxlm/model.pdmodel \param[in] params_file Path of parameter + * file, e.g ser_vi_layoutxlm/model.pdiparams, if the model format is ONNX, + * this parameter will be ignored \param[in] config_file Path of configuration + * file for deployment, e.g ser_vi_layoutxlm/infer_cfg.yml \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + StructureV2SERViLayoutXLMModel( + const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2SERViLayoutXLMModel with less memory usage + * when multiple instances of the same model are created + * + * \return new StructureV2SERViLayoutXLMModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { + return "StructureV2SERViLayoutXLMModel"; + } + +protected: + bool Initialize(); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc new file mode 100755 index 0000000000..f2e4cf0e60 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
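The constructor above only fills the candidate backend lists (`valid_cpu_backends`, `valid_gpu_backends`, ...); the device and backend actually used come from the RuntimeOption supplied by the caller. A hedged sketch of steering that choice, assuming RuntimeOption keeps the usual UseGpu()/UseOrtBackend() helpers:

    ultrainfer::RuntimeOption option;
    option.UseGpu(0);        // assumed helper: run on GPU 0
    option.UseOrtBackend();  // assumed helper: prefer ONNX Runtime from valid_gpu_backends
    ultrainfer::vision::ocr::StructureV2SERViLayoutXLMModel model(
        "ser_vi_layoutxlm/model.pdmodel", "ser_vi_layoutxlm/model.pdiparams",
        "ser_vi_layoutxlm/infer_cfg.yml", option);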
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2Table::StructureV2Table() {} +StructureV2Table::StructureV2Table(const std::string &model_file, + const std::string ¶ms_file, + const std::string &table_char_dict_path, + const std::string &box_shape, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : postprocessor_(table_char_dict_path, box_shape) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool StructureV2Table::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr StructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(StructureV2Table(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2Table::Predict(const cv::Mat &img, + std::vector> *boxes_result, + std::vector *structure_result) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict({img}, &det_results, &structure_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + *structure_result = std::move(structure_results[0]); + return true; +} + +bool StructureV2Table::Predict(const cv::Mat &img, + vision::OCRResult *ocr_result) { + if (!Predict(img, &(ocr_result->table_boxes), + &(ocr_result->table_structure))) { + return false; + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict(images, &det_results, &structure_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].table_boxes = std::move(det_results[i]); + (*ocr_results)[i].table_structure = std::move(structure_results[i]); + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector &images, + std::vector>> *det_results, + std::vector> *structure_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + structure_results, *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+ << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h new file mode 100755 index 0000000000..152d6cc0f6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBDetector object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL StructureV2Table : public UltraInferModel { +public: + StructureV2Table(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel. \param[in] + * params_file Path of parameter file, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. \param[in] box_shape + * Type of output box, default is ori. + */ + StructureV2Table(const std::string &model_file, + const std::string ¶ms_file = "", + const std::string &table_char_dict_path = "", + const std::string &box_shape = "ori", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2Table Recognizer with less memory usage when + * multiple instances of the same model are created + * + * \return new StructureV2Table* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_table"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. 
+ */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result, + std::vector *structure_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results, + std::vector> *structure_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of StructureV2TablePreprocessor + virtual StructureV2TablePreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of StructureV2TablePostprocessor + virtual StructureV2TablePostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + StructureV2TablePreprocessor preprocessor_; + StructureV2TablePostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc new file mode 100755 index 0000000000..36498215b1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
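For orientation, the OCRResult overload declared above is the simplest entry point: it fills table_boxes and table_structure in one call (see the BatchPredict implementation earlier in this patch). A minimal sketch, assuming the en_ppstructure_mobile_v2.0_SLANet export named in the doc comment and an illustrative dictionary path:

    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/structurev2_table.h"

    int main() {
      ultrainfer::vision::ocr::StructureV2Table table(
          "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel",
          "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams",
          "table_structure_dict.txt");  // table_char_dict_path (illustrative)
      cv::Mat img = cv::imread("table.jpg");
      ultrainfer::vision::OCRResult result;
      if (!table.Predict(img, &result)) return -1;
      // result.table_structure: predicted structure tokens
      // result.table_boxes: matching cell boxes
      return 0;
    }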
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2TablePostprocessor::StructureV2TablePostprocessor() { + initialized_ = false; +} + +StructureV2TablePostprocessor::StructureV2TablePostprocessor( + const std::string &dict_path, const std::string &box_shape) + : box_shape(box_shape) { + std::ifstream in(dict_path); + + FDASSERT(in, "Cannot open file %s to read.", dict_path.c_str()); + std::string line; + dict_character.clear(); + dict_character.push_back("sos"); // add special character + while (getline(in, line)) { + dict_character.push_back(line); + } + + if (merge_no_span_structure) { + if (std::find(dict_character.begin(), dict_character.end(), "") == + dict_character.end()) { + dict_character.push_back(""); + } + for (auto it = dict_character.begin(); it != dict_character.end();) { + if (*it == "") { + it = dict_character.erase(it); + } else { + ++it; + } + } + } + + dict_character.push_back("eos"); // add special character + dict.clear(); + for (size_t i = 0; i < dict_character.size(); i++) { + dict[dict_character[i]] = int(i); + if (dict_character[i] == "beg") { + ignore_beg_token_idx = i; + } else if (dict_character[i] == "end") { + ignore_end_token_idx = i; + } + } + dict_end_idx = dict_character.size() - 1; + + initialized_ = true; +} + +bool StructureV2TablePostprocessor::SingleBatchPostprocessor( + const float *structure_probs, const float *bbox_preds, size_t slice_dim, + size_t prob_dim, size_t box_dim, int img_width, int img_height, + float ratio_h, float ratio_w, int pad_h, int pad_w, + std::vector> *boxes_result, + std::vector *structure_list_result) { + structure_list_result->push_back(""); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + for (int i = 0; i < slice_dim; i++) { + int structure_idx = 0; + float structure_prob = structure_probs[i * prob_dim]; + for (int j = 0; j < prob_dim; j++) { + if (structure_probs[i * prob_dim + j] > structure_prob) { + structure_prob = structure_probs[i * prob_dim + j]; + structure_idx = j; + } + } + + if (structure_idx > 0 && structure_idx == dict_end_idx) + break; + + if (structure_idx == ignore_end_token_idx || + structure_idx == ignore_beg_token_idx) + continue; + + std::string text = dict_character[structure_idx]; + if (std::find(td_tokens.begin(), td_tokens.end(), text) != + td_tokens.end()) { + std::array bbox; + // box dim: en->4, ch->8 + + if (box_dim == 4) { + bbox[0] = bbox_preds[i * box_dim] * img_width; + bbox[1] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[2] = bbox_preds[i * box_dim + 2] * img_width; + bbox[3] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[4] = bbox_preds[i * box_dim + 2] * img_width; + bbox[5] = bbox_preds[i * box_dim + 3] * img_height; + + bbox[6] = bbox_preds[i * box_dim] * img_width; + bbox[7] = bbox_preds[i * box_dim + 3] * img_height; + } else { + for (int k = 0; k < 8; k++) { + float bbox_pred = bbox_preds[i * box_dim + k]; + if (box_shape == "pad") { + bbox[k] = int(k % 2 == 0 ? bbox_pred * pad_w / ratio_w + : bbox_pred * pad_h / ratio_h); + } else { + bbox[k] = int(k % 2 == 0 ? bbox_pred * img_width + : bbox_pred * img_height); + } + } + } + + boxes_result->push_back(bbox); + } + structure_list_result->push_back(text); + } + structure_list_result->push_back("
"); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + return true; +} + +bool StructureV2TablePostprocessor::Run( + const std::vector &tensors, + std::vector>> *bbox_batch_list, + std::vector> *structure_batch_list, + const std::vector> &batch_det_img_info) { + // Table have 2 output tensors. + const FDTensor &structure_probs = tensors[1]; + const FDTensor &bbox_preds = tensors[0]; + + const float *structure_probs_data = + reinterpret_cast(structure_probs.Data()); + size_t structure_probs_length = + accumulate(structure_probs.shape.begin() + 1, structure_probs.shape.end(), + 1, std::multiplies()); + + const float *bbox_preds_data = + reinterpret_cast(bbox_preds.Data()); + size_t bbox_preds_length = + accumulate(bbox_preds.shape.begin() + 1, bbox_preds.shape.end(), 1, + std::multiplies()); + size_t batch = bbox_preds.shape[0]; + size_t slice_dim = bbox_preds.shape[1]; + size_t prob_dim = structure_probs.shape[2]; + size_t box_dim = bbox_preds.shape[2]; + + bbox_batch_list->resize(batch); + structure_batch_list->resize(batch); + + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor( + structure_probs_data, bbox_preds_data, slice_dim, prob_dim, box_dim, + batch_det_img_info[i_batch][0], batch_det_img_info[i_batch][1], + batch_det_img_info[i_batch][2], batch_det_img_info[i_batch][3], + batch_det_img_info[i_batch][4], batch_det_img_info[i_batch][5], + &bbox_batch_list->at(i_batch), &structure_batch_list->at(i_batch)); + structure_probs_data = structure_probs_data + structure_probs_length; + bbox_preds_data = bbox_preds_data + bbox_preds_length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h new file mode 100755 index 0000000000..97b91f0545 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for DBDetector serials model. + */ + +class ULTRAINFER_DECL StructureV2TablePostprocessor { +public: + StructureV2TablePostprocessor(); + /** \brief Create a postprocessor instance for Recognizer serials model + * + * \param[in] label_path The path of label_dict + * \param[in] box_shape Type of output box, default is ori. 
+ */ + + explicit StructureV2TablePostprocessor(const std::string &dict_path, + const std::string &box_shape); + + /** \brief Process the result of runtime and fill to RecognizerResult + * + * \param[in] tensors The inference result from runtime + * \param[in] texts The output text results of recognizer + * \param[in] rec_scores The output score results of recognizer + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector>> *bbox_batch_list, + std::vector> *structure_batch_list, + const std::vector> &batch_det_img_info_); + +private: + PostProcessor util_post_processor_; + bool SingleBatchPostprocessor( + const float *structure_probs, const float *bbox_preds, size_t slice_dim, + size_t prob_dim, size_t box_dim, int img_width, int img_height, + float ratio_h, float ratio_w, int pad_h, int pad_w, + std::vector> *boxes_result, + std::vector *structure_list_result); + + bool merge_no_span_structure{true}; + std::vector dict_character; + std::string box_shape; + std::vector td_tokens{"", ""}; + std::map dict; + int ignore_beg_token_idx; + int ignore_end_token_idx; + int dict_end_idx; + bool initialized_ = false; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc new file mode 100755 index 0000000000..64cce7b452 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2TablePreprocessor::StructureV2TablePreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + normalize_op_ = std::make_shared(mean, std, true); + hwc2chw_op_ = std::make_shared(); +} + +void StructureV2TablePreprocessor::StructureV2TableResizeImage(FDMat *mat, + int batch_idx) { + float img_h = float(rec_image_shape_[1]); + float img_w = float(rec_image_shape_[2]); + float width = float(mat->Width()); + float height = float(mat->Height()); + float ratio = float(float(max_len) / (std::max(height, width) * 1.0)); + int resize_h = int(height * ratio); + int resize_w = int(width * ratio); + + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(mat); + + (*normalize_op_)(mat); + pad_op_->SetPaddingSize(0, int(max_len - resize_h), 0, + int(max_len - resize_w)); + (*pad_op_)(mat); + + (*hwc2chw_op_)(mat); + + batch_det_img_info_[batch_idx] = {int(width), int(height), float(ratio), + float(ratio), int(max_len), int(max_len)}; +} + +bool StructureV2TablePreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + size_t real_index = i; + if (indices.size() != 0) { + real_index = indices[i]; + } + mats[i - start_index] = images->at(real_index); + } + return Run(&mats, outputs); +} + +bool StructureV2TablePreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + batch_det_img_info_.clear(); + batch_det_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + StructureV2TableResizeImage(mat, i); + } + + // Only have 1 output Tensor. + outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h new file mode 100755 index 0000000000..3d86933516 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for table model. + */ +class ULTRAINFER_DECL StructureV2TablePreprocessor : public ProcessorManager { +public: + StructureV2TablePreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Get the image info of the last batch, return a list of array + /// {image width, image height, resize width, resize height} + const std::vector> *GetBatchImgInfo() { + return &batch_det_img_info_; + } + +private: + void StructureV2TableResizeImage(FDMat *mat, int batch_idx); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + int max_len = 488; + std::vector rec_image_shape_ = {3, max_len, max_len}; + bool static_shape_infer_ = false; + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_op_; + std::shared_ptr hwc2chw_op_; + std::vector> batch_det_img_info_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc new file mode 100755 index 0000000000..14ede4120b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc @@ -0,0 +1,4374 @@ +/******************************************************************************* + * * + * Author : Angus Johnson * Version : 6.4.2 * Date : 27 February + *2017 * Website : + *http://www.angusj.com * Copyright : + *Angus Johnson 2010-2017 * + * * + * License: * Use, modification & distribution is subject to Boost Software + *License Ver 1. * http://www.boost.org/LICENSE_1_0.txt * + * * + * Attributions: * The code in this library is an extension of Bala Vatti's + *clipping algorithm: * "A generic solution to polygon clipping" * + * Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. 
* + * http://portal.acm.org/citation.cfm?id=129906 * + * * + * Computer graphics and geometric modeling: implementation and algorithms * By + *Max K. Agoston * + * Springer; 1 edition (January 4, 2005) * + * http://books.google.com/books?q=vatti+clipping+agoston * + * * + * See also: * "Polygon Offsetting by Computing Winding Numbers" * Paper no. + *DETC2005-85513 pp. 565-575 * ASME 2005 + *International Design Engineering Technical Conferences * and + *Computers and Information in Engineering Conference (IDETC/CIE2005) * + * September 24-28, 2005 , Long Beach, California, USA * + * http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf * + * * + *******************************************************************************/ + +/******************************************************************************* + * * + * This is a translation of the Delphi Clipper library and the naming style * + * used has retained a Delphi flavour. * + * * + *******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clipper.h" + +namespace ClipperLib { + +static double const pi = 3.141592653589793238; +static double const two_pi = pi * 2; +static double const def_arc_tolerance = 0.25; + +enum Direction { dRightToLeft, dLeftToRight }; + +static int const Unassigned = -1; // edge not currently 'owning' a solution +static int const Skip = -2; // edge that would otherwise close a path + +#define HORIZONTAL (-1.0E+40) +#define TOLERANCE (1.0e-20) +#define NEAR_ZERO(val) (((val) > -TOLERANCE) && ((val) < TOLERANCE)) + +struct TEdge { + IntPoint Bot; + IntPoint Curr; // current (updated for every new scanbeam) + IntPoint Top; + double Dx; + PolyType PolyTyp; + EdgeSide Side; // side only refers to current side of solution poly + int WindDelta; // 1 or -1 depending on winding direction + int WindCnt; + int WindCnt2; // winding count of the opposite polytype + int OutIdx; + TEdge *Next; + TEdge *Prev; + TEdge *NextInLML; + TEdge *NextInAEL; + TEdge *PrevInAEL; + TEdge *NextInSEL; + TEdge *PrevInSEL; +}; + +struct IntersectNode { + TEdge *Edge1; + TEdge *Edge2; + IntPoint Pt; +}; + +struct LocalMinimum { + cInt Y; + TEdge *LeftBound; + TEdge *RightBound; +}; + +struct OutPt; + +// OutRec: contains a path in the clipping solution. Edges in the AEL will +// carry a pointer to an OutRec when they are part of the clipping solution. +struct OutRec { + int Idx; + bool IsHole; + bool IsOpen; + OutRec *FirstLeft; // see comments in clipper.pas + PolyNode *PolyNd; + OutPt *Pts; + OutPt *BottomPt; +}; + +struct OutPt { + int Idx; + IntPoint Pt; + OutPt *Next; + OutPt *Prev; +}; + +struct Join { + OutPt *OutPt1; + OutPt *OutPt2; + IntPoint OffPt; +}; + +struct LocMinSorter { + inline bool operator()(const LocalMinimum &locMin1, + const LocalMinimum &locMin2) { + return locMin2.Y < locMin1.Y; + } +}; + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + +inline cInt Round(double val) { + if ((val < 0)) + return static_cast(val - 0.5); + else + return static_cast(val + 0.5); +} +//------------------------------------------------------------------------------ + +inline cInt Abs(cInt val) { return val < 0 ? -val : val; } + +//------------------------------------------------------------------------------ +// PolyTree methods ... 
+//------------------------------------------------------------------------------ + +void PolyTree::Clear() { + for (PolyNodes::size_type i = 0; i < AllNodes.size(); ++i) + delete AllNodes[i]; + AllNodes.resize(0); + Childs.resize(0); +} +//------------------------------------------------------------------------------ + +PolyNode *PolyTree::GetFirst() const { + if (!Childs.empty()) + return Childs[0]; + else + return 0; +} +//------------------------------------------------------------------------------ + +int PolyTree::Total() const { + int result = (int)AllNodes.size(); + // with negative offsets, ignore the hidden outer polygon ... + if (result > 0 && Childs[0] != AllNodes[0]) + result--; + return result; +} + +//------------------------------------------------------------------------------ +// PolyNode methods ... +//------------------------------------------------------------------------------ + +PolyNode::PolyNode() : Parent(0), Index(0), m_IsOpen(false) {} +//------------------------------------------------------------------------------ + +int PolyNode::ChildCount() const { return (int)Childs.size(); } +//------------------------------------------------------------------------------ + +void PolyNode::AddChild(PolyNode &child) { + unsigned cnt = (unsigned)Childs.size(); + Childs.push_back(&child); + child.Parent = this; + child.Index = cnt; +} +//------------------------------------------------------------------------------ + +PolyNode *PolyNode::GetNext() const { + if (!Childs.empty()) + return Childs[0]; + else + return GetNextSiblingUp(); +} +//------------------------------------------------------------------------------ + +PolyNode *PolyNode::GetNextSiblingUp() const { + if (!Parent) // protects against PolyTree.GetNextSiblingUp() + return 0; + else if (Index == Parent->Childs.size() - 1) + return Parent->GetNextSiblingUp(); + else + return Parent->Childs[Index + 1]; +} +//------------------------------------------------------------------------------ + +bool PolyNode::IsHole() const { + bool result = true; + PolyNode *node = Parent; + while (node) { + result = !result; + node = node->Parent; + } + return result; +} +//------------------------------------------------------------------------------ + +bool PolyNode::IsOpen() const { return m_IsOpen; } +//------------------------------------------------------------------------------ + +#ifndef use_int32 + +//------------------------------------------------------------------------------ +// Int128 class (enables safe math on signed 64bit integers) +// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1 +// Int128 val2((long64)9223372036854775807); +// Int128 val3 = val1 * val2; +// val3.AsString => "85070591730234615847396907784232501249" (8.5e+37) +//------------------------------------------------------------------------------ + +class Int128 { +public: + ulong64 lo; + long64 hi; + + Int128(long64 _lo = 0) { + lo = (ulong64)_lo; + if (_lo < 0) + hi = -1; + else + hi = 0; + } + + Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {} + + Int128(const long64 &_hi, const ulong64 &_lo) : lo(_lo), hi(_hi) {} + + Int128 &operator=(const long64 &val) { + lo = (ulong64)val; + if (val < 0) + hi = -1; + else + hi = 0; + return *this; + } + + bool operator==(const Int128 &val) const { + return (hi == val.hi && lo == val.lo); + } + + bool operator!=(const Int128 &val) const { return !(*this == val); } + + bool operator>(const Int128 &val) const { + if (hi != val.hi) + return hi > val.hi; + else + return lo > val.lo; + } + + bool 
operator<(const Int128 &val) const { + if (hi != val.hi) + return hi < val.hi; + else + return lo < val.lo; + } + + bool operator>=(const Int128 &val) const { return !(*this < val); } + + bool operator<=(const Int128 &val) const { return !(*this > val); } + + Int128 &operator+=(const Int128 &rhs) { + hi += rhs.hi; + lo += rhs.lo; + if (lo < rhs.lo) + hi++; + return *this; + } + + Int128 operator+(const Int128 &rhs) const { + Int128 result(*this); + result += rhs; + return result; + } + + Int128 &operator-=(const Int128 &rhs) { + *this += -rhs; + return *this; + } + + Int128 operator-(const Int128 &rhs) const { + Int128 result(*this); + result -= rhs; + return result; + } + + Int128 operator-() const // unary negation + { + if (lo == 0) + return Int128(-hi, 0); + else + return Int128(~hi, ~lo + 1); + } + + operator double() const { + const double shift64 = 18446744073709551616.0; // 2^64 + if (hi < 0) { + if (lo == 0) + return (double)hi * shift64; + else + return -(double)(~lo + ~hi * shift64); + } else + return (double)(lo + hi * shift64); + } +}; +//------------------------------------------------------------------------------ + +Int128 Int128Mul(long64 lhs, long64 rhs) { + bool negate = (lhs < 0) != (rhs < 0); + + if (lhs < 0) + lhs = -lhs; + ulong64 int1Hi = ulong64(lhs) >> 32; + ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF); + + if (rhs < 0) + rhs = -rhs; + ulong64 int2Hi = ulong64(rhs) >> 32; + ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF); + + // nb: see comments in clipper.pas + ulong64 a = int1Hi * int2Hi; + ulong64 b = int1Lo * int2Lo; + ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi; + + Int128 tmp; + tmp.hi = long64(a + (c >> 32)); + tmp.lo = long64(c << 32); + tmp.lo += long64(b); + if (tmp.lo < b) + tmp.hi++; + if (negate) + tmp = -tmp; + return tmp; +}; +#endif + +//------------------------------------------------------------------------------ +// Miscellaneous global functions +//------------------------------------------------------------------------------ + +bool Orientation(const Path &poly) { return Area(poly) >= 0; } +//------------------------------------------------------------------------------ + +double Area(const Path &poly) { + int size = (int)poly.size(); + if (size < 3) + return 0; + + double a = 0; + for (int i = 0, j = size - 1; i < size; ++i) { + a += ((double)poly[j].X + poly[i].X) * ((double)poly[j].Y - poly[i].Y); + j = i; + } + return -a * 0.5; +} +//------------------------------------------------------------------------------ + +double Area(const OutPt *op) { + const OutPt *startOp = op; + if (!op) + return 0; + double a = 0; + do { + a += (double)(op->Prev->Pt.X + op->Pt.X) * + (double)(op->Prev->Pt.Y - op->Pt.Y); + op = op->Next; + } while (op != startOp); + return a * 0.5; +} +//------------------------------------------------------------------------------ + +double Area(const OutRec &outRec) { return Area(outRec.Pts); } +//------------------------------------------------------------------------------ + +bool PointIsVertex(const IntPoint &Pt, OutPt *pp) { + OutPt *pp2 = pp; + do { + if (pp2->Pt == Pt) + return true; + pp2 = pp2->Next; + } while (pp2 != pp); + return false; +} +//------------------------------------------------------------------------------ + +// See "The Point in Polygon Problem for Arbitrary Polygons" by Hormann & +// Agathos +// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.5498&rep=rep1&type=pdf +int PointInPolygon(const IntPoint &pt, const Path &path) { + // returns 0 if false, +1 if true, -1 if pt ON polygon boundary + 
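  // Same even-odd crossing test as the Path overload above (Hormann & Agathos);
  // this version walks the OutPt linked list instead of indexing a Path and
  // stops once the traversal wraps back to the starting OutPt.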
int result = 0; + size_t cnt = path.size(); + if (cnt < 3) + return 0; + IntPoint ip = path[0]; + for (size_t i = 1; i <= cnt; ++i) { + IntPoint ipNext = (i == cnt ? path[0] : path[i]); + if (ipNext.Y == pt.Y) { + if ((ipNext.X == pt.X) || + (ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X)))) + return -1; + } + if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y)) { + if (ip.X >= pt.X) { + if (ipNext.X > pt.X) + result = 1 - result; + else { + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - + (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (ipNext.Y > ip.Y)) + result = 1 - result; + } + } else { + if (ipNext.X > pt.X) { + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - + (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (ipNext.Y > ip.Y)) + result = 1 - result; + } + } + } + ip = ipNext; + } + return result; +} +//------------------------------------------------------------------------------ + +int PointInPolygon(const IntPoint &pt, OutPt *op) { + // returns 0 if false, +1 if true, -1 if pt ON polygon boundary + int result = 0; + OutPt *startOp = op; + for (;;) { + if (op->Next->Pt.Y == pt.Y) { + if ((op->Next->Pt.X == pt.X) || + (op->Pt.Y == pt.Y && ((op->Next->Pt.X > pt.X) == (op->Pt.X < pt.X)))) + return -1; + } + if ((op->Pt.Y < pt.Y) != (op->Next->Pt.Y < pt.Y)) { + if (op->Pt.X >= pt.X) { + if (op->Next->Pt.X > pt.X) + result = 1 - result; + else { + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - + (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) + result = 1 - result; + } + } else { + if (op->Next->Pt.X > pt.X) { + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - + (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) + result = 1 - result; + } + } + } + op = op->Next; + if (startOp == op) + break; + } + return result; +} +//------------------------------------------------------------------------------ + +bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2) { + OutPt *op = OutPt1; + do { + // nb: PointInPolygon returns 0 if false, +1 if true, -1 if pt on polygon + int res = PointInPolygon(op->Pt, OutPt2); + if (res >= 0) + return res > 0; + op = op->Next; + } while (op != OutPt1); + return true; +} +//---------------------------------------------------------------------- + +bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(e1.Top.Y - e1.Bot.Y, e2.Top.X - e2.Bot.X) == + Int128Mul(e1.Top.X - e1.Bot.X, e2.Top.Y - e2.Bot.Y); + else +#endif + return (e1.Top.Y - e1.Bot.Y) * (e2.Top.X - e2.Bot.X) == + (e1.Top.X - e1.Bot.X) * (e2.Top.Y - e2.Bot.Y); +} +//------------------------------------------------------------------------------ + +bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3, + bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(pt1.Y - pt2.Y, pt2.X - pt3.X) == + Int128Mul(pt1.X - pt2.X, pt2.Y - pt3.Y); + else +#endif + return (pt1.Y - pt2.Y) * (pt2.X - pt3.X) == + (pt1.X - pt2.X) * (pt2.Y - pt3.Y); +} +//------------------------------------------------------------------------------ + +bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3, + const IntPoint pt4, bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(pt1.Y - pt2.Y, 
pt3.X - pt4.X) == + Int128Mul(pt1.X - pt2.X, pt3.Y - pt4.Y); + else +#endif + return (pt1.Y - pt2.Y) * (pt3.X - pt4.X) == + (pt1.X - pt2.X) * (pt3.Y - pt4.Y); +} +//------------------------------------------------------------------------------ + +inline bool IsHorizontal(TEdge &e) { return e.Dx == HORIZONTAL; } +//------------------------------------------------------------------------------ + +inline double GetDx(const IntPoint pt1, const IntPoint pt2) { + return (pt1.Y == pt2.Y) ? HORIZONTAL + : (double)(pt2.X - pt1.X) / (pt2.Y - pt1.Y); +} +//--------------------------------------------------------------------------- + +inline void SetDx(TEdge &e) { + cInt dy = (e.Top.Y - e.Bot.Y); + if (dy == 0) + e.Dx = HORIZONTAL; + else + e.Dx = (double)(e.Top.X - e.Bot.X) / dy; +} +//--------------------------------------------------------------------------- + +inline void SwapSides(TEdge &Edge1, TEdge &Edge2) { + EdgeSide Side = Edge1.Side; + Edge1.Side = Edge2.Side; + Edge2.Side = Side; +} +//------------------------------------------------------------------------------ + +inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2) { + int OutIdx = Edge1.OutIdx; + Edge1.OutIdx = Edge2.OutIdx; + Edge2.OutIdx = OutIdx; +} +//------------------------------------------------------------------------------ + +inline cInt TopX(TEdge &edge, const cInt currentY) { + return (currentY == edge.Top.Y) + ? edge.Top.X + : edge.Bot.X + Round(edge.Dx * (currentY - edge.Bot.Y)); +} +//------------------------------------------------------------------------------ + +void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip) { +#ifdef use_xyz + ip.Z = 0; +#endif + + double b1, b2; + if (Edge1.Dx == Edge2.Dx) { + ip.Y = Edge1.Curr.Y; + ip.X = TopX(Edge1, ip.Y); + return; + } else if (Edge1.Dx == 0) { + ip.X = Edge1.Bot.X; + if (IsHorizontal(Edge2)) + ip.Y = Edge2.Bot.Y; + else { + b2 = Edge2.Bot.Y - (Edge2.Bot.X / Edge2.Dx); + ip.Y = Round(ip.X / Edge2.Dx + b2); + } + } else if (Edge2.Dx == 0) { + ip.X = Edge2.Bot.X; + if (IsHorizontal(Edge1)) + ip.Y = Edge1.Bot.Y; + else { + b1 = Edge1.Bot.Y - (Edge1.Bot.X / Edge1.Dx); + ip.Y = Round(ip.X / Edge1.Dx + b1); + } + } else { + b1 = Edge1.Bot.X - Edge1.Bot.Y * Edge1.Dx; + b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx; + double q = (b2 - b1) / (Edge1.Dx - Edge2.Dx); + ip.Y = Round(q); + if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) + ip.X = Round(Edge1.Dx * q + b1); + else + ip.X = Round(Edge2.Dx * q + b2); + } + + if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y) { + if (Edge1.Top.Y > Edge2.Top.Y) + ip.Y = Edge1.Top.Y; + else + ip.Y = Edge2.Top.Y; + if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) + ip.X = TopX(Edge1, ip.Y); + else + ip.X = TopX(Edge2, ip.Y); + } + // finally, don't allow 'ip' to be BELOW curr.Y (ie bottom of scanbeam) ... + if (ip.Y > Edge1.Curr.Y) { + ip.Y = Edge1.Curr.Y; + // use the more vertical edge to derive X ... 
+ if (std::fabs(Edge1.Dx) > std::fabs(Edge2.Dx)) + ip.X = TopX(Edge2, ip.Y); + else + ip.X = TopX(Edge1, ip.Y); + } +} +//------------------------------------------------------------------------------ + +void ReversePolyPtLinks(OutPt *pp) { + if (!pp) + return; + OutPt *pp1, *pp2; + pp1 = pp; + do { + pp2 = pp1->Next; + pp1->Next = pp1->Prev; + pp1->Prev = pp2; + pp1 = pp2; + } while (pp1 != pp); +} +//------------------------------------------------------------------------------ + +void DisposeOutPts(OutPt *&pp) { + if (pp == 0) + return; + pp->Prev->Next = 0; + while (pp) { + OutPt *tmpPp = pp; + pp = pp->Next; + delete tmpPp; + } +} +//------------------------------------------------------------------------------ + +inline void InitEdge(TEdge *e, TEdge *eNext, TEdge *ePrev, const IntPoint &Pt) { + std::memset(e, int(0), sizeof(TEdge)); + e->Next = eNext; + e->Prev = ePrev; + e->Curr = Pt; + e->OutIdx = Unassigned; +} +//------------------------------------------------------------------------------ + +void InitEdge2(TEdge &e, PolyType Pt) { + if (e.Curr.Y >= e.Next->Curr.Y) { + e.Bot = e.Curr; + e.Top = e.Next->Curr; + } else { + e.Top = e.Curr; + e.Bot = e.Next->Curr; + } + SetDx(e); + e.PolyTyp = Pt; +} +//------------------------------------------------------------------------------ + +TEdge *RemoveEdge(TEdge *e) { + // removes e from double_linked_list (but without removing from memory) + e->Prev->Next = e->Next; + e->Next->Prev = e->Prev; + TEdge *result = e->Next; + e->Prev = 0; // flag as removed (see ClipperBase.Clear) + return result; +} +//------------------------------------------------------------------------------ + +inline void ReverseHorizontal(TEdge &e) { + // swap horizontal edges' Top and Bottom x's so they follow the natural + // progression of the bounds - ie so their xbots will align with the + // adjoining lower edge. [Helpful in the ProcessHorizontal() method.] + std::swap(e.Top.X, e.Bot.X); +#ifdef use_xyz + std::swap(e.Top.Z, e.Bot.Z); +#endif +} +//------------------------------------------------------------------------------ + +void SwapPoints(IntPoint &pt1, IntPoint &pt2) { + IntPoint tmp = pt1; + pt1 = pt2; + pt2 = tmp; +} +//------------------------------------------------------------------------------ + +bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a, + IntPoint pt2b, IntPoint &pt1, IntPoint &pt2) { + // precondition: segments are Collinear. 
+ if (Abs(pt1a.X - pt1b.X) > Abs(pt1a.Y - pt1b.Y)) { + if (pt1a.X > pt1b.X) + SwapPoints(pt1a, pt1b); + if (pt2a.X > pt2b.X) + SwapPoints(pt2a, pt2b); + if (pt1a.X > pt2a.X) + pt1 = pt1a; + else + pt1 = pt2a; + if (pt1b.X < pt2b.X) + pt2 = pt1b; + else + pt2 = pt2b; + return pt1.X < pt2.X; + } else { + if (pt1a.Y < pt1b.Y) + SwapPoints(pt1a, pt1b); + if (pt2a.Y < pt2b.Y) + SwapPoints(pt2a, pt2b); + if (pt1a.Y < pt2a.Y) + pt1 = pt1a; + else + pt1 = pt2a; + if (pt1b.Y > pt2b.Y) + pt2 = pt1b; + else + pt2 = pt2b; + return pt1.Y > pt2.Y; + } +} +//------------------------------------------------------------------------------ + +bool FirstIsBottomPt(const OutPt *btmPt1, const OutPt *btmPt2) { + OutPt *p = btmPt1->Prev; + while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) + p = p->Prev; + double dx1p = std::fabs(GetDx(btmPt1->Pt, p->Pt)); + p = btmPt1->Next; + while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) + p = p->Next; + double dx1n = std::fabs(GetDx(btmPt1->Pt, p->Pt)); + + p = btmPt2->Prev; + while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) + p = p->Prev; + double dx2p = std::fabs(GetDx(btmPt2->Pt, p->Pt)); + p = btmPt2->Next; + while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) + p = p->Next; + double dx2n = std::fabs(GetDx(btmPt2->Pt, p->Pt)); + + if (std::max(dx1p, dx1n) == std::max(dx2p, dx2n) && + std::min(dx1p, dx1n) == std::min(dx2p, dx2n)) + return Area(btmPt1) > 0; // if otherwise identical use orientation + else + return (dx1p >= dx2p && dx1p >= dx2n) || (dx1n >= dx2p && dx1n >= dx2n); +} +//------------------------------------------------------------------------------ + +OutPt *GetBottomPt(OutPt *pp) { + OutPt *dups = 0; + OutPt *p = pp->Next; + while (p != pp) { + if (p->Pt.Y > pp->Pt.Y) { + pp = p; + dups = 0; + } else if (p->Pt.Y == pp->Pt.Y && p->Pt.X <= pp->Pt.X) { + if (p->Pt.X < pp->Pt.X) { + dups = 0; + pp = p; + } else { + if (p->Next != pp && p->Prev != pp) + dups = p; + } + } + p = p->Next; + } + if (dups) { + // there appears to be at least 2 vertices at BottomPt so ... + while (dups != p) { + if (!FirstIsBottomPt(p, dups)) + pp = dups; + dups = dups->Next; + while (dups->Pt != pp->Pt) + dups = dups->Next; + } + } + return pp; +} +//------------------------------------------------------------------------------ + +bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1, const IntPoint pt2, + const IntPoint pt3) { + if ((pt1 == pt3) || (pt1 == pt2) || (pt3 == pt2)) + return false; + else if (pt1.X != pt3.X) + return (pt2.X > pt1.X) == (pt2.X < pt3.X); + else + return (pt2.Y > pt1.Y) == (pt2.Y < pt3.Y); +} +//------------------------------------------------------------------------------ + +bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b) { + if (seg1a > seg1b) + std::swap(seg1a, seg1b); + if (seg2a > seg2b) + std::swap(seg2a, seg2b); + return (seg1a < seg2b) && (seg2a < seg1b); +} + +//------------------------------------------------------------------------------ +// ClipperBase class methods ... 
+//------------------------------------------------------------------------------ + +ClipperBase::ClipperBase() // constructor +{ + m_CurrentLM = m_MinimaList.begin(); // begin() == end() here + m_UseFullRange = false; +} +//------------------------------------------------------------------------------ + +ClipperBase::~ClipperBase() // destructor +{ + Clear(); +} +//------------------------------------------------------------------------------ + +void RangeTest(const IntPoint &Pt, bool &useFullRange) { + if (useFullRange) { + if (Pt.X > hiRange || Pt.Y > hiRange || -Pt.X > hiRange || -Pt.Y > hiRange) + throw clipperException("Coordinate outside allowed range"); + } else if (Pt.X > loRange || Pt.Y > loRange || -Pt.X > loRange || + -Pt.Y > loRange) { + useFullRange = true; + RangeTest(Pt, useFullRange); + } +} +//------------------------------------------------------------------------------ + +TEdge *FindNextLocMin(TEdge *E) { + for (;;) { + while (E->Bot != E->Prev->Bot || E->Curr == E->Top) + E = E->Next; + if (!IsHorizontal(*E) && !IsHorizontal(*E->Prev)) + break; + while (IsHorizontal(*E->Prev)) + E = E->Prev; + TEdge *E2 = E; + while (IsHorizontal(*E)) + E = E->Next; + if (E->Top.Y == E->Prev->Bot.Y) + continue; // ie just an intermediate horz. + if (E2->Prev->Bot.X < E->Bot.X) + E = E2; + break; + } + return E; +} +//------------------------------------------------------------------------------ + +TEdge *ClipperBase::ProcessBound(TEdge *E, bool NextIsForward) { + TEdge *Result = E; + TEdge *Horz = 0; + + if (E->OutIdx == Skip) { + // if edges still remain in the current bound beyond the skip edge then + // create another LocMin and call ProcessBound once more + if (NextIsForward) { + while (E->Top.Y == E->Next->Bot.Y) + E = E->Next; + // don't include top horizontals when parsing a bound a second time, + // they will be contained in the opposite bound ... + while (E != Result && IsHorizontal(*E)) + E = E->Prev; + } else { + while (E->Top.Y == E->Prev->Bot.Y) + E = E->Prev; + while (E != Result && IsHorizontal(*E)) + E = E->Next; + } + + if (E == Result) { + if (NextIsForward) + Result = E->Next; + else + Result = E->Prev; + } else { + // there are more edges in the bound beyond result starting with E + if (NextIsForward) + E = Result->Next; + else + E = Result->Prev; + MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + locMin.LeftBound = 0; + locMin.RightBound = E; + E->WindDelta = 0; + Result = ProcessBound(E, NextIsForward); + m_MinimaList.push_back(locMin); + } + return Result; + } + + TEdge *EStart; + + if (IsHorizontal(*E)) { + // We need to be careful with open paths because this may not be a + // true local minima (ie E may be following a skip edge). + // Also, consecutive horz. edges may start heading left before going right. 
+ if (NextIsForward) + EStart = E->Prev; + else + EStart = E->Next; + if (IsHorizontal(*EStart)) // ie an adjoining horizontal skip edge + { + if (EStart->Bot.X != E->Bot.X && EStart->Top.X != E->Bot.X) + ReverseHorizontal(*E); + } else if (EStart->Bot.X != E->Bot.X) + ReverseHorizontal(*E); + } + + EStart = E; + if (NextIsForward) { + while (Result->Top.Y == Result->Next->Bot.Y && Result->Next->OutIdx != Skip) + Result = Result->Next; + if (IsHorizontal(*Result) && Result->Next->OutIdx != Skip) { + // nb: at the top of a bound, horizontals are added to the bound + // only when the preceding edge attaches to the horizontal's left vertex + // unless a Skip edge is encountered when that becomes the top divide + Horz = Result; + while (IsHorizontal(*Horz->Prev)) + Horz = Horz->Prev; + if (Horz->Prev->Top.X > Result->Next->Top.X) + Result = Horz->Prev; + } + while (E != Result) { + E->NextInLML = E->Next; + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + E = E->Next; + } + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + Result = Result->Next; // move to the edge just beyond current bound + } else { + while (Result->Top.Y == Result->Prev->Bot.Y && Result->Prev->OutIdx != Skip) + Result = Result->Prev; + if (IsHorizontal(*Result) && Result->Prev->OutIdx != Skip) { + Horz = Result; + while (IsHorizontal(*Horz->Next)) + Horz = Horz->Next; + if (Horz->Next->Top.X == Result->Prev->Top.X || + Horz->Next->Top.X > Result->Prev->Top.X) + Result = Horz->Next; + } + + while (E != Result) { + E->NextInLML = E->Prev; + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X) + ReverseHorizontal(*E); + E = E->Prev; + } + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X) + ReverseHorizontal(*E); + Result = Result->Prev; // move to the edge just beyond current bound + } + + return Result; +} +//------------------------------------------------------------------------------ + +bool ClipperBase::AddPath(const Path &pg, PolyType PolyTyp, bool Closed) { +#ifdef use_lines + if (!Closed && PolyTyp == ptClip) + throw clipperException("AddPath: Open paths must be subject."); +#else + if (!Closed) + throw clipperException("AddPath: Open paths have been disabled."); +#endif + + int highI = (int)pg.size() - 1; + if (Closed) + while (highI > 0 && (pg[highI] == pg[0])) + --highI; + while (highI > 0 && (pg[highI] == pg[highI - 1])) + --highI; + if ((Closed && highI < 2) || (!Closed && highI < 1)) + return false; + + // create a new edge array ... + TEdge *edges = new TEdge[highI + 1]; + + bool IsFlat = true; + // 1. Basic (first) edge initialization ... + try { + edges[1].Curr = pg[1]; + RangeTest(pg[0], m_UseFullRange); + RangeTest(pg[highI], m_UseFullRange); + InitEdge(&edges[0], &edges[1], &edges[highI], pg[0]); + InitEdge(&edges[highI], &edges[0], &edges[highI - 1], pg[highI]); + for (int i = highI - 1; i >= 1; --i) { + RangeTest(pg[i], m_UseFullRange); + InitEdge(&edges[i], &edges[i + 1], &edges[i - 1], pg[i]); + } + } catch (...) { + delete[] edges; + throw; // range test fails + } + TEdge *eStart = &edges[0]; + + // 2. Remove duplicate vertices, and (when closed) collinear edges ... + TEdge *E = eStart, *eLoopStop = eStart; + for (;;) { + // nb: allows matching start and end points when not Closed ... 
+ if (E->Curr == E->Next->Curr && (Closed || E->Next != eStart)) { + if (E == E->Next) + break; + if (E == eStart) + eStart = E->Next; + E = RemoveEdge(E); + eLoopStop = E; + continue; + } + if (E->Prev == E->Next) + break; // only two vertices + else if (Closed && + SlopesEqual(E->Prev->Curr, E->Curr, E->Next->Curr, + m_UseFullRange) && + (!m_PreserveCollinear || + !Pt2IsBetweenPt1AndPt3(E->Prev->Curr, E->Curr, E->Next->Curr))) { + // Collinear edges are allowed for open paths but in closed paths + // the default is to merge adjacent collinear edges into a single edge. + // However, if the PreserveCollinear property is enabled, only overlapping + // collinear edges (ie spikes) will be removed from closed paths. + if (E == eStart) + eStart = E->Next; + E = RemoveEdge(E); + E = E->Prev; + eLoopStop = E; + continue; + } + E = E->Next; + if ((E == eLoopStop) || (!Closed && E->Next == eStart)) + break; + } + + if ((!Closed && (E == E->Next)) || (Closed && (E->Prev == E->Next))) { + delete[] edges; + return false; + } + + if (!Closed) { + m_HasOpenPaths = true; + eStart->Prev->OutIdx = Skip; + } + + // 3. Do second stage of edge initialization ... + E = eStart; + do { + InitEdge2(*E, PolyTyp); + E = E->Next; + if (IsFlat && E->Curr.Y != eStart->Curr.Y) + IsFlat = false; + } while (E != eStart); + + // 4. Finally, add edge bounds to LocalMinima list ... + + // Totally flat paths must be handled differently when adding them + // to LocalMinima list to avoid endless loops etc ... + if (IsFlat) { + if (Closed) { + delete[] edges; + return false; + } + E->Prev->OutIdx = Skip; + MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + locMin.LeftBound = 0; + locMin.RightBound = E; + locMin.RightBound->Side = esRight; + locMin.RightBound->WindDelta = 0; + for (;;) { + if (E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + if (E->Next->OutIdx == Skip) + break; + E->NextInLML = E->Next; + E = E->Next; + } + m_MinimaList.push_back(locMin); + m_edges.push_back(edges); + return true; + } + + m_edges.push_back(edges); + bool leftBoundIsForward; + TEdge *EMin = 0; + + // workaround to avoid an endless loop in the while loop below when + // open paths have matching start and end points ... + if (E->Prev->Bot == E->Prev->Top) + E = E->Next; + + for (;;) { + E = FindNextLocMin(E); + if (E == EMin) + break; + else if (!EMin) + EMin = E; + + // E and E.Prev now share a local minima (left aligned if horizontal). + // Compare their slopes to find which starts which bound ... 
+ MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + if (E->Dx < E->Prev->Dx) { + locMin.LeftBound = E->Prev; + locMin.RightBound = E; + leftBoundIsForward = false; // Q.nextInLML = Q.prev + } else { + locMin.LeftBound = E; + locMin.RightBound = E->Prev; + leftBoundIsForward = true; // Q.nextInLML = Q.next + } + + if (!Closed) + locMin.LeftBound->WindDelta = 0; + else if (locMin.LeftBound->Next == locMin.RightBound) + locMin.LeftBound->WindDelta = -1; + else + locMin.LeftBound->WindDelta = 1; + locMin.RightBound->WindDelta = -locMin.LeftBound->WindDelta; + + E = ProcessBound(locMin.LeftBound, leftBoundIsForward); + if (E->OutIdx == Skip) + E = ProcessBound(E, leftBoundIsForward); + + TEdge *E2 = ProcessBound(locMin.RightBound, !leftBoundIsForward); + if (E2->OutIdx == Skip) + E2 = ProcessBound(E2, !leftBoundIsForward); + + if (locMin.LeftBound->OutIdx == Skip) + locMin.LeftBound = 0; + else if (locMin.RightBound->OutIdx == Skip) + locMin.RightBound = 0; + m_MinimaList.push_back(locMin); + if (!leftBoundIsForward) + E = E2; + } + return true; +} +//------------------------------------------------------------------------------ + +bool ClipperBase::AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed) { + bool result = false; + for (Paths::size_type i = 0; i < ppg.size(); ++i) + if (AddPath(ppg[i], PolyTyp, Closed)) + result = true; + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::Clear() { + DisposeLocalMinimaList(); + for (EdgeList::size_type i = 0; i < m_edges.size(); ++i) { + TEdge *edges = m_edges[i]; + delete[] edges; + } + m_edges.clear(); + m_UseFullRange = false; + m_HasOpenPaths = false; +} +//------------------------------------------------------------------------------ + +void ClipperBase::Reset() { + m_CurrentLM = m_MinimaList.begin(); + if (m_CurrentLM == m_MinimaList.end()) + return; // ie nothing to process + std::sort(m_MinimaList.begin(), m_MinimaList.end(), LocMinSorter()); + + m_Scanbeam = ScanbeamList(); // clears/resets priority_queue + // reset all edges ... 
+ for (MinimaList::iterator lm = m_MinimaList.begin(); lm != m_MinimaList.end(); + ++lm) { + InsertScanbeam(lm->Y); + TEdge *e = lm->LeftBound; + if (e) { + e->Curr = e->Bot; + e->Side = esLeft; + e->OutIdx = Unassigned; + } + + e = lm->RightBound; + if (e) { + e->Curr = e->Bot; + e->Side = esRight; + e->OutIdx = Unassigned; + } + } + m_ActiveEdges = 0; + m_CurrentLM = m_MinimaList.begin(); +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeLocalMinimaList() { + m_MinimaList.clear(); + m_CurrentLM = m_MinimaList.begin(); +} +//------------------------------------------------------------------------------ + +bool ClipperBase::PopLocalMinima(cInt Y, const LocalMinimum *&locMin) { + if (m_CurrentLM == m_MinimaList.end() || (*m_CurrentLM).Y != Y) + return false; + locMin = &(*m_CurrentLM); + ++m_CurrentLM; + return true; +} +//------------------------------------------------------------------------------ + +IntRect ClipperBase::GetBounds() { + IntRect result; + MinimaList::iterator lm = m_MinimaList.begin(); + if (lm == m_MinimaList.end()) { + result.left = result.top = result.right = result.bottom = 0; + return result; + } + result.left = lm->LeftBound->Bot.X; + result.top = lm->LeftBound->Bot.Y; + result.right = lm->LeftBound->Bot.X; + result.bottom = lm->LeftBound->Bot.Y; + while (lm != m_MinimaList.end()) { + // todo - needs fixing for open paths + result.bottom = std::max(result.bottom, lm->LeftBound->Bot.Y); + TEdge *e = lm->LeftBound; + for (;;) { + TEdge *bottomE = e; + while (e->NextInLML) { + if (e->Bot.X < result.left) + result.left = e->Bot.X; + if (e->Bot.X > result.right) + result.right = e->Bot.X; + e = e->NextInLML; + } + result.left = std::min(result.left, e->Bot.X); + result.right = std::max(result.right, e->Bot.X); + result.left = std::min(result.left, e->Top.X); + result.right = std::max(result.right, e->Top.X); + result.top = std::min(result.top, e->Top.Y); + if (bottomE == lm->LeftBound) + e = lm->RightBound; + else + break; + } + ++lm; + } + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::InsertScanbeam(const cInt Y) { m_Scanbeam.push(Y); } +//------------------------------------------------------------------------------ + +bool ClipperBase::PopScanbeam(cInt &Y) { + if (m_Scanbeam.empty()) + return false; + Y = m_Scanbeam.top(); + m_Scanbeam.pop(); + while (!m_Scanbeam.empty() && Y == m_Scanbeam.top()) { + m_Scanbeam.pop(); + } // Pop duplicates. 
+ return true; +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeAllOutRecs() { + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) + DisposeOutRec(i); + m_PolyOuts.clear(); +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeOutRec(PolyOutList::size_type index) { + OutRec *outRec = m_PolyOuts[index]; + if (outRec->Pts) + DisposeOutPts(outRec->Pts); + delete outRec; + m_PolyOuts[index] = 0; +} +//------------------------------------------------------------------------------ + +void ClipperBase::DeleteFromAEL(TEdge *e) { + TEdge *AelPrev = e->PrevInAEL; + TEdge *AelNext = e->NextInAEL; + if (!AelPrev && !AelNext && (e != m_ActiveEdges)) + return; // already deleted + if (AelPrev) + AelPrev->NextInAEL = AelNext; + else + m_ActiveEdges = AelNext; + if (AelNext) + AelNext->PrevInAEL = AelPrev; + e->NextInAEL = 0; + e->PrevInAEL = 0; +} +//------------------------------------------------------------------------------ + +OutRec *ClipperBase::CreateOutRec() { + OutRec *result = new OutRec; + result->IsHole = false; + result->IsOpen = false; + result->FirstLeft = 0; + result->Pts = 0; + result->BottomPt = 0; + result->PolyNd = 0; + m_PolyOuts.push_back(result); + result->Idx = (int)m_PolyOuts.size() - 1; + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::SwapPositionsInAEL(TEdge *Edge1, TEdge *Edge2) { + // check that one or other edge hasn't already been removed from AEL ... + if (Edge1->NextInAEL == Edge1->PrevInAEL || + Edge2->NextInAEL == Edge2->PrevInAEL) + return; + + if (Edge1->NextInAEL == Edge2) { + TEdge *Next = Edge2->NextInAEL; + if (Next) + Next->PrevInAEL = Edge1; + TEdge *Prev = Edge1->PrevInAEL; + if (Prev) + Prev->NextInAEL = Edge2; + Edge2->PrevInAEL = Prev; + Edge2->NextInAEL = Edge1; + Edge1->PrevInAEL = Edge2; + Edge1->NextInAEL = Next; + } else if (Edge2->NextInAEL == Edge1) { + TEdge *Next = Edge1->NextInAEL; + if (Next) + Next->PrevInAEL = Edge2; + TEdge *Prev = Edge2->PrevInAEL; + if (Prev) + Prev->NextInAEL = Edge1; + Edge1->PrevInAEL = Prev; + Edge1->NextInAEL = Edge2; + Edge2->PrevInAEL = Edge1; + Edge2->NextInAEL = Next; + } else { + TEdge *Next = Edge1->NextInAEL; + TEdge *Prev = Edge1->PrevInAEL; + Edge1->NextInAEL = Edge2->NextInAEL; + if (Edge1->NextInAEL) + Edge1->NextInAEL->PrevInAEL = Edge1; + Edge1->PrevInAEL = Edge2->PrevInAEL; + if (Edge1->PrevInAEL) + Edge1->PrevInAEL->NextInAEL = Edge1; + Edge2->NextInAEL = Next; + if (Edge2->NextInAEL) + Edge2->NextInAEL->PrevInAEL = Edge2; + Edge2->PrevInAEL = Prev; + if (Edge2->PrevInAEL) + Edge2->PrevInAEL->NextInAEL = Edge2; + } + + if (!Edge1->PrevInAEL) + m_ActiveEdges = Edge1; + else if (!Edge2->PrevInAEL) + m_ActiveEdges = Edge2; +} +//------------------------------------------------------------------------------ + +void ClipperBase::UpdateEdgeIntoAEL(TEdge *&e) { + if (!e->NextInLML) + throw clipperException("UpdateEdgeIntoAEL: invalid call"); + + e->NextInLML->OutIdx = e->OutIdx; + TEdge *AelPrev = e->PrevInAEL; + TEdge *AelNext = e->NextInAEL; + if (AelPrev) + AelPrev->NextInAEL = e->NextInLML; + else + m_ActiveEdges = e->NextInLML; + if (AelNext) + AelNext->PrevInAEL = e->NextInLML; + e->NextInLML->Side = e->Side; + e->NextInLML->WindDelta = e->WindDelta; + e->NextInLML->WindCnt = e->WindCnt; + e->NextInLML->WindCnt2 = e->WindCnt2; + e = e->NextInLML; + e->Curr = e->Bot; + e->PrevInAEL = AelPrev; + 
e->NextInAEL = AelNext; + if (!IsHorizontal(*e)) + InsertScanbeam(e->Top.Y); +} +//------------------------------------------------------------------------------ + +bool ClipperBase::LocalMinimaPending() { + return (m_CurrentLM != m_MinimaList.end()); +} + +//------------------------------------------------------------------------------ +// TClipper methods ... +//------------------------------------------------------------------------------ + +Clipper::Clipper(int initOptions) + : ClipperBase() // constructor +{ + m_ExecuteLocked = false; + m_UseFullRange = false; + m_ReverseOutput = ((initOptions & ioReverseSolution) != 0); + m_StrictSimple = ((initOptions & ioStrictlySimple) != 0); + m_PreserveCollinear = ((initOptions & ioPreserveCollinear) != 0); + m_HasOpenPaths = false; +#ifdef use_xyz + m_ZFill = 0; +#endif +} +//------------------------------------------------------------------------------ + +#ifdef use_xyz +void Clipper::ZFillFunction(ZFillCallback zFillFunc) { m_ZFill = zFillFunc; } +//------------------------------------------------------------------------------ +#endif + +bool Clipper::Execute(ClipType clipType, Paths &solution, + PolyFillType fillType) { + return Execute(clipType, solution, fillType, fillType); +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, PolyTree &polytree, + PolyFillType fillType) { + return Execute(clipType, polytree, fillType, fillType); +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, Paths &solution, + PolyFillType subjFillType, PolyFillType clipFillType) { + if (m_ExecuteLocked) + return false; + if (m_HasOpenPaths) + throw clipperException( + "Error: PolyTree struct is needed for open path clipping."); + m_ExecuteLocked = true; + solution.resize(0); + m_SubjFillType = subjFillType; + m_ClipFillType = clipFillType; + m_ClipType = clipType; + m_UsingPolyTree = false; + bool succeeded = ExecuteInternal(); + if (succeeded) + BuildResult(solution); + DisposeAllOutRecs(); + m_ExecuteLocked = false; + return succeeded; +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, PolyTree &polytree, + PolyFillType subjFillType, PolyFillType clipFillType) { + if (m_ExecuteLocked) + return false; + m_ExecuteLocked = true; + m_SubjFillType = subjFillType; + m_ClipFillType = clipFillType; + m_ClipType = clipType; + m_UsingPolyTree = true; + bool succeeded = ExecuteInternal(); + if (succeeded) + BuildResult2(polytree); + DisposeAllOutRecs(); + m_ExecuteLocked = false; + return succeeded; +} +//------------------------------------------------------------------------------ + +void Clipper::FixHoleLinkage(OutRec &outrec) { + // skip OutRecs that (a) contain outermost polygons or + //(b) already have the correct owner/child linkage ... 
+ if (!outrec.FirstLeft || + (outrec.IsHole != outrec.FirstLeft->IsHole && outrec.FirstLeft->Pts)) + return; + + OutRec *orfl = outrec.FirstLeft; + while (orfl && ((orfl->IsHole == outrec.IsHole) || !orfl->Pts)) + orfl = orfl->FirstLeft; + outrec.FirstLeft = orfl; +} +//------------------------------------------------------------------------------ + +bool Clipper::ExecuteInternal() { + bool succeeded = true; + try { + Reset(); + m_Maxima = MaximaList(); + m_SortedEdges = 0; + + succeeded = true; + cInt botY, topY; + if (!PopScanbeam(botY)) + return false; + InsertLocalMinimaIntoAEL(botY); + while (PopScanbeam(topY) || LocalMinimaPending()) { + ProcessHorizontals(); + ClearGhostJoins(); + if (!ProcessIntersections(topY)) { + succeeded = false; + break; + } + ProcessEdgesAtTopOfScanbeam(topY); + botY = topY; + InsertLocalMinimaIntoAEL(botY); + } + } catch (...) { + succeeded = false; + } + + if (succeeded) { + // fix orientations ... + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->Pts || outRec->IsOpen) + continue; + if ((outRec->IsHole ^ m_ReverseOutput) == (Area(*outRec) > 0)) + ReversePolyPtLinks(outRec->Pts); + } + + if (!m_Joins.empty()) + JoinCommonEdges(); + + // unfortunately FixupOutPolygon() must be done after JoinCommonEdges() + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->Pts) + continue; + if (outRec->IsOpen) + FixupOutPolyline(*outRec); + else + FixupOutPolygon(*outRec); + } + + if (m_StrictSimple) + DoSimplePolygons(); + } + + ClearJoins(); + ClearGhostJoins(); + return succeeded; +} +//------------------------------------------------------------------------------ + +void Clipper::SetWindingCount(TEdge &edge) { + TEdge *e = edge.PrevInAEL; + // find the edge of the same polytype that immediately preceeds 'edge' in AEL + while (e && ((e->PolyTyp != edge.PolyTyp) || (e->WindDelta == 0))) + e = e->PrevInAEL; + if (!e) { + if (edge.WindDelta == 0) { + PolyFillType pft = + (edge.PolyTyp == ptSubject ? m_SubjFillType : m_ClipFillType); + edge.WindCnt = (pft == pftNegative ? -1 : 1); + } else + edge.WindCnt = edge.WindDelta; + edge.WindCnt2 = 0; + e = m_ActiveEdges; // ie get ready to calc WindCnt2 + } else if (edge.WindDelta == 0 && m_ClipType != ctUnion) { + edge.WindCnt = 1; + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } else if (IsEvenOddFillType(edge)) { + // EvenOdd filling ... + if (edge.WindDelta == 0) { + // are we inside a subj polygon ... + bool Inside = true; + TEdge *e2 = e->PrevInAEL; + while (e2) { + if (e2->PolyTyp == e->PolyTyp && e2->WindDelta != 0) + Inside = !Inside; + e2 = e2->PrevInAEL; + } + edge.WindCnt = (Inside ? 0 : 1); + } else { + edge.WindCnt = edge.WindDelta; + } + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } else { + // nonZero, Positive or Negative filling ... + if (e->WindCnt * e->WindDelta < 0) { + // prev edge is 'decreasing' WindCount (WC) toward zero + // so we're outside the previous polygon ... + if (Abs(e->WindCnt) > 1) { + // outside prev poly but still inside another. + // when reversing direction of prev poly use the same WC + if (e->WindDelta * edge.WindDelta < 0) + edge.WindCnt = e->WindCnt; + // otherwise continue to 'decrease' WC ... + else + edge.WindCnt = e->WindCnt + edge.WindDelta; + } else + // now outside all polys of same polytype so set own WC ... + edge.WindCnt = (edge.WindDelta == 0 ? 
1 : edge.WindDelta); + } else { + // prev edge is 'increasing' WindCount (WC) away from zero + // so we're inside the previous polygon ... + if (edge.WindDelta == 0) + edge.WindCnt = (e->WindCnt < 0 ? e->WindCnt - 1 : e->WindCnt + 1); + // if wind direction is reversing prev then use same WC + else if (e->WindDelta * edge.WindDelta < 0) + edge.WindCnt = e->WindCnt; + // otherwise add to WC ... + else + edge.WindCnt = e->WindCnt + edge.WindDelta; + } + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } + + // update WindCnt2 ... + if (IsEvenOddAltFillType(edge)) { + // EvenOdd filling ... + while (e != &edge) { + if (e->WindDelta != 0) + edge.WindCnt2 = (edge.WindCnt2 == 0 ? 1 : 0); + e = e->NextInAEL; + } + } else { + // nonZero, Positive or Negative filling ... + while (e != &edge) { + edge.WindCnt2 += e->WindDelta; + e = e->NextInAEL; + } + } +} +//------------------------------------------------------------------------------ + +bool Clipper::IsEvenOddFillType(const TEdge &edge) const { + if (edge.PolyTyp == ptSubject) + return m_SubjFillType == pftEvenOdd; + else + return m_ClipFillType == pftEvenOdd; +} +//------------------------------------------------------------------------------ + +bool Clipper::IsEvenOddAltFillType(const TEdge &edge) const { + if (edge.PolyTyp == ptSubject) + return m_ClipFillType == pftEvenOdd; + else + return m_SubjFillType == pftEvenOdd; +} +//------------------------------------------------------------------------------ + +bool Clipper::IsContributing(const TEdge &edge) const { + PolyFillType pft, pft2; + if (edge.PolyTyp == ptSubject) { + pft = m_SubjFillType; + pft2 = m_ClipFillType; + } else { + pft = m_ClipFillType; + pft2 = m_SubjFillType; + } + + switch (pft) { + case pftEvenOdd: + // return false if a subj line has been flagged as inside a subj polygon + if (edge.WindDelta == 0 && edge.WindCnt != 1) + return false; + break; + case pftNonZero: + if (Abs(edge.WindCnt) != 1) + return false; + break; + case pftPositive: + if (edge.WindCnt != 1) + return false; + break; + default: // pftNegative + if (edge.WindCnt != -1) + return false; + } + + switch (m_ClipType) { + case ctIntersection: + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 != 0); + case pftPositive: + return (edge.WindCnt2 > 0); + default: + return (edge.WindCnt2 < 0); + } + break; + case ctUnion: + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + break; + case ctDifference: + if (edge.PolyTyp == ptSubject) + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + else + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 != 0); + case pftPositive: + return (edge.WindCnt2 > 0); + default: + return (edge.WindCnt2 < 0); + } + break; + case ctXor: + if (edge.WindDelta == 0) // XOr always contributing unless open + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + else + return true; + break; + default: + return true; + } +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) { + OutPt *result; + TEdge *e, 
*prevE; + if (IsHorizontal(*e2) || (e1->Dx > e2->Dx)) { + result = AddOutPt(e1, Pt); + e2->OutIdx = e1->OutIdx; + e1->Side = esLeft; + e2->Side = esRight; + e = e1; + if (e->PrevInAEL == e2) + prevE = e2->PrevInAEL; + else + prevE = e->PrevInAEL; + } else { + result = AddOutPt(e2, Pt); + e1->OutIdx = e2->OutIdx; + e1->Side = esRight; + e2->Side = esLeft; + e = e2; + if (e->PrevInAEL == e1) + prevE = e1->PrevInAEL; + else + prevE = e->PrevInAEL; + } + + if (prevE && prevE->OutIdx >= 0 && prevE->Top.Y < Pt.Y && e->Top.Y < Pt.Y) { + cInt xPrev = TopX(*prevE, Pt.Y); + cInt xE = TopX(*e, Pt.Y); + if (xPrev == xE && (e->WindDelta != 0) && (prevE->WindDelta != 0) && + SlopesEqual(IntPoint(xPrev, Pt.Y), prevE->Top, IntPoint(xE, Pt.Y), + e->Top, m_UseFullRange)) { + OutPt *outPt = AddOutPt(prevE, Pt); + AddJoin(result, outPt, e->Top); + } + } + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) { + AddOutPt(e1, Pt); + if (e2->WindDelta == 0) + AddOutPt(e2, Pt); + if (e1->OutIdx == e2->OutIdx) { + e1->OutIdx = Unassigned; + e2->OutIdx = Unassigned; + } else if (e1->OutIdx < e2->OutIdx) + AppendPolygon(e1, e2); + else + AppendPolygon(e2, e1); +} +//------------------------------------------------------------------------------ + +void Clipper::AddEdgeToSEL(TEdge *edge) { + // SEL pointers in PEdge are reused to build a list of horizontal edges. + // However, we don't need to worry about order with horizontal edge + // processing. + if (!m_SortedEdges) { + m_SortedEdges = edge; + edge->PrevInSEL = 0; + edge->NextInSEL = 0; + } else { + edge->NextInSEL = m_SortedEdges; + edge->PrevInSEL = 0; + m_SortedEdges->PrevInSEL = edge; + m_SortedEdges = edge; + } +} +//------------------------------------------------------------------------------ + +bool Clipper::PopEdgeFromSEL(TEdge *&edge) { + if (!m_SortedEdges) + return false; + edge = m_SortedEdges; + DeleteFromSEL(m_SortedEdges); + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::CopyAELToSEL() { + TEdge *e = m_ActiveEdges; + m_SortedEdges = e; + while (e) { + e->PrevInSEL = e->PrevInAEL; + e->NextInSEL = e->NextInAEL; + e = e->NextInAEL; + } +} +//------------------------------------------------------------------------------ + +void Clipper::AddJoin(OutPt *op1, OutPt *op2, const IntPoint OffPt) { + Join *j = new Join; + j->OutPt1 = op1; + j->OutPt2 = op2; + j->OffPt = OffPt; + m_Joins.push_back(j); +} +//------------------------------------------------------------------------------ + +void Clipper::ClearJoins() { + for (JoinList::size_type i = 0; i < m_Joins.size(); i++) + delete m_Joins[i]; + m_Joins.resize(0); +} +//------------------------------------------------------------------------------ + +void Clipper::ClearGhostJoins() { + for (JoinList::size_type i = 0; i < m_GhostJoins.size(); i++) + delete m_GhostJoins[i]; + m_GhostJoins.resize(0); +} +//------------------------------------------------------------------------------ + +void Clipper::AddGhostJoin(OutPt *op, const IntPoint OffPt) { + Join *j = new Join; + j->OutPt1 = op; + j->OutPt2 = 0; + j->OffPt = OffPt; + m_GhostJoins.push_back(j); +} +//------------------------------------------------------------------------------ + +void Clipper::InsertLocalMinimaIntoAEL(const cInt botY) { + const LocalMinimum *lm; + while (PopLocalMinima(botY, lm)) { + TEdge *lb = lm->LeftBound; + TEdge *rb = lm->RightBound; + + OutPt 
*Op1 = 0; + if (!lb || !rb) { + // nb: don't insert LB into either AEL or SEL + InsertEdgeIntoAEL(rb, 0); + SetWindingCount(*rb); + if (IsContributing(*rb)) + Op1 = AddOutPt(rb, rb->Bot); + //} else if (!rb) { + // InsertEdgeIntoAEL(lb, 0); + // SetWindingCount(*lb); + // if (IsContributing(*lb)) + // Op1 = AddOutPt(lb, lb->Bot); + InsertScanbeam(lb->Top.Y); + } else { + InsertEdgeIntoAEL(lb, 0); + InsertEdgeIntoAEL(rb, lb); + SetWindingCount(*lb); + rb->WindCnt = lb->WindCnt; + rb->WindCnt2 = lb->WindCnt2; + if (IsContributing(*lb)) + Op1 = AddLocalMinPoly(lb, rb, lb->Bot); + InsertScanbeam(lb->Top.Y); + } + + if (rb) { + if (IsHorizontal(*rb)) { + AddEdgeToSEL(rb); + if (rb->NextInLML) + InsertScanbeam(rb->NextInLML->Top.Y); + } else + InsertScanbeam(rb->Top.Y); + } + + if (!lb || !rb) + continue; + + // if any output polygons share an edge, they'll need joining later ... + if (Op1 && IsHorizontal(*rb) && m_GhostJoins.size() > 0 && + (rb->WindDelta != 0)) { + for (JoinList::size_type i = 0; i < m_GhostJoins.size(); ++i) { + Join *jr = m_GhostJoins[i]; + // if the horizontal Rb and a 'ghost' horizontal overlap, then convert + // the 'ghost' join to a real join ready for later ... + if (HorzSegmentsOverlap(jr->OutPt1->Pt.X, jr->OffPt.X, rb->Bot.X, + rb->Top.X)) + AddJoin(jr->OutPt1, Op1, jr->OffPt); + } + } + + if (lb->OutIdx >= 0 && lb->PrevInAEL && + lb->PrevInAEL->Curr.X == lb->Bot.X && lb->PrevInAEL->OutIdx >= 0 && + SlopesEqual(lb->PrevInAEL->Bot, lb->PrevInAEL->Top, lb->Curr, lb->Top, + m_UseFullRange) && + (lb->WindDelta != 0) && (lb->PrevInAEL->WindDelta != 0)) { + OutPt *Op2 = AddOutPt(lb->PrevInAEL, lb->Bot); + AddJoin(Op1, Op2, lb->Top); + } + + if (lb->NextInAEL != rb) { + if (rb->OutIdx >= 0 && rb->PrevInAEL->OutIdx >= 0 && + SlopesEqual(rb->PrevInAEL->Curr, rb->PrevInAEL->Top, rb->Curr, + rb->Top, m_UseFullRange) && + (rb->WindDelta != 0) && (rb->PrevInAEL->WindDelta != 0)) { + OutPt *Op2 = AddOutPt(rb->PrevInAEL, rb->Bot); + AddJoin(Op1, Op2, rb->Top); + } + + TEdge *e = lb->NextInAEL; + if (e) { + while (e != rb) { + // nb: For calculating winding counts etc, IntersectEdges() assumes + // that param1 will be to the Right of param2 ABOVE the intersection + // ... + IntersectEdges(rb, e, lb->Curr); // order important here + e = e->NextInAEL; + } + } + } + } +} +//------------------------------------------------------------------------------ + +void Clipper::DeleteFromSEL(TEdge *e) { + TEdge *SelPrev = e->PrevInSEL; + TEdge *SelNext = e->NextInSEL; + if (!SelPrev && !SelNext && (e != m_SortedEdges)) + return; // already deleted + if (SelPrev) + SelPrev->NextInSEL = SelNext; + else + m_SortedEdges = SelNext; + if (SelNext) + SelNext->PrevInSEL = SelPrev; + e->NextInSEL = 0; + e->PrevInSEL = 0; +} +//------------------------------------------------------------------------------ + +#ifdef use_xyz +void Clipper::SetZ(IntPoint &pt, TEdge &e1, TEdge &e2) { + if (pt.Z != 0 || !m_ZFill) + return; + else if (pt == e1.Bot) + pt.Z = e1.Bot.Z; + else if (pt == e1.Top) + pt.Z = e1.Top.Z; + else if (pt == e2.Bot) + pt.Z = e2.Bot.Z; + else if (pt == e2.Top) + pt.Z = e2.Top.Z; + else + (*m_ZFill)(e1.Bot, e1.Top, e2.Bot, e2.Top, pt); +} +//------------------------------------------------------------------------------ +#endif + +void Clipper::IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &Pt) { + bool e1Contributing = (e1->OutIdx >= 0); + bool e2Contributing = (e2->OutIdx >= 0); + +#ifdef use_xyz + SetZ(Pt, *e1, *e2); +#endif + +#ifdef use_lines + // if either edge is on an OPEN path ... 
+ if (e1->WindDelta == 0 || e2->WindDelta == 0) { + // ignore subject-subject open path intersections UNLESS they + // are both open paths, AND they are both 'contributing maximas' ... + if (e1->WindDelta == 0 && e2->WindDelta == 0) + return; + + // if intersecting a subj line with a subj poly ... + else if (e1->PolyTyp == e2->PolyTyp && e1->WindDelta != e2->WindDelta && + m_ClipType == ctUnion) { + if (e1->WindDelta == 0) { + if (e2Contributing) { + AddOutPt(e1, Pt); + if (e1Contributing) + e1->OutIdx = Unassigned; + } + } else { + if (e1Contributing) { + AddOutPt(e2, Pt); + if (e2Contributing) + e2->OutIdx = Unassigned; + } + } + } else if (e1->PolyTyp != e2->PolyTyp) { + // toggle subj open path OutIdx on/off when Abs(clip.WndCnt) == 1 ... + if ((e1->WindDelta == 0) && abs(e2->WindCnt) == 1 && + (m_ClipType != ctUnion || e2->WindCnt2 == 0)) { + AddOutPt(e1, Pt); + if (e1Contributing) + e1->OutIdx = Unassigned; + } else if ((e2->WindDelta == 0) && (abs(e1->WindCnt) == 1) && + (m_ClipType != ctUnion || e1->WindCnt2 == 0)) { + AddOutPt(e2, Pt); + if (e2Contributing) + e2->OutIdx = Unassigned; + } + } + return; + } +#endif + + // update winding counts... + // assumes that e1 will be to the Right of e2 ABOVE the intersection + if (e1->PolyTyp == e2->PolyTyp) { + if (IsEvenOddFillType(*e1)) { + int oldE1WindCnt = e1->WindCnt; + e1->WindCnt = e2->WindCnt; + e2->WindCnt = oldE1WindCnt; + } else { + if (e1->WindCnt + e2->WindDelta == 0) + e1->WindCnt = -e1->WindCnt; + else + e1->WindCnt += e2->WindDelta; + if (e2->WindCnt - e1->WindDelta == 0) + e2->WindCnt = -e2->WindCnt; + else + e2->WindCnt -= e1->WindDelta; + } + } else { + if (!IsEvenOddFillType(*e2)) + e1->WindCnt2 += e2->WindDelta; + else + e1->WindCnt2 = (e1->WindCnt2 == 0) ? 1 : 0; + if (!IsEvenOddFillType(*e1)) + e2->WindCnt2 -= e1->WindDelta; + else + e2->WindCnt2 = (e2->WindCnt2 == 0) ? 1 : 0; + } + + PolyFillType e1FillType, e2FillType, e1FillType2, e2FillType2; + if (e1->PolyTyp == ptSubject) { + e1FillType = m_SubjFillType; + e1FillType2 = m_ClipFillType; + } else { + e1FillType = m_ClipFillType; + e1FillType2 = m_SubjFillType; + } + if (e2->PolyTyp == ptSubject) { + e2FillType = m_SubjFillType; + e2FillType2 = m_ClipFillType; + } else { + e2FillType = m_ClipFillType; + e2FillType2 = m_SubjFillType; + } + + cInt e1Wc, e2Wc; + switch (e1FillType) { + case pftPositive: + e1Wc = e1->WindCnt; + break; + case pftNegative: + e1Wc = -e1->WindCnt; + break; + default: + e1Wc = Abs(e1->WindCnt); + } + switch (e2FillType) { + case pftPositive: + e2Wc = e2->WindCnt; + break; + case pftNegative: + e2Wc = -e2->WindCnt; + break; + default: + e2Wc = Abs(e2->WindCnt); + } + + if (e1Contributing && e2Contributing) { + if ((e1Wc != 0 && e1Wc != 1) || (e2Wc != 0 && e2Wc != 1) || + (e1->PolyTyp != e2->PolyTyp && m_ClipType != ctXor)) { + AddLocalMaxPoly(e1, e2, Pt); + } else { + AddOutPt(e1, Pt); + AddOutPt(e2, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if (e1Contributing) { + if (e2Wc == 0 || e2Wc == 1) { + AddOutPt(e1, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if (e2Contributing) { + if (e1Wc == 0 || e1Wc == 1) { + AddOutPt(e2, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if ((e1Wc == 0 || e1Wc == 1) && (e2Wc == 0 || e2Wc == 1)) { + // neither edge is currently contributing ... 
+ + cInt e1Wc2, e2Wc2; + switch (e1FillType2) { + case pftPositive: + e1Wc2 = e1->WindCnt2; + break; + case pftNegative: + e1Wc2 = -e1->WindCnt2; + break; + default: + e1Wc2 = Abs(e1->WindCnt2); + } + switch (e2FillType2) { + case pftPositive: + e2Wc2 = e2->WindCnt2; + break; + case pftNegative: + e2Wc2 = -e2->WindCnt2; + break; + default: + e2Wc2 = Abs(e2->WindCnt2); + } + + if (e1->PolyTyp != e2->PolyTyp) { + AddLocalMinPoly(e1, e2, Pt); + } else if (e1Wc == 1 && e2Wc == 1) + switch (m_ClipType) { + case ctIntersection: + if (e1Wc2 > 0 && e2Wc2 > 0) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctUnion: + if (e1Wc2 <= 0 && e2Wc2 <= 0) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctDifference: + if (((e1->PolyTyp == ptClip) && (e1Wc2 > 0) && (e2Wc2 > 0)) || + ((e1->PolyTyp == ptSubject) && (e1Wc2 <= 0) && (e2Wc2 <= 0))) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctXor: + AddLocalMinPoly(e1, e2, Pt); + } + else + SwapSides(*e1, *e2); + } +} +//------------------------------------------------------------------------------ + +void Clipper::SetHoleState(TEdge *e, OutRec *outrec) { + TEdge *e2 = e->PrevInAEL; + TEdge *eTmp = 0; + while (e2) { + if (e2->OutIdx >= 0 && e2->WindDelta != 0) { + if (!eTmp) + eTmp = e2; + else if (eTmp->OutIdx == e2->OutIdx) + eTmp = 0; + } + e2 = e2->PrevInAEL; + } + if (!eTmp) { + outrec->FirstLeft = 0; + outrec->IsHole = false; + } else { + outrec->FirstLeft = m_PolyOuts[eTmp->OutIdx]; + outrec->IsHole = !outrec->FirstLeft->IsHole; + } +} +//------------------------------------------------------------------------------ + +OutRec *GetLowermostRec(OutRec *outRec1, OutRec *outRec2) { + // work out which polygon fragment has the correct hole state ... + if (!outRec1->BottomPt) + outRec1->BottomPt = GetBottomPt(outRec1->Pts); + if (!outRec2->BottomPt) + outRec2->BottomPt = GetBottomPt(outRec2->Pts); + OutPt *OutPt1 = outRec1->BottomPt; + OutPt *OutPt2 = outRec2->BottomPt; + if (OutPt1->Pt.Y > OutPt2->Pt.Y) + return outRec1; + else if (OutPt1->Pt.Y < OutPt2->Pt.Y) + return outRec2; + else if (OutPt1->Pt.X < OutPt2->Pt.X) + return outRec1; + else if (OutPt1->Pt.X > OutPt2->Pt.X) + return outRec2; + else if (OutPt1->Next == OutPt1) + return outRec2; + else if (OutPt2->Next == OutPt2) + return outRec1; + else if (FirstIsBottomPt(OutPt1, OutPt2)) + return outRec1; + else + return outRec2; +} +//------------------------------------------------------------------------------ + +bool OutRec1RightOfOutRec2(OutRec *outRec1, OutRec *outRec2) { + do { + outRec1 = outRec1->FirstLeft; + if (outRec1 == outRec2) + return true; + } while (outRec1); + return false; +} +//------------------------------------------------------------------------------ + +OutRec *Clipper::GetOutRec(int Idx) { + OutRec *outrec = m_PolyOuts[Idx]; + while (outrec != m_PolyOuts[outrec->Idx]) + outrec = m_PolyOuts[outrec->Idx]; + return outrec; +} +//------------------------------------------------------------------------------ + +void Clipper::AppendPolygon(TEdge *e1, TEdge *e2) { + // get the start and ends of both output polygons ... + OutRec *outRec1 = m_PolyOuts[e1->OutIdx]; + OutRec *outRec2 = m_PolyOuts[e2->OutIdx]; + + OutRec *holeStateRec; + if (OutRec1RightOfOutRec2(outRec1, outRec2)) + holeStateRec = outRec2; + else if (OutRec1RightOfOutRec2(outRec2, outRec1)) + holeStateRec = outRec1; + else + holeStateRec = GetLowermostRec(outRec1, outRec2); + + // get the start and ends of both output polygons and + // join e2 poly onto e1 poly and delete pointers to e2 ... 
+ + OutPt *p1_lft = outRec1->Pts; + OutPt *p1_rt = p1_lft->Prev; + OutPt *p2_lft = outRec2->Pts; + OutPt *p2_rt = p2_lft->Prev; + + // join e2 poly onto e1 poly and delete pointers to e2 ... + if (e1->Side == esLeft) { + if (e2->Side == esLeft) { + // z y x a b c + ReversePolyPtLinks(p2_lft); + p2_lft->Next = p1_lft; + p1_lft->Prev = p2_lft; + p1_rt->Next = p2_rt; + p2_rt->Prev = p1_rt; + outRec1->Pts = p2_rt; + } else { + // x y z a b c + p2_rt->Next = p1_lft; + p1_lft->Prev = p2_rt; + p2_lft->Prev = p1_rt; + p1_rt->Next = p2_lft; + outRec1->Pts = p2_lft; + } + } else { + if (e2->Side == esRight) { + // a b c z y x + ReversePolyPtLinks(p2_lft); + p1_rt->Next = p2_rt; + p2_rt->Prev = p1_rt; + p2_lft->Next = p1_lft; + p1_lft->Prev = p2_lft; + } else { + // a b c x y z + p1_rt->Next = p2_lft; + p2_lft->Prev = p1_rt; + p1_lft->Prev = p2_rt; + p2_rt->Next = p1_lft; + } + } + + outRec1->BottomPt = 0; + if (holeStateRec == outRec2) { + if (outRec2->FirstLeft != outRec1) + outRec1->FirstLeft = outRec2->FirstLeft; + outRec1->IsHole = outRec2->IsHole; + } + outRec2->Pts = 0; + outRec2->BottomPt = 0; + outRec2->FirstLeft = outRec1; + + int OKIdx = e1->OutIdx; + int ObsoleteIdx = e2->OutIdx; + + e1->OutIdx = + Unassigned; // nb: safe because we only get here via AddLocalMaxPoly + e2->OutIdx = Unassigned; + + TEdge *e = m_ActiveEdges; + while (e) { + if (e->OutIdx == ObsoleteIdx) { + e->OutIdx = OKIdx; + e->Side = e1->Side; + break; + } + e = e->NextInAEL; + } + + outRec2->Idx = outRec1->Idx; +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::AddOutPt(TEdge *e, const IntPoint &pt) { + if (e->OutIdx < 0) { + OutRec *outRec = CreateOutRec(); + outRec->IsOpen = (e->WindDelta == 0); + OutPt *newOp = new OutPt; + outRec->Pts = newOp; + newOp->Idx = outRec->Idx; + newOp->Pt = pt; + newOp->Next = newOp; + newOp->Prev = newOp; + if (!outRec->IsOpen) + SetHoleState(e, outRec); + e->OutIdx = outRec->Idx; + return newOp; + } else { + OutRec *outRec = m_PolyOuts[e->OutIdx]; + // OutRec.Pts is the 'Left-most' point & OutRec.Pts.Prev is the 'Right-most' + OutPt *op = outRec->Pts; + + bool ToFront = (e->Side == esLeft); + if (ToFront && (pt == op->Pt)) + return op; + else if (!ToFront && (pt == op->Prev->Pt)) + return op->Prev; + + OutPt *newOp = new OutPt; + newOp->Idx = outRec->Idx; + newOp->Pt = pt; + newOp->Next = op; + newOp->Prev = op->Prev; + newOp->Prev->Next = newOp; + op->Prev = newOp; + if (ToFront) + outRec->Pts = newOp; + return newOp; + } +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::GetLastOutPt(TEdge *e) { + OutRec *outRec = m_PolyOuts[e->OutIdx]; + if (e->Side == esLeft) + return outRec->Pts; + else + return outRec->Pts->Prev; +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessHorizontals() { + TEdge *horzEdge; + while (PopEdgeFromSEL(horzEdge)) + ProcessHorizontal(horzEdge); +} +//------------------------------------------------------------------------------ + +inline bool IsMinima(TEdge *e) { + return e && (e->Prev->NextInLML != e) && (e->Next->NextInLML != e); +} +//------------------------------------------------------------------------------ + +inline bool IsMaxima(TEdge *e, const cInt Y) { + return e && e->Top.Y == Y && !e->NextInLML; +} +//------------------------------------------------------------------------------ + +inline bool IsIntermediate(TEdge *e, const cInt Y) { + return e->Top.Y == Y && e->NextInLML; +} 
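+//------------------------------------------------------------------------------
+// Illustrative usage sketch (comment only, not part of the compiled library):
+// the minima/maxima helpers above and the horizontal-edge processing below are
+// internal to the sweep; callers normally drive them through the public
+// Clipper API defined in this file. Assuming the usual ClipperLib namespace
+// and the Path/Paths typedefs from clipper.hpp, intersecting two closed
+// rectangles would look roughly like:
+//
+//   ClipperLib::Path subj, clip;
+//   subj.push_back(ClipperLib::IntPoint(0, 0));
+//   subj.push_back(ClipperLib::IntPoint(10, 0));
+//   subj.push_back(ClipperLib::IntPoint(10, 10));
+//   subj.push_back(ClipperLib::IntPoint(0, 10));
+//   clip.push_back(ClipperLib::IntPoint(5, 5));
+//   clip.push_back(ClipperLib::IntPoint(15, 5));
+//   clip.push_back(ClipperLib::IntPoint(15, 15));
+//   clip.push_back(ClipperLib::IntPoint(5, 15));
+//
+//   ClipperLib::Clipper c(0);                       // no special init options
+//   c.AddPath(subj, ClipperLib::ptSubject, true);   // closed subject path
+//   c.AddPath(clip, ClipperLib::ptClip, true);      // closed clip path
+//   ClipperLib::Paths solution;
+//   c.Execute(ClipperLib::ctIntersection, solution, ClipperLib::pftNonZero);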
+//------------------------------------------------------------------------------ + +TEdge *GetMaximaPair(TEdge *e) { + if ((e->Next->Top == e->Top) && !e->Next->NextInLML) + return e->Next; + else if ((e->Prev->Top == e->Top) && !e->Prev->NextInLML) + return e->Prev; + else + return 0; +} +//------------------------------------------------------------------------------ + +TEdge *GetMaximaPairEx(TEdge *e) { + // as GetMaximaPair() but returns 0 if MaxPair isn't in AEL (unless it's + // horizontal) + TEdge *result = GetMaximaPair(e); + if (result && + (result->OutIdx == Skip || + (result->NextInAEL == result->PrevInAEL && !IsHorizontal(*result)))) + return 0; + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::SwapPositionsInSEL(TEdge *Edge1, TEdge *Edge2) { + if (!(Edge1->NextInSEL) && !(Edge1->PrevInSEL)) + return; + if (!(Edge2->NextInSEL) && !(Edge2->PrevInSEL)) + return; + + if (Edge1->NextInSEL == Edge2) { + TEdge *Next = Edge2->NextInSEL; + if (Next) + Next->PrevInSEL = Edge1; + TEdge *Prev = Edge1->PrevInSEL; + if (Prev) + Prev->NextInSEL = Edge2; + Edge2->PrevInSEL = Prev; + Edge2->NextInSEL = Edge1; + Edge1->PrevInSEL = Edge2; + Edge1->NextInSEL = Next; + } else if (Edge2->NextInSEL == Edge1) { + TEdge *Next = Edge1->NextInSEL; + if (Next) + Next->PrevInSEL = Edge2; + TEdge *Prev = Edge2->PrevInSEL; + if (Prev) + Prev->NextInSEL = Edge1; + Edge1->PrevInSEL = Prev; + Edge1->NextInSEL = Edge2; + Edge2->PrevInSEL = Edge1; + Edge2->NextInSEL = Next; + } else { + TEdge *Next = Edge1->NextInSEL; + TEdge *Prev = Edge1->PrevInSEL; + Edge1->NextInSEL = Edge2->NextInSEL; + if (Edge1->NextInSEL) + Edge1->NextInSEL->PrevInSEL = Edge1; + Edge1->PrevInSEL = Edge2->PrevInSEL; + if (Edge1->PrevInSEL) + Edge1->PrevInSEL->NextInSEL = Edge1; + Edge2->NextInSEL = Next; + if (Edge2->NextInSEL) + Edge2->NextInSEL->PrevInSEL = Edge2; + Edge2->PrevInSEL = Prev; + if (Edge2->PrevInSEL) + Edge2->PrevInSEL->NextInSEL = Edge2; + } + + if (!Edge1->PrevInSEL) + m_SortedEdges = Edge1; + else if (!Edge2->PrevInSEL) + m_SortedEdges = Edge2; +} +//------------------------------------------------------------------------------ + +TEdge *GetNextInAEL(TEdge *e, Direction dir) { + return dir == dLeftToRight ? e->NextInAEL : e->PrevInAEL; +} +//------------------------------------------------------------------------------ + +void GetHorzDirection(TEdge &HorzEdge, Direction &Dir, cInt &Left, + cInt &Right) { + if (HorzEdge.Bot.X < HorzEdge.Top.X) { + Left = HorzEdge.Bot.X; + Right = HorzEdge.Top.X; + Dir = dLeftToRight; + } else { + Left = HorzEdge.Top.X; + Right = HorzEdge.Bot.X; + Dir = dRightToLeft; + } +} +//------------------------------------------------------------------------ + +/******************************************************************************* + * Notes: Horizontal edges (HEs) at scanline intersections (ie at the Top or * + * Bottom of a scanbeam) are processed as if layered. The order in which HEs * + * are processed doesn't matter. HEs intersect with other HE Bot.Xs only [#] * + * (or they could intersect with Top.Xs only, ie EITHER Bot.Xs OR Top.Xs), * and + *with other non-horizontal edges [*]. Once these intersections are * + * processed, intermediate HEs then 'promote' the Edge above (NextInLML) into * + * the AEL. These 'promoted' edges may in turn intersect [%] with other HEs. 
* + *******************************************************************************/ + +void Clipper::ProcessHorizontal(TEdge *horzEdge) { + Direction dir; + cInt horzLeft, horzRight; + bool IsOpen = (horzEdge->WindDelta == 0); + + GetHorzDirection(*horzEdge, dir, horzLeft, horzRight); + + TEdge *eLastHorz = horzEdge, *eMaxPair = 0; + while (eLastHorz->NextInLML && IsHorizontal(*eLastHorz->NextInLML)) + eLastHorz = eLastHorz->NextInLML; + if (!eLastHorz->NextInLML) + eMaxPair = GetMaximaPair(eLastHorz); + + MaximaList::const_iterator maxIt; + MaximaList::const_reverse_iterator maxRit; + if (m_Maxima.size() > 0) { + // get the first maxima in range (X) ... + if (dir == dLeftToRight) { + maxIt = m_Maxima.begin(); + while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X) + ++maxIt; + if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X) + maxIt = m_Maxima.end(); + } else { + maxRit = m_Maxima.rbegin(); + while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X) + ++maxRit; + if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X) + maxRit = m_Maxima.rend(); + } + } + + OutPt *op1 = 0; + + for (;;) // loop through consec. horizontal edges + { + bool IsLastHorz = (horzEdge == eLastHorz); + TEdge *e = GetNextInAEL(horzEdge, dir); + while (e) { + // this code block inserts extra coords into horizontal edges (in output + // polygons) whereever maxima touch these horizontal edges. This helps + //'simplifying' polygons (ie if the Simplify property is set). + if (m_Maxima.size() > 0) { + if (dir == dLeftToRight) { + while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X) { + if (horzEdge->OutIdx >= 0 && !IsOpen) + AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y)); + ++maxIt; + } + } else { + while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X) { + if (horzEdge->OutIdx >= 0 && !IsOpen) + AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y)); + ++maxRit; + } + } + }; + + if ((dir == dLeftToRight && e->Curr.X > horzRight) || + (dir == dRightToLeft && e->Curr.X < horzLeft)) + break; + + // Also break if we've got to the end of an intermediate horizontal edge + // ... + // nb: Smaller Dx's are to the right of larger Dx's ABOVE the horizontal. + if (e->Curr.X == horzEdge->Top.X && horzEdge->NextInLML && + e->Dx < horzEdge->NextInLML->Dx) + break; + + if (horzEdge->OutIdx >= 0 && !IsOpen) // note: may be done multiple times + { +#ifdef use_xyz + if (dir == dLeftToRight) + SetZ(e->Curr, *horzEdge, *e); + else + SetZ(e->Curr, *e, *horzEdge); +#endif + op1 = AddOutPt(horzEdge, e->Curr); + TEdge *eNextHorz = m_SortedEdges; + while (eNextHorz) { + if (eNextHorz->OutIdx >= 0 && + HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X, + eNextHorz->Bot.X, eNextHorz->Top.X)) { + OutPt *op2 = GetLastOutPt(eNextHorz); + AddJoin(op2, op1, eNextHorz->Top); + } + eNextHorz = eNextHorz->NextInSEL; + } + AddGhostJoin(op1, horzEdge->Bot); + } + + // OK, so far we're still in range of the horizontal Edge but make sure + // we're at the last of consec. 
horizontals when matching with eMaxPair + if (e == eMaxPair && IsLastHorz) { + if (horzEdge->OutIdx >= 0) + AddLocalMaxPoly(horzEdge, eMaxPair, horzEdge->Top); + DeleteFromAEL(horzEdge); + DeleteFromAEL(eMaxPair); + return; + } + + if (dir == dLeftToRight) { + IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y); + IntersectEdges(horzEdge, e, Pt); + } else { + IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y); + IntersectEdges(e, horzEdge, Pt); + } + TEdge *eNext = GetNextInAEL(e, dir); + SwapPositionsInAEL(horzEdge, e); + e = eNext; + } // end while(e) + + // Break out of loop if HorzEdge.NextInLML is not also horizontal ... + if (!horzEdge->NextInLML || !IsHorizontal(*horzEdge->NextInLML)) + break; + + UpdateEdgeIntoAEL(horzEdge); + if (horzEdge->OutIdx >= 0) + AddOutPt(horzEdge, horzEdge->Bot); + GetHorzDirection(*horzEdge, dir, horzLeft, horzRight); + + } // end for (;;) + + if (horzEdge->OutIdx >= 0 && !op1) { + op1 = GetLastOutPt(horzEdge); + TEdge *eNextHorz = m_SortedEdges; + while (eNextHorz) { + if (eNextHorz->OutIdx >= 0 && + HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X, + eNextHorz->Bot.X, eNextHorz->Top.X)) { + OutPt *op2 = GetLastOutPt(eNextHorz); + AddJoin(op2, op1, eNextHorz->Top); + } + eNextHorz = eNextHorz->NextInSEL; + } + AddGhostJoin(op1, horzEdge->Top); + } + + if (horzEdge->NextInLML) { + if (horzEdge->OutIdx >= 0) { + op1 = AddOutPt(horzEdge, horzEdge->Top); + UpdateEdgeIntoAEL(horzEdge); + if (horzEdge->WindDelta == 0) + return; + // nb: HorzEdge is no longer horizontal here + TEdge *ePrev = horzEdge->PrevInAEL; + TEdge *eNext = horzEdge->NextInAEL; + if (ePrev && ePrev->Curr.X == horzEdge->Bot.X && + ePrev->Curr.Y == horzEdge->Bot.Y && ePrev->WindDelta != 0 && + (ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y && + SlopesEqual(*horzEdge, *ePrev, m_UseFullRange))) { + OutPt *op2 = AddOutPt(ePrev, horzEdge->Bot); + AddJoin(op1, op2, horzEdge->Top); + } else if (eNext && eNext->Curr.X == horzEdge->Bot.X && + eNext->Curr.Y == horzEdge->Bot.Y && eNext->WindDelta != 0 && + eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y && + SlopesEqual(*horzEdge, *eNext, m_UseFullRange)) { + OutPt *op2 = AddOutPt(eNext, horzEdge->Bot); + AddJoin(op1, op2, horzEdge->Top); + } + } else + UpdateEdgeIntoAEL(horzEdge); + } else { + if (horzEdge->OutIdx >= 0) + AddOutPt(horzEdge, horzEdge->Top); + DeleteFromAEL(horzEdge); + } +} +//------------------------------------------------------------------------------ + +bool Clipper::ProcessIntersections(const cInt topY) { + if (!m_ActiveEdges) + return true; + try { + BuildIntersectList(topY); + size_t IlSize = m_IntersectList.size(); + if (IlSize == 0) + return true; + if (IlSize == 1 || FixupIntersectionOrder()) + ProcessIntersectList(); + else + return false; + } catch (...) { + m_SortedEdges = 0; + DisposeIntersectNodes(); + throw clipperException("ProcessIntersections error"); + } + m_SortedEdges = 0; + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::DisposeIntersectNodes() { + for (size_t i = 0; i < m_IntersectList.size(); ++i) + delete m_IntersectList[i]; + m_IntersectList.clear(); +} +//------------------------------------------------------------------------------ + +void Clipper::BuildIntersectList(const cInt topY) { + if (!m_ActiveEdges) + return; + + // prepare for sorting ... 
+ TEdge *e = m_ActiveEdges; + m_SortedEdges = e; + while (e) { + e->PrevInSEL = e->PrevInAEL; + e->NextInSEL = e->NextInAEL; + e->Curr.X = TopX(*e, topY); + e = e->NextInAEL; + } + + // bubblesort ... + bool isModified; + do { + isModified = false; + e = m_SortedEdges; + while (e->NextInSEL) { + TEdge *eNext = e->NextInSEL; + IntPoint Pt; + if (e->Curr.X > eNext->Curr.X) { + IntersectPoint(*e, *eNext, Pt); + if (Pt.Y < topY) + Pt = IntPoint(TopX(*e, topY), topY); + IntersectNode *newNode = new IntersectNode; + newNode->Edge1 = e; + newNode->Edge2 = eNext; + newNode->Pt = Pt; + m_IntersectList.push_back(newNode); + + SwapPositionsInSEL(e, eNext); + isModified = true; + } else + e = eNext; + } + if (e->PrevInSEL) + e->PrevInSEL->NextInSEL = 0; + else + break; + } while (isModified); + m_SortedEdges = 0; // important +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessIntersectList() { + for (size_t i = 0; i < m_IntersectList.size(); ++i) { + IntersectNode *iNode = m_IntersectList[i]; + { + IntersectEdges(iNode->Edge1, iNode->Edge2, iNode->Pt); + SwapPositionsInAEL(iNode->Edge1, iNode->Edge2); + } + delete iNode; + } + m_IntersectList.clear(); +} +//------------------------------------------------------------------------------ + +bool IntersectListSort(IntersectNode *node1, IntersectNode *node2) { + return node2->Pt.Y < node1->Pt.Y; +} +//------------------------------------------------------------------------------ + +inline bool EdgesAdjacent(const IntersectNode &inode) { + return (inode.Edge1->NextInSEL == inode.Edge2) || + (inode.Edge1->PrevInSEL == inode.Edge2); +} +//------------------------------------------------------------------------------ + +bool Clipper::FixupIntersectionOrder() { + // pre-condition: intersections are sorted Bottom-most first. + // Now it's crucial that intersections are made only between adjacent edges, + // so to ensure this the order of intersections may need adjusting ... 
+ CopyAELToSEL(); + std::sort(m_IntersectList.begin(), m_IntersectList.end(), IntersectListSort); + size_t cnt = m_IntersectList.size(); + for (size_t i = 0; i < cnt; ++i) { + if (!EdgesAdjacent(*m_IntersectList[i])) { + size_t j = i + 1; + while (j < cnt && !EdgesAdjacent(*m_IntersectList[j])) + j++; + if (j == cnt) + return false; + std::swap(m_IntersectList[i], m_IntersectList[j]); + } + SwapPositionsInSEL(m_IntersectList[i]->Edge1, m_IntersectList[i]->Edge2); + } + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::DoMaxima(TEdge *e) { + TEdge *eMaxPair = GetMaximaPairEx(e); + if (!eMaxPair) { + if (e->OutIdx >= 0) + AddOutPt(e, e->Top); + DeleteFromAEL(e); + return; + } + + TEdge *eNext = e->NextInAEL; + while (eNext && eNext != eMaxPair) { + IntersectEdges(e, eNext, e->Top); + SwapPositionsInAEL(e, eNext); + eNext = e->NextInAEL; + } + + if (e->OutIdx == Unassigned && eMaxPair->OutIdx == Unassigned) { + DeleteFromAEL(e); + DeleteFromAEL(eMaxPair); + } else if (e->OutIdx >= 0 && eMaxPair->OutIdx >= 0) { + if (e->OutIdx >= 0) + AddLocalMaxPoly(e, eMaxPair, e->Top); + DeleteFromAEL(e); + DeleteFromAEL(eMaxPair); + } +#ifdef use_lines + else if (e->WindDelta == 0) { + if (e->OutIdx >= 0) { + AddOutPt(e, e->Top); + e->OutIdx = Unassigned; + } + DeleteFromAEL(e); + + if (eMaxPair->OutIdx >= 0) { + AddOutPt(eMaxPair, e->Top); + eMaxPair->OutIdx = Unassigned; + } + DeleteFromAEL(eMaxPair); + } +#endif + else + throw clipperException("DoMaxima error"); +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessEdgesAtTopOfScanbeam(const cInt topY) { + TEdge *e = m_ActiveEdges; + while (e) { + // 1. process maxima, treating them as if they're 'bent' horizontal edges, + // but exclude maxima with horizontal edges. nb: e can't be a horizontal. + bool IsMaximaEdge = IsMaxima(e, topY); + + if (IsMaximaEdge) { + TEdge *eMaxPair = GetMaximaPairEx(e); + IsMaximaEdge = (!eMaxPair || !IsHorizontal(*eMaxPair)); + } + + if (IsMaximaEdge) { + if (m_StrictSimple) + m_Maxima.push_back(e->Top.X); + TEdge *ePrev = e->PrevInAEL; + DoMaxima(e); + if (!ePrev) + e = m_ActiveEdges; + else + e = ePrev->NextInAEL; + } else { + // 2. promote horizontal edges, otherwise update Curr.X and Curr.Y ... + if (IsIntermediate(e, topY) && IsHorizontal(*e->NextInLML)) { + UpdateEdgeIntoAEL(e); + if (e->OutIdx >= 0) + AddOutPt(e, e->Bot); + AddEdgeToSEL(e); + } else { + e->Curr.X = TopX(*e, topY); + e->Curr.Y = topY; +#ifdef use_xyz + e->Curr.Z = + topY == e->Top.Y ? e->Top.Z : (topY == e->Bot.Y ? e->Bot.Z : 0); +#endif + } + + // When StrictlySimple and 'e' is being touched by another edge, then + // make sure both edges have a vertex here ... + if (m_StrictSimple) { + TEdge *ePrev = e->PrevInAEL; + if ((e->OutIdx >= 0) && (e->WindDelta != 0) && ePrev && + (ePrev->OutIdx >= 0) && (ePrev->Curr.X == e->Curr.X) && + (ePrev->WindDelta != 0)) { + IntPoint pt = e->Curr; +#ifdef use_xyz + SetZ(pt, *ePrev, *e); +#endif + OutPt *op = AddOutPt(ePrev, pt); + OutPt *op2 = AddOutPt(e, pt); + AddJoin(op, op2, pt); // StrictlySimple (type-3) join + } + } + + e = e->NextInAEL; + } + } + + // 3. Process horizontals at the Top of the scanbeam ... + m_Maxima.sort(); + ProcessHorizontals(); + m_Maxima.clear(); + + // 4. Promote intermediate vertices ... 
+ e = m_ActiveEdges; + while (e) { + if (IsIntermediate(e, topY)) { + OutPt *op = 0; + if (e->OutIdx >= 0) + op = AddOutPt(e, e->Top); + UpdateEdgeIntoAEL(e); + + // if output polygons share an edge, they'll need joining later ... + TEdge *ePrev = e->PrevInAEL; + TEdge *eNext = e->NextInAEL; + if (ePrev && ePrev->Curr.X == e->Bot.X && ePrev->Curr.Y == e->Bot.Y && + op && ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y && + SlopesEqual(e->Curr, e->Top, ePrev->Curr, ePrev->Top, + m_UseFullRange) && + (e->WindDelta != 0) && (ePrev->WindDelta != 0)) { + OutPt *op2 = AddOutPt(ePrev, e->Bot); + AddJoin(op, op2, e->Top); + } else if (eNext && eNext->Curr.X == e->Bot.X && + eNext->Curr.Y == e->Bot.Y && op && eNext->OutIdx >= 0 && + eNext->Curr.Y > eNext->Top.Y && + SlopesEqual(e->Curr, e->Top, eNext->Curr, eNext->Top, + m_UseFullRange) && + (e->WindDelta != 0) && (eNext->WindDelta != 0)) { + OutPt *op2 = AddOutPt(eNext, e->Bot); + AddJoin(op, op2, e->Top); + } + } + e = e->NextInAEL; + } +} +//------------------------------------------------------------------------------ + +void Clipper::FixupOutPolyline(OutRec &outrec) { + OutPt *pp = outrec.Pts; + OutPt *lastPP = pp->Prev; + while (pp != lastPP) { + pp = pp->Next; + if (pp->Pt == pp->Prev->Pt) { + if (pp == lastPP) + lastPP = pp->Prev; + OutPt *tmpPP = pp->Prev; + tmpPP->Next = pp->Next; + pp->Next->Prev = tmpPP; + delete pp; + pp = tmpPP; + } + } + + if (pp == pp->Prev) { + DisposeOutPts(pp); + outrec.Pts = 0; + return; + } +} +//------------------------------------------------------------------------------ + +void Clipper::FixupOutPolygon(OutRec &outrec) { + // FixupOutPolygon() - removes duplicate points and simplifies consecutive + // parallel edges by removing the middle vertex. + OutPt *lastOK = 0; + outrec.BottomPt = 0; + OutPt *pp = outrec.Pts; + bool preserveCol = m_PreserveCollinear || m_StrictSimple; + + for (;;) { + if (pp->Prev == pp || pp->Prev == pp->Next) { + DisposeOutPts(pp); + outrec.Pts = 0; + return; + } + + // test for duplicate points and collinear edges ... + if ((pp->Pt == pp->Next->Pt) || (pp->Pt == pp->Prev->Pt) || + (SlopesEqual(pp->Prev->Pt, pp->Pt, pp->Next->Pt, m_UseFullRange) && + (!preserveCol || + !Pt2IsBetweenPt1AndPt3(pp->Prev->Pt, pp->Pt, pp->Next->Pt)))) { + lastOK = 0; + OutPt *tmp = pp; + pp->Prev->Next = pp->Next; + pp->Next->Prev = pp->Prev; + pp = pp->Prev; + delete tmp; + } else if (pp == lastOK) + break; + else { + if (!lastOK) + lastOK = pp; + pp = pp->Next; + } + } + outrec.Pts = pp; +} +//------------------------------------------------------------------------------ + +int PointCount(OutPt *Pts) { + if (!Pts) + return 0; + int result = 0; + OutPt *p = Pts; + do { + result++; + p = p->Next; + } while (p != Pts); + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::BuildResult(Paths &polys) { + polys.reserve(m_PolyOuts.size()); + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + if (!m_PolyOuts[i]->Pts) + continue; + Path pg; + OutPt *p = m_PolyOuts[i]->Pts->Prev; + int cnt = PointCount(p); + if (cnt < 2) + continue; + pg.reserve(cnt); + for (int i = 0; i < cnt; ++i) { + pg.push_back(p->Pt); + p = p->Prev; + } + polys.push_back(pg); + } +} +//------------------------------------------------------------------------------ + +void Clipper::BuildResult2(PolyTree &polytree) { + polytree.Clear(); + polytree.AllNodes.reserve(m_PolyOuts.size()); + // add each output polygon/contour to polytree ... 
+ for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) { + OutRec *outRec = m_PolyOuts[i]; + int cnt = PointCount(outRec->Pts); + if ((outRec->IsOpen && cnt < 2) || (!outRec->IsOpen && cnt < 3)) + continue; + FixHoleLinkage(*outRec); + PolyNode *pn = new PolyNode(); + // nb: polytree takes ownership of all the PolyNodes + polytree.AllNodes.push_back(pn); + outRec->PolyNd = pn; + pn->Parent = 0; + pn->Index = 0; + pn->Contour.reserve(cnt); + OutPt *op = outRec->Pts->Prev; + for (int j = 0; j < cnt; j++) { + pn->Contour.push_back(op->Pt); + op = op->Prev; + } + } + + // fixup PolyNode links etc ... + polytree.Childs.reserve(m_PolyOuts.size()); + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->PolyNd) + continue; + if (outRec->IsOpen) { + outRec->PolyNd->m_IsOpen = true; + polytree.AddChild(*outRec->PolyNd); + } else if (outRec->FirstLeft && outRec->FirstLeft->PolyNd) + outRec->FirstLeft->PolyNd->AddChild(*outRec->PolyNd); + else + polytree.AddChild(*outRec->PolyNd); + } +} +//------------------------------------------------------------------------------ + +void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2) { + // just swap the contents (because fIntersectNodes is a single-linked-list) + IntersectNode inode = int1; // gets a copy of Int1 + int1.Edge1 = int2.Edge1; + int1.Edge2 = int2.Edge2; + int1.Pt = int2.Pt; + int2.Edge1 = inode.Edge1; + int2.Edge2 = inode.Edge2; + int2.Pt = inode.Pt; +} +//------------------------------------------------------------------------------ + +inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2) { + if (e2.Curr.X == e1.Curr.X) { + if (e2.Top.Y > e1.Top.Y) + return e2.Top.X < TopX(e1, e2.Top.Y); + else + return e1.Top.X > TopX(e2, e1.Top.Y); + } else + return e2.Curr.X < e1.Curr.X; +} +//------------------------------------------------------------------------------ + +bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cInt b2, + cInt &Left, cInt &Right) { + if (a1 < a2) { + if (b1 < b2) { + Left = std::max(a1, b1); + Right = std::min(a2, b2); + } else { + Left = std::max(a1, b2); + Right = std::min(a2, b1); + } + } else { + if (b1 < b2) { + Left = std::max(a2, b1); + Right = std::min(a1, b2); + } else { + Left = std::max(a2, b2); + Right = std::min(a1, b1); + } + } + return Left < Right; +} +//------------------------------------------------------------------------------ + +inline void UpdateOutPtIdxs(OutRec &outrec) { + OutPt *op = outrec.Pts; + do { + op->Idx = outrec.Idx; + op = op->Prev; + } while (op != outrec.Pts); +} +//------------------------------------------------------------------------------ + +void Clipper::InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge) { + if (!m_ActiveEdges) { + edge->PrevInAEL = 0; + edge->NextInAEL = 0; + m_ActiveEdges = edge; + } else if (!startEdge && E2InsertsBeforeE1(*m_ActiveEdges, *edge)) { + edge->PrevInAEL = 0; + edge->NextInAEL = m_ActiveEdges; + m_ActiveEdges->PrevInAEL = edge; + m_ActiveEdges = edge; + } else { + if (!startEdge) + startEdge = m_ActiveEdges; + while (startEdge->NextInAEL && + !E2InsertsBeforeE1(*startEdge->NextInAEL, *edge)) + startEdge = startEdge->NextInAEL; + edge->NextInAEL = startEdge->NextInAEL; + if (startEdge->NextInAEL) + startEdge->NextInAEL->PrevInAEL = edge; + edge->PrevInAEL = startEdge; + startEdge->NextInAEL = edge; + } +} +//---------------------------------------------------------------------- + +OutPt *DupOutPt(OutPt *outPt, bool InsertAfter) { + OutPt *result = new OutPt; + 
result->Pt = outPt->Pt; + result->Idx = outPt->Idx; + if (InsertAfter) { + result->Next = outPt->Next; + result->Prev = outPt; + outPt->Next->Prev = result; + outPt->Next = result; + } else { + result->Prev = outPt->Prev; + result->Next = outPt; + outPt->Prev->Next = result; + outPt->Prev = result; + } + return result; +} +//------------------------------------------------------------------------------ + +bool JoinHorz(OutPt *op1, OutPt *op1b, OutPt *op2, OutPt *op2b, + const IntPoint Pt, bool DiscardLeft) { + Direction Dir1 = (op1->Pt.X > op1b->Pt.X ? dRightToLeft : dLeftToRight); + Direction Dir2 = (op2->Pt.X > op2b->Pt.X ? dRightToLeft : dLeftToRight); + if (Dir1 == Dir2) + return false; + + // When DiscardLeft, we want Op1b to be on the Left of Op1, otherwise we + // want Op1b to be on the Right. (And likewise with Op2 and Op2b.) + // So, to facilitate this while inserting Op1b and Op2b ... + // when DiscardLeft, make sure we're AT or RIGHT of Pt before adding Op1b, + // otherwise make sure we're AT or LEFT of Pt. (Likewise with Op2b.) + if (Dir1 == dLeftToRight) { + while (op1->Next->Pt.X <= Pt.X && op1->Next->Pt.X >= op1->Pt.X && + op1->Next->Pt.Y == Pt.Y) + op1 = op1->Next; + if (DiscardLeft && (op1->Pt.X != Pt.X)) + op1 = op1->Next; + op1b = DupOutPt(op1, !DiscardLeft); + if (op1b->Pt != Pt) { + op1 = op1b; + op1->Pt = Pt; + op1b = DupOutPt(op1, !DiscardLeft); + } + } else { + while (op1->Next->Pt.X >= Pt.X && op1->Next->Pt.X <= op1->Pt.X && + op1->Next->Pt.Y == Pt.Y) + op1 = op1->Next; + if (!DiscardLeft && (op1->Pt.X != Pt.X)) + op1 = op1->Next; + op1b = DupOutPt(op1, DiscardLeft); + if (op1b->Pt != Pt) { + op1 = op1b; + op1->Pt = Pt; + op1b = DupOutPt(op1, DiscardLeft); + } + } + + if (Dir2 == dLeftToRight) { + while (op2->Next->Pt.X <= Pt.X && op2->Next->Pt.X >= op2->Pt.X && + op2->Next->Pt.Y == Pt.Y) + op2 = op2->Next; + if (DiscardLeft && (op2->Pt.X != Pt.X)) + op2 = op2->Next; + op2b = DupOutPt(op2, !DiscardLeft); + if (op2b->Pt != Pt) { + op2 = op2b; + op2->Pt = Pt; + op2b = DupOutPt(op2, !DiscardLeft); + }; + } else { + while (op2->Next->Pt.X >= Pt.X && op2->Next->Pt.X <= op2->Pt.X && + op2->Next->Pt.Y == Pt.Y) + op2 = op2->Next; + if (!DiscardLeft && (op2->Pt.X != Pt.X)) + op2 = op2->Next; + op2b = DupOutPt(op2, DiscardLeft); + if (op2b->Pt != Pt) { + op2 = op2b; + op2->Pt = Pt; + op2b = DupOutPt(op2, DiscardLeft); + }; + }; + + if ((Dir1 == dLeftToRight) == DiscardLeft) { + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + } else { + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + } + return true; +} +//------------------------------------------------------------------------------ + +bool Clipper::JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2) { + OutPt *op1 = j->OutPt1, *op1b; + OutPt *op2 = j->OutPt2, *op2b; + + // There are 3 kinds of joins for output polygons ... + // 1. Horizontal joins where Join.OutPt1 & Join.OutPt2 are vertices anywhere + // along (horizontal) collinear edges (& Join.OffPt is on the same + // horizontal). + // 2. Non-horizontal joins where Join.OutPt1 & Join.OutPt2 are at the same + // location at the Bottom of the overlapping segment (& Join.OffPt is above). + // 3. StrictSimple joins where edges touch but are not collinear and where + // Join.OutPt1, Join.OutPt2 & Join.OffPt all share the same point. + bool isHorizontal = (j->OutPt1->Pt.Y == j->OffPt.Y); + + if (isHorizontal && (j->OffPt == j->OutPt1->Pt) && + (j->OffPt == j->OutPt2->Pt)) { + // Strictly Simple join ... 
+ if (outRec1 != outRec2) + return false; + op1b = j->OutPt1->Next; + while (op1b != op1 && (op1b->Pt == j->OffPt)) + op1b = op1b->Next; + bool reverse1 = (op1b->Pt.Y > j->OffPt.Y); + op2b = j->OutPt2->Next; + while (op2b != op2 && (op2b->Pt == j->OffPt)) + op2b = op2b->Next; + bool reverse2 = (op2b->Pt.Y > j->OffPt.Y); + if (reverse1 == reverse2) + return false; + if (reverse1) { + op1b = DupOutPt(op1, false); + op2b = DupOutPt(op2, true); + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } else { + op1b = DupOutPt(op1, true); + op2b = DupOutPt(op2, false); + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } + } else if (isHorizontal) { + // treat horizontal joins differently to non-horizontal joins since with + // them we're not yet sure where the overlapping is. OutPt1.Pt & OutPt2.Pt + // may be anywhere along the horizontal edge. + op1b = op1; + while (op1->Prev->Pt.Y == op1->Pt.Y && op1->Prev != op1b && + op1->Prev != op2) + op1 = op1->Prev; + while (op1b->Next->Pt.Y == op1b->Pt.Y && op1b->Next != op1 && + op1b->Next != op2) + op1b = op1b->Next; + if (op1b->Next == op1 || op1b->Next == op2) + return false; // a flat 'polygon' + + op2b = op2; + while (op2->Prev->Pt.Y == op2->Pt.Y && op2->Prev != op2b && + op2->Prev != op1b) + op2 = op2->Prev; + while (op2b->Next->Pt.Y == op2b->Pt.Y && op2b->Next != op2 && + op2b->Next != op1) + op2b = op2b->Next; + if (op2b->Next == op2 || op2b->Next == op1) + return false; // a flat 'polygon' + + cInt Left, Right; + // Op1 --> Op1b & Op2 --> Op2b are the extremites of the horizontal edges + if (!GetOverlap(op1->Pt.X, op1b->Pt.X, op2->Pt.X, op2b->Pt.X, Left, Right)) + return false; + + // DiscardLeftSide: when overlapping edges are joined, a spike will created + // which needs to be cleaned up. However, we don't want Op1 or Op2 caught up + // on the discard Side as either may still be needed for other joins ... + IntPoint Pt; + bool DiscardLeftSide; + if (op1->Pt.X >= Left && op1->Pt.X <= Right) { + Pt = op1->Pt; + DiscardLeftSide = (op1->Pt.X > op1b->Pt.X); + } else if (op2->Pt.X >= Left && op2->Pt.X <= Right) { + Pt = op2->Pt; + DiscardLeftSide = (op2->Pt.X > op2b->Pt.X); + } else if (op1b->Pt.X >= Left && op1b->Pt.X <= Right) { + Pt = op1b->Pt; + DiscardLeftSide = op1b->Pt.X > op1->Pt.X; + } else { + Pt = op2b->Pt; + DiscardLeftSide = (op2b->Pt.X > op2->Pt.X); + } + j->OutPt1 = op1; + j->OutPt2 = op2; + return JoinHorz(op1, op1b, op2, op2b, Pt, DiscardLeftSide); + } else { + // nb: For non-horizontal joins ... + // 1. Jr.OutPt1.Pt.Y == Jr.OutPt2.Pt.Y + // 2. Jr.OutPt1.Pt > Jr.OffPt.Y + + // make sure the polygons are correctly oriented ... 
+ op1b = op1->Next; + while ((op1b->Pt == op1->Pt) && (op1b != op1)) + op1b = op1b->Next; + bool Reverse1 = ((op1b->Pt.Y > op1->Pt.Y) || + !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)); + if (Reverse1) { + op1b = op1->Prev; + while ((op1b->Pt == op1->Pt) && (op1b != op1)) + op1b = op1b->Prev; + if ((op1b->Pt.Y > op1->Pt.Y) || + !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)) + return false; + }; + op2b = op2->Next; + while ((op2b->Pt == op2->Pt) && (op2b != op2)) + op2b = op2b->Next; + bool Reverse2 = ((op2b->Pt.Y > op2->Pt.Y) || + !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)); + if (Reverse2) { + op2b = op2->Prev; + while ((op2b->Pt == op2->Pt) && (op2b != op2)) + op2b = op2b->Prev; + if ((op2b->Pt.Y > op2->Pt.Y) || + !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)) + return false; + } + + if ((op1b == op1) || (op2b == op2) || (op1b == op2b) || + ((outRec1 == outRec2) && (Reverse1 == Reverse2))) + return false; + + if (Reverse1) { + op1b = DupOutPt(op1, false); + op2b = DupOutPt(op2, true); + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } else { + op1b = DupOutPt(op1, true); + op2b = DupOutPt(op2, false); + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } + } +} +//---------------------------------------------------------------------- + +static OutRec *ParseFirstLeft(OutRec *FirstLeft) { + while (FirstLeft && !FirstLeft->Pts) + FirstLeft = FirstLeft->FirstLeft; + return FirstLeft; +} +//------------------------------------------------------------------------------ + +void Clipper::FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec) { + // tests if NewOutRec contains the polygon before reassigning FirstLeft + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (outRec->Pts && firstLeft == OldOutRec) { + if (Poly2ContainsPoly1(outRec->Pts, NewOutRec->Pts)) + outRec->FirstLeft = NewOutRec; + } + } +} +//---------------------------------------------------------------------- + +void Clipper::FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec) { + // A polygon has split into two such that one is now the inner of the other. + // It's possible that these polygons now wrap around other polygons, so check + // every polygon that's also contained by OuterOutRec's FirstLeft container + //(including 0) to see if they've become inner to the new inner polygon ... 
+ OutRec *orfl = OuterOutRec->FirstLeft; + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + + if (!outRec->Pts || outRec == OuterOutRec || outRec == InnerOutRec) + continue; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (firstLeft != orfl && firstLeft != InnerOutRec && + firstLeft != OuterOutRec) + continue; + if (Poly2ContainsPoly1(outRec->Pts, InnerOutRec->Pts)) + outRec->FirstLeft = InnerOutRec; + else if (Poly2ContainsPoly1(outRec->Pts, OuterOutRec->Pts)) + outRec->FirstLeft = OuterOutRec; + else if (outRec->FirstLeft == InnerOutRec || + outRec->FirstLeft == OuterOutRec) + outRec->FirstLeft = orfl; + } +} +//---------------------------------------------------------------------- +void Clipper::FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec) { + // reassigns FirstLeft WITHOUT testing if NewOutRec contains the polygon + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (outRec->Pts && firstLeft == OldOutRec) + outRec->FirstLeft = NewOutRec; + } +} +//---------------------------------------------------------------------- + +void Clipper::JoinCommonEdges() { + for (JoinList::size_type i = 0; i < m_Joins.size(); i++) { + Join *join = m_Joins[i]; + + OutRec *outRec1 = GetOutRec(join->OutPt1->Idx); + OutRec *outRec2 = GetOutRec(join->OutPt2->Idx); + + if (!outRec1->Pts || !outRec2->Pts) + continue; + if (outRec1->IsOpen || outRec2->IsOpen) + continue; + + // get the polygon fragment with the correct hole state (FirstLeft) + // before calling JoinPoints() ... + OutRec *holeStateRec; + if (outRec1 == outRec2) + holeStateRec = outRec1; + else if (OutRec1RightOfOutRec2(outRec1, outRec2)) + holeStateRec = outRec2; + else if (OutRec1RightOfOutRec2(outRec2, outRec1)) + holeStateRec = outRec1; + else + holeStateRec = GetLowermostRec(outRec1, outRec2); + + if (!JoinPoints(join, outRec1, outRec2)) + continue; + + if (outRec1 == outRec2) { + // instead of joining two polygons, we've just created a new one by + // splitting one polygon into two. + outRec1->Pts = join->OutPt1; + outRec1->BottomPt = 0; + outRec2 = CreateOutRec(); + outRec2->Pts = join->OutPt2; + + // update all OutRec2.Pts Idx's ... + UpdateOutPtIdxs(*outRec2); + + if (Poly2ContainsPoly1(outRec2->Pts, outRec1->Pts)) { + // outRec1 contains outRec2 ... + outRec2->IsHole = !outRec1->IsHole; + outRec2->FirstLeft = outRec1; + + if (m_UsingPolyTree) + FixupFirstLefts2(outRec2, outRec1); + + if ((outRec2->IsHole ^ m_ReverseOutput) == (Area(*outRec2) > 0)) + ReversePolyPtLinks(outRec2->Pts); + + } else if (Poly2ContainsPoly1(outRec1->Pts, outRec2->Pts)) { + // outRec2 contains outRec1 ... + outRec2->IsHole = outRec1->IsHole; + outRec1->IsHole = !outRec2->IsHole; + outRec2->FirstLeft = outRec1->FirstLeft; + outRec1->FirstLeft = outRec2; + + if (m_UsingPolyTree) + FixupFirstLefts2(outRec1, outRec2); + + if ((outRec1->IsHole ^ m_ReverseOutput) == (Area(*outRec1) > 0)) + ReversePolyPtLinks(outRec1->Pts); + } else { + // the 2 polygons are completely separate ... + outRec2->IsHole = outRec1->IsHole; + outRec2->FirstLeft = outRec1->FirstLeft; + + // fixup FirstLeft pointers that may need reassigning to OutRec2 + if (m_UsingPolyTree) + FixupFirstLefts1(outRec1, outRec2); + } + + } else { + // joined 2 polygons together ... 
+ + outRec2->Pts = 0; + outRec2->BottomPt = 0; + outRec2->Idx = outRec1->Idx; + + outRec1->IsHole = holeStateRec->IsHole; + if (holeStateRec == outRec2) + outRec1->FirstLeft = outRec2->FirstLeft; + outRec2->FirstLeft = outRec1; + + if (m_UsingPolyTree) + FixupFirstLefts3(outRec2, outRec1); + } + } +} + +//------------------------------------------------------------------------------ +// ClipperOffset support functions ... +//------------------------------------------------------------------------------ + +DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2) { + if (pt2.X == pt1.X && pt2.Y == pt1.Y) + return DoublePoint(0, 0); + + double Dx = (double)(pt2.X - pt1.X); + double dy = (double)(pt2.Y - pt1.Y); + double f = 1 * 1.0 / std::sqrt(Dx * Dx + dy * dy); + Dx *= f; + dy *= f; + return DoublePoint(dy, -Dx); +} + +//------------------------------------------------------------------------------ +// ClipperOffset class +//------------------------------------------------------------------------------ + +ClipperOffset::ClipperOffset(double miterLimit, double arcTolerance) { + this->MiterLimit = miterLimit; + this->ArcTolerance = arcTolerance; + m_lowest.X = -1; +} +//------------------------------------------------------------------------------ + +ClipperOffset::~ClipperOffset() { Clear(); } +//------------------------------------------------------------------------------ + +void ClipperOffset::Clear() { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) + delete m_polyNodes.Childs[i]; + m_polyNodes.Childs.clear(); + m_lowest.X = -1; +} +//------------------------------------------------------------------------------ + +void ClipperOffset::AddPath(const Path &path, JoinType joinType, + EndType endType) { + int highI = (int)path.size() - 1; + if (highI < 0) + return; + PolyNode *newNode = new PolyNode(); + newNode->m_jointype = joinType; + newNode->m_endtype = endType; + + // strip duplicate points from path and also get index to the lowest point ... 
+ if (endType == etClosedLine || endType == etClosedPolygon) + while (highI > 0 && path[0] == path[highI]) + highI--; + newNode->Contour.reserve(highI + 1); + newNode->Contour.push_back(path[0]); + int j = 0, k = 0; + for (int i = 1; i <= highI; i++) + if (newNode->Contour[j] != path[i]) { + j++; + newNode->Contour.push_back(path[i]); + if (path[i].Y > newNode->Contour[k].Y || + (path[i].Y == newNode->Contour[k].Y && + path[i].X < newNode->Contour[k].X)) + k = j; + } + if (endType == etClosedPolygon && j < 2) { + delete newNode; + return; + } + m_polyNodes.AddChild(*newNode); + + // if this path's lowest pt is lower than all the others then update m_lowest + if (endType != etClosedPolygon) + return; + if (m_lowest.X < 0) + m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k); + else { + IntPoint ip = m_polyNodes.Childs[(int)m_lowest.X]->Contour[(int)m_lowest.Y]; + if (newNode->Contour[k].Y > ip.Y || + (newNode->Contour[k].Y == ip.Y && newNode->Contour[k].X < ip.X)) + m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::AddPaths(const Paths &paths, JoinType joinType, + EndType endType) { + for (Paths::size_type i = 0; i < paths.size(); ++i) + AddPath(paths[i], joinType, endType); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::FixOrientations() { + // fixup orientations of all closed paths if the orientation of the + // closed path with the lowermost vertex is wrong ... + if (m_lowest.X >= 0 && + !Orientation(m_polyNodes.Childs[(int)m_lowest.X]->Contour)) { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedPolygon || + (node.m_endtype == etClosedLine && Orientation(node.Contour))) + ReversePath(node.Contour); + } + } else { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedLine && !Orientation(node.Contour)) + ReversePath(node.Contour); + } + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::Execute(Paths &solution, double delta) { + solution.clear(); + FixOrientations(); + DoOffset(delta); + + // now clean up 'corners' ... + Clipper clpr; + clpr.AddPaths(m_destPolys, ptSubject, true); + if (delta > 0) { + clpr.Execute(ctUnion, solution, pftPositive, pftPositive); + } else { + IntRect r = clpr.GetBounds(); + Path outer(4); + outer[0] = IntPoint(r.left - 10, r.bottom + 10); + outer[1] = IntPoint(r.right + 10, r.bottom + 10); + outer[2] = IntPoint(r.right + 10, r.top - 10); + outer[3] = IntPoint(r.left - 10, r.top - 10); + + clpr.AddPath(outer, ptSubject, true); + clpr.ReverseSolution(true); + clpr.Execute(ctUnion, solution, pftNegative, pftNegative); + if (solution.size() > 0) + solution.erase(solution.begin()); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::Execute(PolyTree &solution, double delta) { + solution.Clear(); + FixOrientations(); + DoOffset(delta); + + // now clean up 'corners' ... 
+ Clipper clpr; + clpr.AddPaths(m_destPolys, ptSubject, true); + if (delta > 0) { + clpr.Execute(ctUnion, solution, pftPositive, pftPositive); + } else { + IntRect r = clpr.GetBounds(); + Path outer(4); + outer[0] = IntPoint(r.left - 10, r.bottom + 10); + outer[1] = IntPoint(r.right + 10, r.bottom + 10); + outer[2] = IntPoint(r.right + 10, r.top - 10); + outer[3] = IntPoint(r.left - 10, r.top - 10); + + clpr.AddPath(outer, ptSubject, true); + clpr.ReverseSolution(true); + clpr.Execute(ctUnion, solution, pftNegative, pftNegative); + // remove the outer PolyNode rectangle ... + if (solution.ChildCount() == 1 && solution.Childs[0]->ChildCount() > 0) { + PolyNode *outerNode = solution.Childs[0]; + solution.Childs.reserve(outerNode->ChildCount()); + solution.Childs[0] = outerNode->Childs[0]; + solution.Childs[0]->Parent = outerNode->Parent; + for (int i = 1; i < outerNode->ChildCount(); ++i) + solution.AddChild(*outerNode->Childs[i]); + } else + solution.Clear(); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoOffset(double delta) { + m_destPolys.clear(); + m_delta = delta; + + // if Zero offset, just copy any CLOSED polygons to m_p and return ... + if (NEAR_ZERO(delta)) { + m_destPolys.reserve(m_polyNodes.ChildCount()); + for (int i = 0; i < m_polyNodes.ChildCount(); i++) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedPolygon) + m_destPolys.push_back(node.Contour); + } + return; + } + + // see offset_triginometry3.svg in the documentation folder ... + if (MiterLimit > 2) + m_miterLim = 2 / (MiterLimit * MiterLimit); + else + m_miterLim = 0.5; + + double y; + if (ArcTolerance <= 0.0) + y = def_arc_tolerance; + else if (ArcTolerance > std::fabs(delta) * def_arc_tolerance) + y = std::fabs(delta) * def_arc_tolerance; + else + y = ArcTolerance; + // see offset_triginometry2.svg in the documentation folder ... + double steps = pi / std::acos(1 - y / std::fabs(delta)); + if (steps > std::fabs(delta) * pi) + steps = std::fabs(delta) * pi; // ie excessive precision check + m_sin = std::sin(two_pi / steps); + m_cos = std::cos(two_pi / steps); + m_StepsPerRad = steps / two_pi; + if (delta < 0.0) + m_sin = -m_sin; + + m_destPolys.reserve(m_polyNodes.ChildCount() * 2); + for (int i = 0; i < m_polyNodes.ChildCount(); i++) { + PolyNode &node = *m_polyNodes.Childs[i]; + m_srcPoly = node.Contour; + + int len = (int)m_srcPoly.size(); + if (len == 0 || + (delta <= 0 && (len < 3 || node.m_endtype != etClosedPolygon))) + continue; + + m_destPoly.clear(); + if (len == 1) { + if (node.m_jointype == jtRound) { + double X = 1.0, Y = 0.0; + for (cInt j = 1; j <= steps; j++) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta), + Round(m_srcPoly[0].Y + Y * delta))); + double X2 = X; + X = X * m_cos - m_sin * Y; + Y = X2 * m_sin + Y * m_cos; + } + } else { + double X = -1.0, Y = -1.0; + for (int j = 0; j < 4; ++j) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta), + Round(m_srcPoly[0].Y + Y * delta))); + if (X < 0) + X = 1; + else if (Y < 0) + Y = 1; + else + X = -1; + } + } + m_destPolys.push_back(m_destPoly); + continue; + } + // build m_normals ... 
+ m_normals.clear(); + m_normals.reserve(len); + for (int j = 0; j < len - 1; ++j) + m_normals.push_back(GetUnitNormal(m_srcPoly[j], m_srcPoly[j + 1])); + if (node.m_endtype == etClosedLine || node.m_endtype == etClosedPolygon) + m_normals.push_back(GetUnitNormal(m_srcPoly[len - 1], m_srcPoly[0])); + else + m_normals.push_back(DoublePoint(m_normals[len - 2])); + + if (node.m_endtype == etClosedPolygon) { + int k = len - 1; + for (int j = 0; j < len; ++j) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + } else if (node.m_endtype == etClosedLine) { + int k = len - 1; + for (int j = 0; j < len; ++j) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + m_destPoly.clear(); + // re-build m_normals ... + DoublePoint n = m_normals[len - 1]; + for (int j = len - 1; j > 0; j--) + m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y); + m_normals[0] = DoublePoint(-n.X, -n.Y); + k = 0; + for (int j = len - 1; j >= 0; j--) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + } else { + int k = 0; + for (int j = 1; j < len - 1; ++j) + OffsetPoint(j, k, node.m_jointype); + + IntPoint pt1; + if (node.m_endtype == etOpenButt) { + int j = len - 1; + pt1 = IntPoint((cInt)Round(m_srcPoly[j].X + m_normals[j].X * delta), + (cInt)Round(m_srcPoly[j].Y + m_normals[j].Y * delta)); + m_destPoly.push_back(pt1); + pt1 = IntPoint((cInt)Round(m_srcPoly[j].X - m_normals[j].X * delta), + (cInt)Round(m_srcPoly[j].Y - m_normals[j].Y * delta)); + m_destPoly.push_back(pt1); + } else { + int j = len - 1; + k = len - 2; + m_sinA = 0; + m_normals[j] = DoublePoint(-m_normals[j].X, -m_normals[j].Y); + if (node.m_endtype == etOpenSquare) + DoSquare(j, k); + else + DoRound(j, k); + } + + // re-build m_normals ... + for (int j = len - 1; j > 0; j--) + m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y); + m_normals[0] = DoublePoint(-m_normals[1].X, -m_normals[1].Y); + + k = len - 1; + for (int j = k - 1; j > 0; --j) + OffsetPoint(j, k, node.m_jointype); + + if (node.m_endtype == etOpenButt) { + pt1 = IntPoint((cInt)Round(m_srcPoly[0].X - m_normals[0].X * delta), + (cInt)Round(m_srcPoly[0].Y - m_normals[0].Y * delta)); + m_destPoly.push_back(pt1); + pt1 = IntPoint((cInt)Round(m_srcPoly[0].X + m_normals[0].X * delta), + (cInt)Round(m_srcPoly[0].Y + m_normals[0].Y * delta)); + m_destPoly.push_back(pt1); + } else { + k = 1; + m_sinA = 0; + if (node.m_endtype == etOpenSquare) + DoSquare(0, 1); + else + DoRound(0, 1); + } + m_destPolys.push_back(m_destPoly); + } + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::OffsetPoint(int j, int &k, JoinType jointype) { + // cross product ... + m_sinA = (m_normals[k].X * m_normals[j].Y - m_normals[j].X * m_normals[k].Y); + if (std::fabs(m_sinA * m_delta) < 1.0) { + // dot product ... 
+ double cosA = + (m_normals[k].X * m_normals[j].X + m_normals[j].Y * m_normals[k].Y); + if (cosA > 0) // angle => 0 degrees + { + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta))); + return; + } + // else angle => 180 degrees + } else if (m_sinA > 1.0) + m_sinA = 1.0; + else if (m_sinA < -1.0) + m_sinA = -1.0; + + if (m_sinA * m_delta < 0) { + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta))); + m_destPoly.push_back(m_srcPoly[j]); + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta))); + } else + switch (jointype) { + case jtMiter: { + double r = 1 + (m_normals[j].X * m_normals[k].X + + m_normals[j].Y * m_normals[k].Y); + if (r >= m_miterLim) + DoMiter(j, k, r); + else + DoSquare(j, k); + break; + } + case jtSquare: + DoSquare(j, k); + break; + case jtRound: + DoRound(j, k); + break; + } + k = j; +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoSquare(int j, int k) { + double dx = std::tan(std::atan2(m_sinA, m_normals[k].X * m_normals[j].X + + m_normals[k].Y * m_normals[j].Y) / + 4); + m_destPoly.push_back(IntPoint( + Round(m_srcPoly[j].X + m_delta * (m_normals[k].X - m_normals[k].Y * dx)), + Round(m_srcPoly[j].Y + + m_delta * (m_normals[k].Y + m_normals[k].X * dx)))); + m_destPoly.push_back(IntPoint( + Round(m_srcPoly[j].X + m_delta * (m_normals[j].X + m_normals[j].Y * dx)), + Round(m_srcPoly[j].Y + + m_delta * (m_normals[j].Y - m_normals[j].X * dx)))); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoMiter(int j, int k, double r) { + double q = m_delta / r; + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + (m_normals[k].X + m_normals[j].X) * q), + Round(m_srcPoly[j].Y + (m_normals[k].Y + m_normals[j].Y) * q))); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoRound(int j, int k) { + double a = std::atan2(m_sinA, m_normals[k].X * m_normals[j].X + + m_normals[k].Y * m_normals[j].Y); + int steps = std::max((int)Round(m_StepsPerRad * std::fabs(a)), 1); + + double X = m_normals[k].X, Y = m_normals[k].Y, X2; + for (int i = 0; i < steps; ++i) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + X * m_delta), + Round(m_srcPoly[j].Y + Y * m_delta))); + X2 = X; + X = X * m_cos - m_sin * Y; + Y = X2 * m_sin + Y * m_cos; + } + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta))); +} + +//------------------------------------------------------------------------------ +// Miscellaneous public functions +//------------------------------------------------------------------------------ + +void Clipper::DoSimplePolygons() { + PolyOutList::size_type i = 0; + while (i < m_PolyOuts.size()) { + OutRec *outrec = m_PolyOuts[i++]; + OutPt *op = outrec->Pts; + if (!op || outrec->IsOpen) + continue; + do // for each Pt in Polygon until duplicate found do ... + { + OutPt *op2 = op->Next; + while (op2 != outrec->Pts) { + if ((op->Pt == op2->Pt) && op2->Next != op && op2->Prev != op) { + // split the polygon into two ... 
+ OutPt *op3 = op->Prev; + OutPt *op4 = op2->Prev; + op->Prev = op4; + op4->Next = op; + op2->Prev = op3; + op3->Next = op2; + + outrec->Pts = op; + OutRec *outrec2 = CreateOutRec(); + outrec2->Pts = op2; + UpdateOutPtIdxs(*outrec2); + if (Poly2ContainsPoly1(outrec2->Pts, outrec->Pts)) { + // OutRec2 is contained by OutRec1 ... + outrec2->IsHole = !outrec->IsHole; + outrec2->FirstLeft = outrec; + if (m_UsingPolyTree) + FixupFirstLefts2(outrec2, outrec); + } else if (Poly2ContainsPoly1(outrec->Pts, outrec2->Pts)) { + // OutRec1 is contained by OutRec2 ... + outrec2->IsHole = outrec->IsHole; + outrec->IsHole = !outrec2->IsHole; + outrec2->FirstLeft = outrec->FirstLeft; + outrec->FirstLeft = outrec2; + if (m_UsingPolyTree) + FixupFirstLefts2(outrec, outrec2); + } else { + // the 2 polygons are separate ... + outrec2->IsHole = outrec->IsHole; + outrec2->FirstLeft = outrec->FirstLeft; + if (m_UsingPolyTree) + FixupFirstLefts1(outrec, outrec2); + } + op2 = op; // ie get ready for the Next iteration + } + op2 = op2->Next; + } + op = op->Next; + } while (op != outrec->Pts); + } +} +//------------------------------------------------------------------------------ + +void ReversePath(Path &p) { std::reverse(p.begin(), p.end()); } +//------------------------------------------------------------------------------ + +void ReversePaths(Paths &p) { + for (Paths::size_type i = 0; i < p.size(); ++i) + ReversePath(p[i]); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygon(const Path &in_poly, Paths &out_polys, + PolyFillType fillType) { + Clipper c; + c.StrictlySimple(true); + c.AddPath(in_poly, ptSubject, true); + c.Execute(ctUnion, out_polys, fillType, fillType); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, + PolyFillType fillType) { + Clipper c; + c.StrictlySimple(true); + c.AddPaths(in_polys, ptSubject, true); + c.Execute(ctUnion, out_polys, fillType, fillType); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygons(Paths &polys, PolyFillType fillType) { + SimplifyPolygons(polys, polys, fillType); +} +//------------------------------------------------------------------------------ + +inline double DistanceSqrd(const IntPoint &pt1, const IntPoint &pt2) { + double Dx = ((double)pt1.X - pt2.X); + double dy = ((double)pt1.Y - pt2.Y); + return (Dx * Dx + dy * dy); +} +//------------------------------------------------------------------------------ + +double DistanceFromLineSqrd(const IntPoint &pt, const IntPoint &ln1, + const IntPoint &ln2) { + // The equation of a line in general form (Ax + By + C = 0) + // given 2 points (x�,y�) & (x�,y�) is ... 
+ //(y� - y�)x + (x� - x�)y + (y� - y�)x� - (x� - x�)y� = 0 + // A = (y� - y�); B = (x� - x�); C = (y� - y�)x� - (x� - x�)y� + // perpendicular distance of point (x�,y�) = (Ax� + By� + C)/Sqrt(A� + B�) + // see http://en.wikipedia.org/wiki/Perpendicular_distance + double A = double(ln1.Y - ln2.Y); + double B = double(ln2.X - ln1.X); + double C = A * ln1.X + B * ln1.Y; + C = A * pt.X + B * pt.Y - C; + return (C * C) / (A * A + B * B); +} +//--------------------------------------------------------------------------- + +bool SlopesNearCollinear(const IntPoint &pt1, const IntPoint &pt2, + const IntPoint &pt3, double distSqrd) { + // this function is more accurate when the point that's geometrically + // between the other 2 points is the one that's tested for distance. + // ie makes it more likely to pick up 'spikes' ... + if (Abs(pt1.X - pt2.X) > Abs(pt1.Y - pt2.Y)) { + if ((pt1.X > pt2.X) == (pt1.X < pt3.X)) + return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd; + else if ((pt2.X > pt1.X) == (pt2.X < pt3.X)) + return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd; + else + return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd; + } else { + if ((pt1.Y > pt2.Y) == (pt1.Y < pt3.Y)) + return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd; + else if ((pt2.Y > pt1.Y) == (pt2.Y < pt3.Y)) + return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd; + else + return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd; + } +} +//------------------------------------------------------------------------------ + +bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd) { + double Dx = (double)pt1.X - pt2.X; + double dy = (double)pt1.Y - pt2.Y; + return ((Dx * Dx) + (dy * dy) <= distSqrd); +} +//------------------------------------------------------------------------------ + +OutPt *ExcludeOp(OutPt *op) { + OutPt *result = op->Prev; + result->Next = op->Next; + op->Next->Prev = result; + result->Idx = 0; + return result; +} +//------------------------------------------------------------------------------ + +void CleanPolygon(const Path &in_poly, Path &out_poly, double distance) { + // distance = proximity in units/pixels below which vertices + // will be stripped. Default ~= sqrt(2). 
+ + size_t size = in_poly.size(); + + if (size == 0) { + out_poly.clear(); + return; + } + + OutPt *outPts = new OutPt[size]; + for (size_t i = 0; i < size; ++i) { + outPts[i].Pt = in_poly[i]; + outPts[i].Next = &outPts[(i + 1) % size]; + outPts[i].Next->Prev = &outPts[i]; + outPts[i].Idx = 0; + } + + double distSqrd = distance * distance; + OutPt *op = &outPts[0]; + while (op->Idx == 0 && op->Next != op->Prev) { + if (PointsAreClose(op->Pt, op->Prev->Pt, distSqrd)) { + op = ExcludeOp(op); + size--; + } else if (PointsAreClose(op->Prev->Pt, op->Next->Pt, distSqrd)) { + ExcludeOp(op->Next); + op = ExcludeOp(op); + size -= 2; + } else if (SlopesNearCollinear(op->Prev->Pt, op->Pt, op->Next->Pt, + distSqrd)) { + op = ExcludeOp(op); + size--; + } else { + op->Idx = 1; + op = op->Next; + } + } + + if (size < 3) + size = 0; + out_poly.resize(size); + for (size_t i = 0; i < size; ++i) { + out_poly[i] = op->Pt; + op = op->Next; + } + delete[] outPts; +} +//------------------------------------------------------------------------------ + +void CleanPolygon(Path &poly, double distance) { + CleanPolygon(poly, poly, distance); +} +//------------------------------------------------------------------------------ + +void CleanPolygons(const Paths &in_polys, Paths &out_polys, double distance) { + out_polys.resize(in_polys.size()); + for (Paths::size_type i = 0; i < in_polys.size(); ++i) + CleanPolygon(in_polys[i], out_polys[i], distance); +} +//------------------------------------------------------------------------------ + +void CleanPolygons(Paths &polys, double distance) { + CleanPolygons(polys, polys, distance); +} +//------------------------------------------------------------------------------ + +void Minkowski(const Path &poly, const Path &path, Paths &solution, bool isSum, + bool isClosed) { + int delta = (isClosed ? 
1 : 0); + size_t polyCnt = poly.size(); + size_t pathCnt = path.size(); + Paths pp; + pp.reserve(pathCnt); + if (isSum) + for (size_t i = 0; i < pathCnt; ++i) { + Path p; + p.reserve(polyCnt); + for (size_t j = 0; j < poly.size(); ++j) + p.push_back(IntPoint(path[i].X + poly[j].X, path[i].Y + poly[j].Y)); + pp.push_back(p); + } + else + for (size_t i = 0; i < pathCnt; ++i) { + Path p; + p.reserve(polyCnt); + for (size_t j = 0; j < poly.size(); ++j) + p.push_back(IntPoint(path[i].X - poly[j].X, path[i].Y - poly[j].Y)); + pp.push_back(p); + } + + solution.clear(); + solution.reserve((pathCnt + delta) * (polyCnt + 1)); + for (size_t i = 0; i < pathCnt - 1 + delta; ++i) + for (size_t j = 0; j < polyCnt; ++j) { + Path quad; + quad.reserve(4); + quad.push_back(pp[i % pathCnt][j % polyCnt]); + quad.push_back(pp[(i + 1) % pathCnt][j % polyCnt]); + quad.push_back(pp[(i + 1) % pathCnt][(j + 1) % polyCnt]); + quad.push_back(pp[i % pathCnt][(j + 1) % polyCnt]); + if (!Orientation(quad)) + ReversePath(quad); + solution.push_back(quad); + } +} +//------------------------------------------------------------------------------ + +void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution, + bool pathIsClosed) { + Minkowski(pattern, path, solution, true, pathIsClosed); + Clipper c; + c.AddPaths(solution, ptSubject, true); + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +void TranslatePath(const Path &input, Path &output, const IntPoint delta) { + // precondition: input != output + output.resize(input.size()); + for (size_t i = 0; i < input.size(); ++i) + output[i] = IntPoint(input[i].X + delta.X, input[i].Y + delta.Y); +} +//------------------------------------------------------------------------------ + +void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution, + bool pathIsClosed) { + Clipper c; + for (size_t i = 0; i < paths.size(); ++i) { + Paths tmp; + Minkowski(pattern, paths[i], tmp, true, pathIsClosed); + c.AddPaths(tmp, ptSubject, true); + if (pathIsClosed) { + Path tmp2; + TranslatePath(paths[i], tmp2, pattern[0]); + c.AddPath(tmp2, ptClip, true); + } + } + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution) { + Minkowski(poly1, poly2, solution, false, true); + Clipper c; + c.AddPaths(solution, ptSubject, true); + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +enum NodeType { ntAny, ntOpen, ntClosed }; + +void AddPolyNodeToPaths(const PolyNode &polynode, NodeType nodetype, + Paths &paths) { + bool match = true; + if (nodetype == ntClosed) + match = !polynode.IsOpen(); + else if (nodetype == ntOpen) + return; + + if (!polynode.Contour.empty() && match) + paths.push_back(polynode.Contour); + for (int i = 0; i < polynode.ChildCount(); ++i) + AddPolyNodeToPaths(*polynode.Childs[i], nodetype, paths); +} +//------------------------------------------------------------------------------ + +void PolyTreeToPaths(const PolyTree &polytree, Paths &paths) { + paths.resize(0); + paths.reserve(polytree.Total()); + AddPolyNodeToPaths(polytree, ntAny, paths); +} +//------------------------------------------------------------------------------ + +void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths) { + paths.resize(0); + 
paths.reserve(polytree.Total()); + AddPolyNodeToPaths(polytree, ntClosed, paths); +} +//------------------------------------------------------------------------------ + +void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths) { + paths.resize(0); + paths.reserve(polytree.Total()); + // Open paths are top level only, so ... + for (int i = 0; i < polytree.ChildCount(); ++i) + if (polytree.Childs[i]->IsOpen()) + paths.push_back(polytree.Childs[i]->Contour); +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const IntPoint &p) { + s << "(" << p.X << "," << p.Y << ")"; + return s; +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const Path &p) { + if (p.empty()) + return s; + Path::size_type last = p.size() - 1; + for (Path::size_type i = 0; i < last; i++) + s << "(" << p[i].X << "," << p[i].Y << "), "; + s << "(" << p[last].X << "," << p[last].Y << ")\n"; + return s; +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const Paths &p) { + for (Paths::size_type i = 0; i < p.size(); i++) + s << p[i]; + s << "\n"; + return s; +} +//------------------------------------------------------------------------------ + +} // namespace ClipperLib diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h new file mode 100755 index 0000000000..d19e95ca2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h @@ -0,0 +1,421 @@ +/******************************************************************************* + * * + * Author : Angus Johnson * Version : 6.4.2 * Date : 27 February + *2017 * Website : + *http://www.angusj.com * Copyright : + *Angus Johnson 2010-2017 * + * * + * License: * Use, modification & distribution is subject to Boost Software + *License Ver 1. * http://www.boost.org/LICENSE_1_0.txt * + * * + * Attributions: * The code in this library is an extension of Bala Vatti's + *clipping algorithm: * "A generic solution to polygon clipping" * + * Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. * + * http://portal.acm.org/citation.cfm?id=129906 * + * * + * Computer graphics and geometric modeling: implementation and algorithms * By + *Max K. Agoston * + * Springer; 1 edition (January 4, 2005) * + * http://books.google.com/books?q=vatti+clipping+agoston * + * * + * See also: * "Polygon Offsetting by Computing Winding Numbers" * Paper no. + *DETC2005-85513 pp. 565-575 * ASME 2005 + *International Design Engineering Technical Conferences * and + *Computers and Information in Engineering Conference (IDETC/CIE2005) * + * September 24-28, 2005 , Long Beach, California, USA * + * http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf * + * * + *******************************************************************************/ + +#pragma once + +#ifndef clipper_hpp +#define clipper_hpp + +#define CLIPPER_VERSION "6.4.2" + +// use_int32: When enabled 32bit ints are used instead of 64bit ints. This +// improve performance but coordinate values are limited to the range +/- 46340 +//#define use_int32 + +// use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance. +//#define use_xyz + +// use_lines: Enables line clipping. Adds a very minor cost to performance. 
+#define use_lines + +// use_deprecated: Enables temporary support for the obsolete functions +//#define use_deprecated + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ClipperLib { + +enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor }; +enum PolyType { ptSubject, ptClip }; +// By far the most widely used winding rules for polygon filling are +// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32) +// Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL) +// see http://glprogramming.com/red/chapter11.html +enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative }; + +#ifdef use_int32 +typedef int cInt; +static cInt const loRange = 0x7FFF; +static cInt const hiRange = 0x7FFF; +#else +typedef signed long long cInt; +static cInt const loRange = 0x3FFFFFFF; +static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL; +typedef signed long long long64; // used by Int128 class +typedef unsigned long long ulong64; + +#endif + +struct IntPoint { + cInt X; + cInt Y; +#ifdef use_xyz + cInt Z; + IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){}; +#else + IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){}; +#endif + + friend inline bool operator==(const IntPoint &a, const IntPoint &b) { + return a.X == b.X && a.Y == b.Y; + } + friend inline bool operator!=(const IntPoint &a, const IntPoint &b) { + return a.X != b.X || a.Y != b.Y; + } +}; +//------------------------------------------------------------------------------ + +typedef std::vector Path; +typedef std::vector Paths; + +inline Path &operator<<(Path &poly, const IntPoint &p) { + poly.push_back(p); + return poly; +} +inline Paths &operator<<(Paths &polys, const Path &p) { + polys.push_back(p); + return polys; +} + +std::ostream &operator<<(std::ostream &s, const IntPoint &p); +std::ostream &operator<<(std::ostream &s, const Path &p); +std::ostream &operator<<(std::ostream &s, const Paths &p); + +struct DoublePoint { + double X; + double Y; + DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {} + DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {} +}; +//------------------------------------------------------------------------------ + +#ifdef use_xyz +typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot, + IntPoint &e2top, IntPoint &pt); +#endif + +enum InitOptions { + ioReverseSolution = 1, + ioStrictlySimple = 2, + ioPreserveCollinear = 4 +}; +enum JoinType { jtSquare, jtRound, jtMiter }; +enum EndType { + etClosedPolygon, + etClosedLine, + etOpenButt, + etOpenSquare, + etOpenRound +}; + +class PolyNode; +typedef std::vector PolyNodes; + +class PolyNode { +public: + PolyNode(); + virtual ~PolyNode(){}; + Path Contour; + PolyNodes Childs; + PolyNode *Parent; + PolyNode *GetNext() const; + bool IsHole() const; + bool IsOpen() const; + int ChildCount() const; + +private: + // PolyNode& operator =(PolyNode& other); + unsigned Index; // node index in Parent.Childs + bool m_IsOpen; + JoinType m_jointype; + EndType m_endtype; + PolyNode *GetNextSiblingUp() const; + void AddChild(PolyNode &child); + friend class Clipper; // to access Index + friend class ClipperOffset; +}; + +class PolyTree : public PolyNode { +public: + ~PolyTree() { Clear(); }; + PolyNode *GetFirst() const; + void Clear(); + int Total() const; + +private: + // PolyTree& operator =(PolyTree& other); + PolyNodes AllNodes; + friend class Clipper; // to access AllNodes +}; + +bool Orientation(const Path &poly); +double 
Area(const Path &poly); +int PointInPolygon(const IntPoint &pt, const Path &path); + +void SimplifyPolygon(const Path &in_poly, Paths &out_polys, + PolyFillType fillType = pftEvenOdd); +void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, + PolyFillType fillType = pftEvenOdd); +void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd); + +void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415); +void CleanPolygon(Path &poly, double distance = 1.415); +void CleanPolygons(const Paths &in_polys, Paths &out_polys, + double distance = 1.415); +void CleanPolygons(Paths &polys, double distance = 1.415); + +void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution, + bool pathIsClosed); +void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution, + bool pathIsClosed); +void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution); + +void PolyTreeToPaths(const PolyTree &polytree, Paths &paths); +void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths); +void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths); + +void ReversePath(Path &p); +void ReversePaths(Paths &p); + +struct IntRect { + cInt left; + cInt top; + cInt right; + cInt bottom; +}; + +// enums that are used internally ... +enum EdgeSide { esLeft = 1, esRight = 2 }; + +// forward declarations (for stuff used internally) ... +struct TEdge; +struct IntersectNode; +struct LocalMinimum; +struct OutPt; +struct OutRec; +struct Join; + +typedef std::vector PolyOutList; +typedef std::vector EdgeList; +typedef std::vector JoinList; +typedef std::vector IntersectList; + +//------------------------------------------------------------------------------ + +// ClipperBase is the ancestor to the Clipper class. It should not be +// instantiated directly. This class simply abstracts the conversion of sets of +// polygon coordinates into edge objects that are stored in a LocalMinima list. 
+class ClipperBase { +public: + ClipperBase(); + virtual ~ClipperBase(); + virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed); + bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed); + virtual void Clear(); + IntRect GetBounds(); + bool PreserveCollinear() { return m_PreserveCollinear; }; + void PreserveCollinear(bool value) { m_PreserveCollinear = value; }; + +protected: + void DisposeLocalMinimaList(); + TEdge *AddBoundsToLML(TEdge *e, bool IsClosed); + virtual void Reset(); + TEdge *ProcessBound(TEdge *E, bool IsClockwise); + void InsertScanbeam(const cInt Y); + bool PopScanbeam(cInt &Y); + bool LocalMinimaPending(); + bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin); + OutRec *CreateOutRec(); + void DisposeAllOutRecs(); + void DisposeOutRec(PolyOutList::size_type index); + void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2); + void DeleteFromAEL(TEdge *e); + void UpdateEdgeIntoAEL(TEdge *&e); + + typedef std::vector MinimaList; + MinimaList::iterator m_CurrentLM; + MinimaList m_MinimaList; + + bool m_UseFullRange; + EdgeList m_edges; + bool m_PreserveCollinear; + bool m_HasOpenPaths; + PolyOutList m_PolyOuts; + TEdge *m_ActiveEdges; + + typedef std::priority_queue ScanbeamList; + ScanbeamList m_Scanbeam; +}; +//------------------------------------------------------------------------------ + +class Clipper : public virtual ClipperBase { +public: + Clipper(int initOptions = 0); + bool Execute(ClipType clipType, Paths &solution, + PolyFillType fillType = pftEvenOdd); + bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType, + PolyFillType clipFillType); + bool Execute(ClipType clipType, PolyTree &polytree, + PolyFillType fillType = pftEvenOdd); + bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType, + PolyFillType clipFillType); + bool ReverseSolution() { return m_ReverseOutput; }; + void ReverseSolution(bool value) { m_ReverseOutput = value; }; + bool StrictlySimple() { return m_StrictSimple; }; + void StrictlySimple(bool value) { m_StrictSimple = value; }; +// set the callback function for z value filling on intersections (otherwise Z +// is 0) +#ifdef use_xyz + void ZFillFunction(ZFillCallback zFillFunc); +#endif +protected: + virtual bool ExecuteInternal(); + +private: + JoinList m_Joins; + JoinList m_GhostJoins; + IntersectList m_IntersectList; + ClipType m_ClipType; + typedef std::list MaximaList; + MaximaList m_Maxima; + TEdge *m_SortedEdges; + bool m_ExecuteLocked; + PolyFillType m_ClipFillType; + PolyFillType m_SubjFillType; + bool m_ReverseOutput; + bool m_UsingPolyTree; + bool m_StrictSimple; +#ifdef use_xyz + ZFillCallback m_ZFill; // custom callback +#endif + void SetWindingCount(TEdge &edge); + bool IsEvenOddFillType(const TEdge &edge) const; + bool IsEvenOddAltFillType(const TEdge &edge) const; + void InsertLocalMinimaIntoAEL(const cInt botY); + void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge); + void AddEdgeToSEL(TEdge *edge); + bool PopEdgeFromSEL(TEdge *&edge); + void CopyAELToSEL(); + void DeleteFromSEL(TEdge *e); + void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2); + bool IsContributing(const TEdge &edge) const; + bool IsTopHorz(const cInt XPos); + void DoMaxima(TEdge *e); + void ProcessHorizontals(); + void ProcessHorizontal(TEdge *horzEdge); + void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); + OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); + OutRec *GetOutRec(int idx); + void AppendPolygon(TEdge *e1, TEdge *e2); + void IntersectEdges(TEdge *e1, 
TEdge *e2, IntPoint &pt); + OutPt *AddOutPt(TEdge *e, const IntPoint &pt); + OutPt *GetLastOutPt(TEdge *e); + bool ProcessIntersections(const cInt topY); + void BuildIntersectList(const cInt topY); + void ProcessIntersectList(); + void ProcessEdgesAtTopOfScanbeam(const cInt topY); + void BuildResult(Paths &polys); + void BuildResult2(PolyTree &polytree); + void SetHoleState(TEdge *e, OutRec *outrec); + void DisposeIntersectNodes(); + bool FixupIntersectionOrder(); + void FixupOutPolygon(OutRec &outrec); + void FixupOutPolyline(OutRec &outrec); + bool IsHole(TEdge *e); + bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl); + void FixHoleLinkage(OutRec &outrec); + void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt); + void ClearJoins(); + void ClearGhostJoins(); + void AddGhostJoin(OutPt *op, const IntPoint offPt); + bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2); + void JoinCommonEdges(); + void DoSimplePolygons(); + void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec); + void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec); + void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec); +#ifdef use_xyz + void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2); +#endif +}; +//------------------------------------------------------------------------------ + +class ClipperOffset { +public: + ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25); + ~ClipperOffset(); + void AddPath(const Path &path, JoinType joinType, EndType endType); + void AddPaths(const Paths &paths, JoinType joinType, EndType endType); + void Execute(Paths &solution, double delta); + void Execute(PolyTree &solution, double delta); + void Clear(); + double MiterLimit; + double ArcTolerance; + +private: + Paths m_destPolys; + Path m_srcPoly; + Path m_destPoly; + std::vector m_normals; + double m_delta, m_sinA, m_sin, m_cos; + double m_miterLim, m_StepsPerRad; + IntPoint m_lowest; + PolyNode m_polyNodes; + + void FixOrientations(); + void DoOffset(double delta); + void OffsetPoint(int j, int &k, JoinType jointype); + void DoSquare(int j, int k); + void DoMiter(int j, int k, double r); + void DoRound(int j, int k); +}; +//------------------------------------------------------------------------------ + +class clipperException : public std::exception { +public: + clipperException(const char *description) : m_descr(description) {} + virtual ~clipperException() throw() {} + virtual const char *what() const throw() { return m_descr.c_str(); } + +private: + std::string m_descr; +}; +//------------------------------------------------------------------------------ + +} // namespace ClipperLib + +#endif // clipper_hpp diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc new file mode 100755 index 0000000000..8099821056 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +cv::Mat GetRotateCropImage(const cv::Mat &srcimage, + const std::array &box) { + cv::Mat image; + srcimage.copyTo(image); + + std::vector> points; + + for (int i = 0; i < 4; ++i) { + std::vector tmp; + tmp.push_back(box[2 * i]); + tmp.push_back(box[2 * i + 1]); + points.push_back(tmp); + } + int x_collect[4] = {box[0], box[2], box[4], box[6]}; + int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + + cv::Mat img_crop; + image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop); + + for (int i = 0; i < points.size(); i++) { + points[i][0] -= left; + points[i][1] -= top; + } + + int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) + + pow(points[0][1] - points[1][1], 2))); + int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) + + pow(points[0][1] - points[3][1], 2))); + + cv::Point2f pts_std[4]; + pts_std[0] = cv::Point2f(0., 0.); + pts_std[1] = cv::Point2f(img_crop_width, 0.); + pts_std[2] = cv::Point2f(img_crop_width, img_crop_height); + pts_std[3] = cv::Point2f(0.f, img_crop_height); + + cv::Point2f pointsf[4]; + pointsf[0] = cv::Point2f(points[0][0], points[0][1]); + pointsf[1] = cv::Point2f(points[1][0], points[1][1]); + pointsf[2] = cv::Point2f(points[2][0], points[2][1]); + pointsf[3] = cv::Point2f(points[3][0], points[3][1]); + + cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); + + cv::Mat dst_img; + cv::warpPerspective(img_crop, dst_img, M, + cv::Size(img_crop_width, img_crop_height), + cv::BORDER_REPLICATE); + + if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) { + cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth()); + cv::transpose(dst_img, srcCopy); + cv::flip(srcCopy, srcCopy, 0); + return srcCopy; + } else { + return dst_img; + } +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc new file mode 100755 index 0000000000..d8de7e6e54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::vector Xyxyxyxy2Xyxy(std::array &box) { + int x_collect[4] = {box[0], box[2], box[4], box[6]}; + int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + std::vector box1(4, 0); + box1[0] = left; + box1[1] = top; + box1[2] = right; + box1[3] = bottom; + return box1; +} + +float Dis(std::vector &box1, std::vector &box2) { + float x1_1 = float(box1[0]); + float y1_1 = float(box1[1]); + float x2_1 = float(box1[2]); + float y2_1 = float(box1[3]); + + float x1_2 = float(box2[0]); + float y1_2 = float(box2[1]); + float x2_2 = float(box2[2]); + float y2_2 = float(box2[3]); + + float dis = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1) + + std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + float dis_2 = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1); + float dis_3 = std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + return dis + std::min(dis_2, dis_3); +} + +float Iou(std::vector &box1, std::vector &box2) { + int area1 = std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]); + int area2 = std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]); + + // computing the sum_area + int sum_area = area1 + area2; + + // find the each point of intersect rectangle + int x1 = std::max(box1[0], box2[0]); + int y1 = std::max(box1[1], box2[1]); + int x2 = std::min(box1[2], box2[2]); + int y2 = std::min(box1[3], box2[3]); + + // judge if there is an intersect + if (y1 >= y2 || x1 >= x2) { + return 0.0; + } else { + int intersect = (x2 - x1) * (y2 - y1); + return intersect / (sum_area - intersect + 0.00000001); + } +} + +bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2) { + if (dis1[1] < dis2[1]) { + return true; + } else if (dis1[1] == dis2[1]) { + return dis1[0] < dis2[0]; + } else { + return false; + } +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc new file mode 100755 index 0000000000..7c1a55c17f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc @@ -0,0 +1,538 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ocr_postprocess_op.h" +#include "clipper.h" +#include + +namespace ultrainfer { +namespace vision { +namespace ocr { + +void PostProcessor::GetContourArea(const std::vector> &box, + float unclip_ratio, float &distance) { + int pts_num = 4; + float area = 0.0f; + float dist = 0.0f; + for (int i = 0; i < pts_num; i++) { + area += box[i][0] * box[(i + 1) % pts_num][1] - + box[i][1] * box[(i + 1) % pts_num][0]; + dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) * + (box[i][0] - box[(i + 1) % pts_num][0]) + + (box[i][1] - box[(i + 1) % pts_num][1]) * + (box[i][1] - box[(i + 1) % pts_num][1])); + } + area = fabs(float(area / 2.0)); + + distance = area * unclip_ratio / dist; +} + +void PostProcessor::GetContourAreaPoly(const std::vector &box, + float unclip_ratio, float &distance) { + int pts_num = box.size(); + float area = 0.0f; + float dist = 0.0f; + for (int i = 0; i < pts_num; i++) { + area += box[i].x * box[(i + 1) % pts_num].y - + box[i].y * box[(i + 1) % pts_num].x; + dist += sqrtf((box[i].x - box[(i + 1) % pts_num].x) * + (box[i].x - box[(i + 1) % pts_num].x) + + (box[i].y - box[(i + 1) % pts_num].y) * + (box[i].y - box[(i + 1) % pts_num].y)); + } + area = fabs(float(area / 2.0)); + + distance = area * unclip_ratio / dist; +} + +cv::RotatedRect PostProcessor::UnClip(std::vector> box, + const float &unclip_ratio) { + float distance = 1.0; + + GetContourArea(box, unclip_ratio, distance); + + ClipperLib::ClipperOffset offset; + ClipperLib::Path p; + p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1])) + << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1])) + << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1])) + << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1])); + offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); + + ClipperLib::Paths soln; + offset.Execute(soln, distance); + std::vector points; + + for (int j = 0; j < soln.size(); j++) { + for (int i = 0; i < soln[soln.size() - 1].size(); i++) { + points.emplace_back(soln[j][i].X, soln[j][i].Y); + } + } + cv::RotatedRect res; + if (points.size() <= 0) { + res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0); + } else { + res = cv::minAreaRect(points); + } + return res; +} + +std::vector PostProcessor::UnClipPoly(std::vector box, + const float &unclip_ratio) { + float distance = 1.0; + + GetContourAreaPoly(box, unclip_ratio, distance); + + ClipperLib::ClipperOffset offset; + ClipperLib::Path p; + for (const auto &pt : box) { + p << ClipperLib::IntPoint(int(pt.x), int(pt.y)); + } + + offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); + ClipperLib::Paths soln; + offset.Execute(soln, distance); + std::vector points; + + if (!soln.empty()) { + for (int i = 0; i < soln[0].size(); i++) { + points.emplace_back(soln[0][i].X, soln[0][i].Y); + } + } + + return points; +} + +float **PostProcessor::Mat2Vec(cv::Mat mat) { + auto **array = new float *[mat.rows]; + for (int i = 0; i < mat.rows; ++i) + array[i] = new float[mat.cols]; + for (int i = 0; i < mat.rows; ++i) { + for (int j = 0; j < mat.cols; ++j) { + array[i][j] = mat.at(i, j); + } + } + + return array; +} + +std::vector> +PostProcessor::OrderPointsClockwise(std::vector> pts) { + std::vector> box = pts; + std::sort(box.begin(), box.end(), XsortInt); + + std::vector> leftmost = {box[0], box[1]}; + std::vector> rightmost = {box[2], box[3]}; + + if (leftmost[0][1] > leftmost[1][1]) + std::swap(leftmost[0], leftmost[1]); + + if (rightmost[0][1] > rightmost[1][1]) + std::swap(rightmost[0], rightmost[1]); + + std::vector> 
rect = {leftmost[0], rightmost[0], rightmost[1], + leftmost[1]}; + return rect; +} + +std::vector> PostProcessor::Mat2Vector(cv::Mat mat) { + std::vector> img_vec; + std::vector tmp; + + for (int i = 0; i < mat.rows; ++i) { + tmp.clear(); + for (int j = 0; j < mat.cols; ++j) { + tmp.push_back(mat.at(i, j)); + } + img_vec.push_back(tmp); + } + return img_vec; +} + +bool PostProcessor::XsortFp32(std::vector a, std::vector b) { + if (a[0] != b[0]) + return a[0] < b[0]; + return false; +} + +bool PostProcessor::XsortInt(std::vector a, std::vector b) { + if (a[0] != b[0]) + return a[0] < b[0]; + return false; +} + +std::vector> PostProcessor::GetMiniBoxes(cv::RotatedRect box, + float &ssid) { + ssid = std::max(box.size.width, box.size.height); + + cv::Mat points; + cv::boxPoints(box, points); + + auto array = Mat2Vector(points); + std::sort(array.begin(), array.end(), XsortFp32); + + std::vector idx1 = array[0], idx2 = array[1], idx3 = array[2], + idx4 = array[3]; + if (array[3][1] <= array[2][1]) { + idx2 = array[3]; + idx3 = array[2]; + } else { + idx2 = array[2]; + idx3 = array[3]; + } + if (array[1][1] <= array[0][1]) { + idx1 = array[1]; + idx4 = array[0]; + } else { + idx1 = array[0]; + idx4 = array[1]; + } + + array[0] = idx1; + array[1] = idx2; + array[2] = idx3; + array[3] = idx4; + + return array; +} + +float PostProcessor::PolygonScoreAcc(std::vector contour, + cv::Mat pred) { + int width = pred.cols; + int height = pred.rows; + std::vector box_x; + std::vector box_y; + for (int i = 0; i < contour.size(); ++i) { + box_x.push_back(contour[i].x); + box_y.push_back(contour[i].y); + } + + int xmin = + clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int xmax = + clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int ymin = + clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, + height - 1); + int ymax = + clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + cv::Point *rook_point = new cv::Point[contour.size()]; + + for (int i = 0; i < contour.size(); ++i) { + rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); + } + const cv::Point *ppt[1] = {rook_point}; + int npt[] = {int(contour.size())}; + + cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + float score = cv::mean(croppedImg, mask)[0]; + + delete[] rook_point; + return score; +} + +float PostProcessor::BoxScoreFast(std::vector> box_array, + cv::Mat pred) { + auto array = box_array; + int width = pred.cols; + int height = pred.rows; + + float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]}; + float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]}; + + int xmin = clamp(int(std::floor(*(std::min_element(box_x, box_x + 4)))), 0, + width - 1); + int xmax = clamp(int(std::ceil(*(std::max_element(box_x, box_x + 4)))), 0, + width - 1); + int ymin = clamp(int(std::floor(*(std::min_element(box_y, box_y + 4)))), 0, + height - 1); + int ymax = clamp(int(std::ceil(*(std::max_element(box_y, box_y + 4)))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + cv::Point root_point[4]; + root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin); + root_point[1] = cv::Point(int(array[1][0]) - xmin, 
int(array[1][1]) - ymin); + root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin); + root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin); + const cv::Point *ppt[1] = {root_point}; + int npt[] = {4}; + cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + + auto score = cv::mean(croppedImg, mask)[0]; + return score; +} + +float PostProcessor::PolyScoreFast(std::vector box_array, + cv::Mat pred) { + int width = pred.cols; + int height = pred.rows; + + std::vector box_x; + std::vector box_y; + for (const cv::Point &p : box_array) { + box_x.push_back(p.x); + box_y.push_back(p.y); + } + + int xmin = + clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int xmax = + clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int ymin = + clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, + height - 1); + int ymax = + clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + std::vector shifted_box_array; + for (const cv::Point &p : box_array) { + shifted_box_array.emplace_back(p.x - xmin, p.y - ymin); + } + + std::vector> box_contour(1, shifted_box_array); + cv::fillPoly(mask, box_contour, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + + auto score = cv::mean(croppedImg, mask)[0]; + return score; +} + +std::vector>> PostProcessor::BoxesFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { + const int min_size = 3; + const int max_candidates = 1000; + + int width = bitmap.cols; + int height = bitmap.rows; + + std::vector> contours; + std::vector hierarchy; + + cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST, + cv::CHAIN_APPROX_SIMPLE); + + int num_contours = + contours.size() >= max_candidates ? 
max_candidates : contours.size(); + + std::vector>> boxes; + + for (int _i = 0; _i < num_contours; _i++) { + if (contours[_i].size() <= 2) { + continue; + } + float ssid; + cv::RotatedRect box = cv::minAreaRect(contours[_i]); + auto array = GetMiniBoxes(box, ssid); + + auto box_for_unclip = array; + // end get_mini_box + + if (ssid < min_size) { + continue; + } + + float score; + if (det_db_score_mode == "slow") /* compute using polygon*/ + score = PolygonScoreAcc(contours[_i], pred); + else + score = BoxScoreFast(array, pred); + + if (score < box_thresh) + continue; + + // start for unclip + cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio); + if (points.size.height < 1.001 && points.size.width < 1.001) { + continue; + } + // end for unclip + + cv::RotatedRect clipbox = points; + auto cliparray = GetMiniBoxes(clipbox, ssid); + + if (ssid < min_size + 2) + continue; + + int dest_width = pred.cols; + int dest_height = pred.rows; + std::vector> intcliparray; + + for (int num_pt = 0; num_pt < 4; num_pt++) { + std::vector a{int(clampf(roundf(cliparray[num_pt][0] / float(width) * + float(dest_width)), + 0, float(dest_width))), + int(clampf(roundf(cliparray[num_pt][1] / + float(height) * float(dest_height)), + 0, float(dest_height)))}; + intcliparray.push_back(a); + } + boxes.push_back(intcliparray); + + } // end for + return boxes; +} + +std::vector>> PostProcessor::PolygonFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { + const int min_size = 3; + const int max_candidates = 1000; + + int width = bitmap.cols; + int height = bitmap.rows; + + std::vector> contours; + std::vector hierarchy; + + cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST, + cv::CHAIN_APPROX_SIMPLE); + int num_contours = + contours.size() >= max_candidates ? 
max_candidates : contours.size(); + + std::vector>> boxes; + for (int _i = 0; _i < num_contours; _i++) { + + if (contours[_i].size() <= 2) { + continue; + } + double perimeter = cv::arcLength(contours[_i], true); + double epsilon = 0.002 * perimeter; + std::vector approx; + cv::approxPolyDP(contours[_i], approx, epsilon, true); + + if (approx.size() < 4) { + continue; // skip contours with fewer than 4 points + } + auto box_for_unclip = approx; + float score; + if (det_db_score_mode == "slow") + score = PolygonScoreAcc(approx, pred); + else + score = PolyScoreFast(approx, pred); + if (score < box_thresh) + continue; + // start for unclip + std::vector points = + UnClipPoly(box_for_unclip, det_db_unclip_ratio); + if (points.size() <= 0) + continue; + cv::RotatedRect res = cv::minAreaRect(points); + float ssid = std::max(res.size.width, res.size.height); + if (ssid < min_size + 2) + continue; + + int dest_width = pred.cols; + int dest_height = pred.rows; + std::vector> intcliparray; + + for (int num_pt = 0; num_pt < points.size(); num_pt++) { + std::vector a{int(clampf(roundf(points[num_pt].x / float(width) * + float(dest_width)), + 0, float(dest_width))), + int(clampf(roundf(points[num_pt].y / float(height) * + float(dest_height)), + 0, float(dest_height)))}; + intcliparray.push_back(a); + } + boxes.push_back(intcliparray); + } // end for + return boxes; +} + +std::vector>> PostProcessor::FilterCURVETagDetRes( + std::vector>> boxes, + const std::array &det_img_info) { + int oriimg_w = det_img_info[0]; + int oriimg_h = det_img_info[1]; + float ratio_w = float(det_img_info[2]) / float(oriimg_w); + float ratio_h = float(det_img_info[3]) / float(oriimg_h); + + std::vector>> root_points; + for (int n = 0; n < boxes.size(); n++) { + for (int m = 0; m < boxes[n].size(); m++) { + boxes[n][m][0] /= ratio_w; + boxes[n][m][1] /= ratio_h; + boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1)); + boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1)); + } + root_points.push_back(boxes[n]); + } + + return root_points; +} + +std::vector>> +PostProcessor::FilterTagDetRes(std::vector>> boxes, + const std::array &det_img_info) { + int oriimg_w = det_img_info[0]; + int oriimg_h = det_img_info[1]; + float ratio_w = float(det_img_info[2]) / float(oriimg_w); + float ratio_h = float(det_img_info[3]) / float(oriimg_h); + + std::vector>> root_points; + for (int n = 0; n < boxes.size(); n++) { + boxes[n] = OrderPointsClockwise(boxes[n]); + for (int m = 0; m < boxes[0].size(); m++) { + boxes[n][m][0] /= ratio_w; + boxes[n][m][1] /= ratio_h; + + boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1)); + boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1)); + } + } + + for (int n = 0; n < boxes.size(); n++) { + int rect_width, rect_height; + rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) + + pow(boxes[n][0][1] - boxes[n][1][1], 2))); + rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) + + pow(boxes[n][0][1] - boxes[n][3][1], 2))); + if (rect_width <= 4 || rect_height <= 4) + continue; + root_points.push_back(boxes[n]); + } + return root_points; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h new file mode 100755 index 0000000000..b8c471dff8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h @@ -0,0 +1,107 @@ +// Copyright (c) 2020 PaddlePaddle Authors.
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ultrainfer/vision/ocr/ppocr/utils/clipper.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +class PostProcessor { +public: + void GetContourArea(const std::vector> &box, + float unclip_ratio, float &distance); + void GetContourAreaPoly(const std::vector &box, float unclip_ratio, + float &distance); + cv::RotatedRect UnClip(std::vector> box, + const float &unclip_ratio); + + std::vector UnClipPoly(std::vector box, + const float &unclip_ratio); + + float **Mat2Vec(cv::Mat mat); + + std::vector> + OrderPointsClockwise(std::vector> pts); + + std::vector> GetMiniBoxes(cv::RotatedRect box, + float &ssid); + + float BoxScoreFast(std::vector> box_array, cv::Mat pred); + float PolyScoreFast(std::vector box_array, cv::Mat pred); + float PolygonScoreAcc(std::vector contour, cv::Mat pred); + + std::vector>> + BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, + const float &box_thresh, const float &det_db_unclip_ratio, + const std::string &det_db_score_mode); + + std::vector>> + PolygonFromBitmap(const cv::Mat pred, const cv::Mat bitmap, + const float &box_thresh, const float &det_db_unclip_ratio, + const std::string &det_db_score_mode); + + std::vector>> + FilterTagDetRes(std::vector>> boxes, + const std::array &det_img_info); + + std::vector>> + FilterCURVETagDetRes(std::vector>> boxes, + const std::array &det_img_info); + +private: + static bool XsortInt(std::vector a, std::vector b); + + static bool XsortFp32(std::vector a, std::vector b); + + std::vector> Mat2Vector(cv::Mat mat); + + inline int _max(int a, int b) { return a >= b ? a : b; } + + inline int _min(int a, int b) { return a >= b ? b : a; } + + template inline T clamp(T x, T min, T max) { + if (x > max) + return max; + if (x < min) + return min; + return x; + } + + inline float clampf(float x, float min, float max) { + if (x > max) + return max; + if (x < min) + return min; + return x; + } +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h new file mode 100755 index 0000000000..26c84a3cd8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/result.h" +#include +#include + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +ULTRAINFER_DECL cv::Mat GetRotateCropImage(const cv::Mat &srcimage, + const std::array &box); + +ULTRAINFER_DECL void SortBoxes(std::vector> *boxes); + +ULTRAINFER_DECL std::vector ArgSort(const std::vector &array); + +ULTRAINFER_DECL std::vector Softmax(std::vector &src); + +ULTRAINFER_DECL std::vector Xyxyxyxy2Xyxy(std::array &box); + +ULTRAINFER_DECL float Dis(std::vector &box1, std::vector &box2); + +ULTRAINFER_DECL float Iou(std::vector &box1, std::vector &box2); + +ULTRAINFER_DECL bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2); +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc new file mode 100755 index 0000000000..4d33e4678e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +static inline float FastExp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; +} + +std::vector Softmax(std::vector &src) { + int length = src.size(); + std::vector dst; + dst.resize(length); + const float alpha = + static_cast(*std::max_element(&src[0], &src[0 + length])); + float denominator{0}; + + for (int i = 0; i < length; ++i) { + dst[i] = FastExp(src[i] - alpha); + denominator += dst[i]; + } + + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } + return dst; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc new file mode 100755 index 0000000000..075ce672b6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool CompareBox(const std::array &result1, + const std::array &result2) { + if (result1[1] < result2[1]) { + return true; + } else if (result1[1] == result2[1]) { + return result1[0] < result2[0]; + } else { + return false; + } +} + +void SortBoxes(std::vector> *boxes) { + std::sort(boxes->begin(), boxes->end(), CompareBox); + + if (boxes->size() == 0) { + return; + } + + for (int i = 0; i < boxes->size() - 1; i++) { + for (int j = i; j >= 0; j--) { + if (std::abs((*boxes)[j + 1][1] - (*boxes)[j][1]) < 10 && + ((*boxes)[j + 1][0] < (*boxes)[j][0])) { + std::swap((*boxes)[i], (*boxes)[i + 1]); + } + } + } +} + +std::vector ArgSort(const std::vector &array) { + const int array_len(array.size()); + std::vector array_index(array_len, 0); + for (int i = 0; i < array_len; ++i) + array_index[i] = i; + + std::sort( + array_index.begin(), array_index.end(), + [&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); }); + + return array_index; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc new file mode 100755 index 0000000000..f8f5466f3a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
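[Editor's note, not part of the patch] The UVDoc postprocessor that follows currently passes the inference tensors through unchanged; the conversion back to an image (the SingleBatchPostprocessor sketched in comments below) is left to the caller. Purely as an illustration of that final step, and under the assumption that the output is a float HWC tensor already holding pixel values in roughly [0, 255] (the patch itself does not guarantee this layout or range), the wrap-and-clamp could look like the following; the helper name FloatTensorToImage is hypothetical.

// Sketch only: wrap a float HWC tensor as an OpenCV image and saturate it
// into 8-bit BGR. Tensor layout and value range are assumptions.
#include "opencv2/core.hpp"

static cv::Mat FloatTensorToImage(const float *data, int rows, int cols) {
  cv::Mat float_img(rows, cols, CV_32FC3, const_cast<float *>(data));
  cv::Mat img_u8;
  float_img.convertTo(img_u8, CV_8UC3); // saturate_cast clamps to [0, 255]
  return img_u8;
}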
+ +#include "ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +// bool UVDocPostprocessor::SingleBatchPostprocessor(const float* out_data, +// cv::Mat* result) { +// // Reverse normalization +// std::vector mean{127.5f, 127.5f, 127.5f}; +// std::vector std{127.5f, 127.5f, 127.5f}; +// Mat result_mat = Mat::Create(result->rows, result->cols, 3, +// FDDataType::FP32, const_cast(out_data)); +// Convert::Run(&result_mat, mean, std); + +// // Convert result_mat to OpenCV Mat object +// auto temp = result_mat.GetOpenCVMat(); +// cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3); +// temp->convertTo(res, CV_8UC3, 1); + +// // Execute BGR2RGB conversion +// Mat fd_image = WrapMat(res); +// BGR2RGB::Run(&fd_image); +// res = *(fd_image.GetOpenCVMat()); + +// // Copy result to output +// res.copyTo(*result); + +// return true; +// } + +bool UVDocPostprocessor::Run(const std::vector &infer_results, + std::vector *results) { + *results = infer_results; + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h new file mode 100755 index 0000000000..2c07e50f9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/function/functions.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for UVDoc serials model. + */ +class ULTRAINFER_DECL UVDocPostprocessor { +public: + UVDocPostprocessor() {} + /** \brief Process the result of runtime and fill to UVDocResult + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output text results of UVDoc + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc new file mode 100755 index 0000000000..7612b5bbe3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +UVDocPreprocessor::UVDocPreprocessor() { + normalize_permute_op_ = std::make_shared( + std::vector({0.0f, 0.0f, 0.0f}), + std::vector({1.0f, 1.0f, 1.0f}), true); +} + +bool UVDocPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h new file mode 100755 index 0000000000..c10bbd5bd9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for UVDoc serials model. + */ +class ULTRAINFER_DECL UVDocPreprocessor : public ProcessorManager { +public: + UVDocPreprocessor(); + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * wrapped by FDMat. \param[in] output The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_permute_op_ = + std::make_shared(mean, std, is_scale); + } + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute() { disable_normalize_ = true; } + +private: + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::shared_ptr normalize_permute_op_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc new file mode 100755 index 0000000000..0c563165ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/uvdocwarpper.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +UVDocWarpper::UVDocWarpper() {} +UVDocWarpper::UVDocWarpper(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool UVDocWarpper::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr UVDocWarpper::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(UVDocWarpper(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool UVDocWarpper::Predict(cv::Mat *im, FDTensor *result) { + return Predict(*im, result); +} + +bool UVDocWarpper::Predict(const cv::Mat &img, FDTensor *result) { + std::vector results; + if (!BatchPredict({img}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool UVDocWarpper::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h new file mode 100755 index 0000000000..28fa279c44 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" +#include "ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief UVDocWarpper object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL UVDocWarpper : public UltraInferModel { +public: + UVDocWarpper(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + UVDocWarpper(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new UVDocWarpper with less memory usage when multiple + * instances of the same model are created + * + * \return new UVDocWarpper* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "pp-uvdoc"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + + virtual bool Predict(cv::Mat *img, FDTensor *result); + + virtual bool Predict(const cv::Mat &img, FDTensor *result); + + /** \brief Predict the input image and get OCR detection model result. 
+ * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] results The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + /// Get preprocessor reference of UVDocWarpperPreprocessor + virtual UVDocPreprocessor &GetPreprocessor() { return preprocessor_; } + + // / Get postprocessor reference of UVDocWarpperPostprocessor + virtual UVDocPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + UVDocPreprocessor preprocessor_; + UVDocPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc new file mode 100755 index 0000000000..9e560d0fbd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Caddn::Caddn(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Caddn::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool Caddn::Predict(const cv::Mat &im, std::vector &input_cam_data, + std::vector &input_lidar_data, + PerceptionResult *result) { + std::vector results; + if (!BatchPredict({im}, input_cam_data, input_lidar_data, &results)) { + return false; + } + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Caddn::BatchPredict(const std::vector &images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, input_cam_data, input_lidar_data, + &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = "images"; + reused_input_tensors_[1].name = "trans_cam_to_img"; + reused_input_tensors_[2].name = "trans_lidar_to_cam"; + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h new file mode 100755 index 0000000000..75bf8c8d9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief Caddn model object used when to load a Caddn model exported by Caddn. + */ +class ULTRAINFER_DECL Caddn : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g Caddn/model.pdiparams + * \param[in] params_file Path of parameter file, e.g Caddn/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Caddn(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/Caddn"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, std::vector &input_cam_data, + std::vector &input_lidar_data, + PerceptionResult *results); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *results); + + /// Get preprocessor reference of Caddn + virtual CaddnPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Caddn + virtual CaddnPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + CaddnPreprocessor preprocessor_; + CaddnPostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc new file mode 100755 index 0000000000..cd86c4e67b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
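A minimal C++ usage sketch of the Caddn API declared above (the model and config paths are placeholders, and the camera and lidar matrices are flattened row-major float vectors of 12 and 16 values, as consumed by CaddnPreprocessor):

#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  // Placeholder paths for an exported Caddn model and its deploy config.
  perception::Caddn model("caddn/model.pdmodel", "caddn/model.pdiparams",
                          "caddn/infer_cfg.yml");
  cv::Mat image = cv::imread("demo.png");
  std::vector<float> cam_to_img(12, 0.0f);    // flattened 3x4 camera matrix
  std::vector<float> lidar_to_cam(16, 0.0f);  // flattened 4x4 lidar-to-camera matrix
  ultrainfer::vision::PerceptionResult result;
  if (!model.Predict(image, cam_to_img, lidar_to_cam, &result)) {
    std::cerr << "Caddn prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.scores.size() << " objects." << std::endl;
  return 0;
}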
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCaddn(pybind11::module &m) { + pybind11::class_(m, "CaddnPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::perception::CaddnPreprocessor &self, + std::vector &im_list, + std::vector &cam_data, std::vector &lidar_data) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, cam_data, lidar_data, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in CaddnPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "CaddnPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::CaddnPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "CaddnPostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::CaddnPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "CaddnPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Caddn") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Caddn &self, pybind11::array &data, + std::vector &cam_data, std::vector &lidar_data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, cam_data, lidar_data, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Caddn &self, + std::vector &data, std::vector &cam_data, + std::vector &lidar_data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, cam_data, lidar_data, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Caddn::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Caddn::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc new file mode 100755 index 0000000000..e4389b7046 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CaddnPostprocessor::CaddnPostprocessor() {} + +bool CaddnPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(1); + (*results)[0].Clear(); + (*results)[0].Reserve(tensors[0].shape[0]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + const float *data_0 = reinterpret_cast(tensors[0].Data()); + auto result = &(*results)[0]; + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 7) { + // item 1 ~ 3 : box3d bottom center x, y, z + // item 4 ~ 6 : box3d w, h, l + // item 7 : box3d yaw angle + std::vector vec(data_0 + i, data_0 + i + 7); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[3], vec[4], vec[5]}); + result->center.emplace_back(std::array{vec[0], vec[1], vec[2]}); + result->yaw_angle.push_back(vec[6]); + } + const float *data_1 = reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[2].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + const float *data_2 = reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[1].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(false); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h new file mode 100755 index 0000000000..35a554be2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Caddn serials model. 
+ */ +class ULTRAINFER_DECL CaddnPostprocessor { +public: + /** \brief Create a postprocessor instance for Caddn serials model + */ + CaddnPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc new file mode 100755 index 0000000000..ffba6c02ad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CaddnPreprocessor::CaddnPreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipeline(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool CaddnPreprocessor::BuildPreprocessPipeline() { + processors_.clear(); + + // preprocess + processors_.push_back(std::make_shared()); + + std::vector alpha = {1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0}; + std::vector beta = {0.0, 0.0, 0.0}; + processors_.push_back(std::make_shared(alpha, beta)); + + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool CaddnPreprocessor::Apply(FDMatBatch *image_batch, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + // There are 3 outputs, image, cam_data, lidar_data + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + + // Allocate memory for cam_data + (*outputs)[1].Resize({batch, 3, 4}, FDDataType::FP32); + + // Allocate memory for lidar_data + (*outputs)[2].Resize({batch, 4, 4}, FDDataType::FP32); + + auto *cam_data_ptr = reinterpret_cast((*outputs)[1].MutableData()); + auto *lidar_data_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + } + + memcpy(cam_data_ptr + i * 12, input_cam_data.data(), 12 * sizeof(float)); + memcpy(lidar_data_ptr + i * 16, input_lidar_data.data(), + 16 * sizeof(float)); + } + + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +bool CaddnPreprocessor::Run(std::vector *images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs) { + FDMatBatch image_batch(images); + PreApply(&image_batch); + bool ret = Apply(&image_batch, input_cam_data, input_lidar_data, outputs); + PostApply(); + return ret; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h new file mode 100755 index 0000000000..769df72960 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Caddn serials model. 
+ */ +class ULTRAINFER_DECL CaddnPreprocessor : public ProcessorManager { +public: + CaddnPreprocessor() = default; + /** \brief Create a preprocessor instance for Caddn model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * Caddn/infer_cfg.yml + */ + explicit CaddnPreprocessor(const std::string &config_file); + + bool Run(std::vector *images, std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs) { + FDERROR << "CaddnPreprocessor should input cam and lidar datas" + << std::endl; + return 0; + }; + bool Apply(FDMatBatch *image_batch, std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs); + +protected: + bool BuildPreprocessPipeline(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc new file mode 100755 index 0000000000..1ff09e56d4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Centerpoint::Centerpoint(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Centerpoint::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool Centerpoint::Predict(const std::string point_dir, + PerceptionResult *result) { + std::vector results; + if (!BatchPredict({point_dir}, &results)) { + return false; + } + + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Centerpoint::BatchPredict(std::vector points_dir, + std::vector *results) { + int64_t num_point_dim = 5; + int with_timelag = 0; + if (!preprocessor_.Run(points_dir, num_point_dim, with_timelag, + reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + results->resize(reused_input_tensors_.size()); + for (int index = 0; index < reused_input_tensors_.size(); ++index) { + std::vector input_tensor; + input_tensor.push_back(reused_input_tensors_[index]); + + input_tensor[0].name = InputInfoOfRuntime(0).name; + + if (!Infer(input_tensor, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + (*results)[index].Clear(); + (*results)[index].Reserve(reused_output_tensors_[0].shape[0]); + if (!postprocessor_.Run(reused_output_tensors_, &((*results)[index]))) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h new file mode 100755 index 0000000000..67d40cb9ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief Centerpoint model object used when to load a Centerpoint model + * exported by Centerpoint. + */ +class ULTRAINFER_DECL Centerpoint : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g Centerpoint/model.pdiparams + * \param[in] params_file Path of parameter file, e.g + * Centerpoint/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] custom_option RuntimeOption for inference, the + * default will use cpu, and choose the backend defined in + * "valid_cpu_backends" \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + Centerpoint(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/Centerpoint"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(std::string point_dir, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(std::vector points_dir, + std::vector *results); + + /// Get preprocessor reference of Centerpoint + virtual CenterpointPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Centerpoint + virtual CenterpointPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + CenterpointPreprocessor preprocessor_; + CenterpointPostprocessor postprocessor_; + bool initialized_ = false; + std::vector> ouput_tensors; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc new file mode 100755 index 0000000000..89b9d79885 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
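A minimal sketch of calling the Centerpoint API declared above; the .bin paths are placeholders, and each file is assumed to hold raw float32 points with five values per point, as CenterpointPreprocessor::ReadPoint expects.

#include <iostream>
#include <string>
#include <vector>
#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  perception::Centerpoint model("centerpoint/model.pdmodel",
                                "centerpoint/model.pdiparams",
                                "centerpoint/infer_cfg.yml");
  // Placeholder point-cloud files: raw float32, 5 values per point.
  std::vector<std::string> point_files = {"frame_0000.bin", "frame_0001.bin"};
  std::vector<ultrainfer::vision::PerceptionResult> results;
  if (!model.BatchPredict(point_files, &results)) {
    std::cerr << "Centerpoint prediction failed." << std::endl;
    return -1;
  }
  std::cout << results.size() << " frames processed." << std::endl;  // one result per file
  return 0;
}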
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterpoint(pybind11::module &m) { + pybind11::class_(m, "CenterpointPreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::CenterpointPreprocessor &self, + std::vector points_dir, + const int64_t num_point_dim, const int with_timelag) { + std::vector outputs; + if (!self.Run(points_dir, num_point_dim, with_timelag, outputs)) { + throw std::runtime_error("Failed to preprocess the input data in " + "CenterpointPreprocessor."); + } + + return outputs; + }); + + pybind11::class_( + m, "Centerpoint") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Centerpoint &self, std::string point_dir) { + vision::PerceptionResult result; + self.Predict(point_dir, &result); + return result; + }) + .def("batch_predict", + [](vision::perception::Centerpoint &self, + std::vector &points_dir) { + std::vector results; + self.BatchPredict(points_dir, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Centerpoint::GetPreprocessor) + .def_property_readonly( + "postprocessor", &vision::perception::Centerpoint::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc new file mode 100755 index 0000000000..ff822c0867 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CenterpointPostprocessor::CenterpointPostprocessor() {} + +bool CenterpointPostprocessor::Run(const std::vector &tensors, + PerceptionResult *result) { + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + const float *data_0 = reinterpret_cast(tensors[0].Data()); + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 9) { + // item 1 ~ 3 : box3d bottom center x, y, z + // item 4 ~ 6 : box3d w, l, h + // item 7 ~ 8 : speed x,y + // item 9 : box3d yaw angle + std::vector vec(data_0 + i, data_0 + i + 9); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[3], vec[4], vec[5]}); + result->center.emplace_back(std::array{vec[0], vec[1], vec[2]}); + result->yaw_angle.push_back(vec[8]); + result->velocity.push_back(std::array{vec[6], vec[7]}); + } + + const float *data_1 = reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[1].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + + const long long *data_2 = + reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[2].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(true); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h new file mode 100755 index 0000000000..6e03edaf4a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Centerpoint serials model. 
+ */ +class ULTRAINFER_DECL CenterpointPostprocessor { +public: + /** \brief Create a postprocessor instance for Centerpoint serials model + */ + CenterpointPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, PerceptionResult *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc new file mode 100755 index 0000000000..90a8fa81ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CenterpointPreprocessor::CenterpointPreprocessor( + const std::string &config_file) { + initialized_ = true; +} + +bool CenterpointPreprocessor::ReadPoint(const std::string &file_path, + const int64_t num_point_dim, + std::vector &data, + int64_t *num_points) { + std::ifstream file_in(file_path, std::ios::in | std::ios::binary); + if (num_point_dim < 4) { + FDERROR << "Point dimension must not be less than 4, but received " + << "num_point_dim is " << num_point_dim << std::endl; + } + + if (!file_in) { + FDERROR << "Failed to read file: " << file_path << std::endl; + return false; + } + + std::streampos file_size; + file_in.seekg(0, std::ios::end); + file_size = file_in.tellg(); + file_in.seekg(0, std::ios::beg); + + data.resize(file_size / sizeof(float)); + file_in.read(reinterpret_cast(data.data()), file_size); + file_in.close(); + + if (file_size / sizeof(float) % num_point_dim != 0) { + FDERROR << "Loaded file size (" << file_size + << ") is not evenly divisible by num_point_dim (" << num_point_dim + << ")\n"; + return false; + } + *num_points = file_size / sizeof(float) / num_point_dim; + return true; +} + +bool CenterpointPreprocessor::InsertTimeToPoints(const int64_t num_points, + const int64_t num_point_dim, + float *points) { + for (int64_t i = 0; i < num_points; ++i) { + *(points + i * num_point_dim + 4) = 0.; + } + return true; +} + +bool CenterpointPreprocessor::Apply(std::vector &points_dir, + const int64_t num_point_dim, + const int with_timelag, + std::vector &outputs) { + for (int index = 0; index < points_dir.size(); ++index) { + std::string file_path = points_dir[index]; + std::vector points_shape; + std::vector data; + int64_t num_points; + if 
(!ReadPoint(file_path, num_point_dim, data, &num_points)) { + return false; + } + float *points = data.data(); + + if (!with_timelag && num_point_dim == 5 || num_point_dim > 5) { + InsertTimeToPoints(num_points, num_point_dim, points); + } + points_shape.push_back(num_points); + points_shape.push_back(num_point_dim); + + FDTensor tensor; + tensor.SetData(points_shape, FDDataType::FP32, points, true); + outputs.push_back(tensor); + } + return true; +} + +bool CenterpointPreprocessor::Run(std::vector &points_dir, + const int64_t num_point_dim, + const int with_timelag, + std::vector &outputs) { + bool ret = Apply(points_dir, num_point_dim, with_timelag, outputs); + return ret; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h new file mode 100755 index 0000000000..4685f2c9c7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Centerpoint model. + */ +class ULTRAINFER_DECL CenterpointPreprocessor : public ProcessorManager { +public: + CenterpointPreprocessor() = default; + /** \brief Create a preprocessor instance for Centerpoint model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * Centerpoint/infer_cfg.yml + */ + explicit CenterpointPreprocessor(const std::string &config_file); + + bool Apply(FDMatBatch *image_batch, std::vector *outputs) { + return false; + } + + bool Apply(std::vector &points_dir, const int64_t num_point_dim, + const int with_timelag, std::vector &outputs); + + bool Run(std::vector &points_dir, const int64_t num_point_dim, + const int with_timelag, std::vector &outputs); + +protected: + std::vector> processors_; + bool ReadPoint(const std::string &file_path, const int64_t num_point_dim, + std::vector &data, int64_t *num_points); + bool InsertTimeToPoints(const int64_t num_points, const int64_t num_point_dim, + float *points); + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc new file mode 100755 index 0000000000..e1ecebd860 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/petr/petr.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Petr::Petr(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + runtime_option.paddle_infer_option.enable_mkldnn = false; + initialized = Initialize(); +} + +bool Petr::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool Petr::Predict(const cv::Mat &images, PerceptionResult *results) { + FDERROR << "Petr inference only support 6(V1) or 12(V2) images" << std::endl; + return false; +} + +bool Petr::BatchPredict(const std::vector &images, + std::vector *results) { + if ((images.size() != 6) && (images.size() != 12)) { + FDERROR << "Petr only support 6(V1) or 12(V2) images"; + return false; + } + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + // Note: un-commented the codes below to show the debug info. + // reused_input_tensors_[0].PrintInfo(); + // reused_input_tensors_[1].PrintInfo(); + // reused_input_tensors_[2].PrintInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[1].name = InputInfoOfRuntime(1).name; + if (images.size() == 12) { + // for Petr V2 timestamp + reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + } else { + // for Petr V1 + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h new file mode 100755 index 0000000000..90c5e78e1b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/petr/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/petr/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief petr model object used when to load a petr model exported by petr. + */ +class ULTRAINFER_DECL Petr : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g petr/model.pdiparams + * \param[in] params_file Path of parameter file, e.g petr/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Petr(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/petr"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of Petr + virtual PetrPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Petr + virtual PetrPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + PetrPreprocessor preprocessor_; + PetrPostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc new file mode 100755 index 0000000000..24f2ab49fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPetr(pybind11::module &m) { + pybind11::class_(m, "PetrPreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::PetrPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in PetrPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "PetrPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::PetrPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PetrPostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::PetrPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PetrPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Petr") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Petr &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Petr &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Petr::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Petr::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc new file mode 100755 index 0000000000..e82d5c697e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
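A minimal sketch of driving the Petr model from C++ using the API declared above; the paths are placeholders, and BatchPredict accepts exactly 6 images for PETR v1 or 12 for PETR v2, as enforced in petr.cc.

#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/perception/paddle3d/petr/petr.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  perception::Petr model("petr/model.pdmodel", "petr/model.pdiparams",
                         "petr/infer_cfg.yml");
  // PETR v1 expects exactly 6 surround-view camera images (v2 expects 12).
  std::vector<cv::Mat> cameras;
  for (int i = 0; i < 6; ++i) {
    cameras.push_back(cv::imread("cam_" + std::to_string(i) + ".jpg"));
  }
  std::vector<ultrainfer::vision::PerceptionResult> results;
  if (!model.BatchPredict(cameras, &results)) {
    std::cerr << "Petr prediction failed." << std::endl;
    return -1;
  }
  return 0;
}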
+ +#include "ultrainfer/vision/perception/paddle3d/petr/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +PetrPostprocessor::PetrPostprocessor() {} + +bool PetrPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(1); + (*results)[0].Clear(); + (*results)[0].Reserve(tensors[0].shape[0]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + const float *data_0 = reinterpret_cast(tensors[0].Data()); + auto result = &(*results)[0]; + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 9) { + // item 1 ~ 3 : box3d w, h, l + // item 4 ~ 6 : box3d bottom center x, y, z + // item 7 : box3d yaw angle + // item 8 ~ 9 : speed x,y + std::vector vec(data_0 + i, data_0 + i + 9); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[0], vec[1], vec[2]}); + result->center.emplace_back(std::array{vec[3], vec[4], vec[5]}); + result->yaw_angle.push_back(vec[6]); + result->velocity.push_back(std::array{vec[7], vec[8]}); + } + const float *data_1 = reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[1].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + const long long *data_2 = + reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[2].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(true); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h new file mode 100755 index 0000000000..ed2ff151e2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Petr serials model. 
+ */ +class ULTRAINFER_DECL PetrPostprocessor { +public: + /** \brief Create a postprocessor instance for Petr serials model + */ + PetrPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc new file mode 100755 index 0000000000..277fbb9b14 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/petr/preprocessor.h" + +#include + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +PetrPreprocessor::PetrPreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool PetrPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + + processors_.push_back(std::make_shared(800, 450)); + processors_.push_back(std::make_shared(0, 130, 800, 320)); + + std::vector mean{103.530, 116.280, 123.675}; + std::vector std{57.375, 57.120, 58.395}; + bool scale = false; + processors_.push_back(std::make_shared(mean, std, scale)); + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool PetrPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + // There are 3 outputs, image, k_data, timestamp + outputs->resize(3); + int num_cams = static_cast(image_batch->mats->size()); + + // Allocate memory for k_data + (*outputs)[1].Resize({1, num_cams, 4, 4}, FDDataType::FP32); + + // Allocate memory for image_data + (*outputs)[0].Resize({1, num_cams, 3, 320, 800}, FDDataType::FP32); + + // Allocate memory for timestamp + (*outputs)[2].Resize({1, num_cams}, FDDataType::FP32); + + auto *image_ptr = reinterpret_cast((*outputs)[0].MutableData()); + + auto *k_data_ptr = reinterpret_cast((*outputs)[1].MutableData()); + + auto *timestamp_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + } + } + + for (int i = 0; i < num_cams / 2 * 4 * 4; ++i) { + input_k_data_.push_back(input_k_data_[i]); + } + memcpy(k_data_ptr, input_k_data_.data(), num_cams * 16 * sizeof(float)); + + std::vector timestamp(num_cams, 0.0f); + for (int i = num_cams / 2; i < num_cams; ++i) { + timestamp[i] = 1.0f; + } + memcpy(timestamp_ptr, timestamp.data(), num_cams * sizeof(float)); + + FDTensor *tensor = image_batch->Tensor(); // [num_cams,3,320,800] + tensor->ExpandDim(0); // [num_cams,3,320,800] -> [1,num_cams,3,320,800] + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h new file mode 100755 index 0000000000..c5b925a409 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Petr serials model. 
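The Apply() above hands the runtime three tensors per request: the stacked images with shape [1, num_cams, 3, 320, 800], one 4x4 matrix per camera in k_data with shape [1, num_cams, 4, 4], and a per-camera timestamp of shape [1, num_cams] that is 0 for the current-frame cameras and 1 for the repeated previous-frame cameras. A small standalone sketch of that duplication and timestamp fill, with plain std::vector standing in for FDTensor (values are placeholders):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int num_cams = 12;  // current-frame cameras plus their repeats
  // Assume 6 calibrated 4x4 matrices (16 floats each) are already loaded.
  std::vector<float> k_data(6 * 16, 0.5f);

  // Duplicate the first num_cams/2 matrices for the second half of the cameras.
  for (int i = 0; i < num_cams / 2 * 4 * 4; ++i) {
    k_data.push_back(k_data[i]);
  }

  // Timestamp: 0 for the first half, 1 for the repeated half.
  std::vector<float> timestamp(num_cams, 0.0f);
  for (int i = num_cams / 2; i < num_cams; ++i) {
    timestamp[i] = 1.0f;
  }

  std::printf("k_data floats: %zu, timestamps: %zu\n", k_data.size(),
              timestamp.size());  // expected: 192 and 12
  return 0;
}
```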
+ */ +class ULTRAINFER_DECL PetrPreprocessor : public ProcessorManager { +public: + PetrPreprocessor() = default; + /** \brief Create a preprocessor instance for Petr model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * smoke/infer_cfg.yml + */ + explicit PetrPreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs); + +protected: + bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; + + float scale_ = 1.0f; + std::vector mean_; + std::vector std_; + + std::vector input_k_data_{ + -1.40307297e-03, 9.07780395e-06, 4.84838307e-01, -5.43047376e-02, + -1.40780103e-04, 1.25770375e-05, 1.04126692e+00, 7.67668605e-01, + -1.02884378e-05, -1.41007011e-03, 1.02823459e-01, -3.07415128e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -9.39000631e-04, -7.65239349e-07, 1.14073277e+00, 4.46270645e-01, + 1.04998052e-03, 1.91798881e-05, 2.06218868e-01, 7.42717385e-01, + 1.48074005e-05, -1.40855671e-03, 7.45946690e-02, -3.16081315e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -7.0699735e-04, 4.2389297e-07, -5.5183989e-01, -5.3276348e-01, + -1.2281288e-03, 2.5626015e-05, 1.0212017e+00, 6.1102939e-01, + -2.2421273e-05, -1.4170362e-03, 9.3639769e-02, -3.0863306e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00, + 2.2227580e-03, 2.5312484e-06, -9.7261822e-01, 9.0684637e-02, + 1.9360810e-04, 2.1347081e-05, -1.0779887e+00, -7.9227984e-01, + 4.3742721e-06, -2.2310747e-03, 1.0842450e-01, -2.9406491e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00, + 5.97175560e-04, -5.88774265e-06, -1.15893924e+00, -4.49921310e-01, + -1.28312141e-03, 3.58297058e-07, 1.48300052e-01, 1.14334166e-01, + -2.80917516e-06, -1.41527120e-03, 8.37693438e-02, -2.36765608e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + 3.6048229e-04, 3.8333174e-06, 7.9871160e-01, 4.3321830e-01, + 1.3671946e-03, 6.7484652e-06, -8.4722507e-01, 1.9411178e-01, + 7.5027779e-06, -1.4139183e-03, 8.2083985e-02, -2.4505949e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00}; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc new file mode 100755 index 0000000000..3bf2abc13f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
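The hard-coded input_k_data_ above appears to hold six flattened 4x4 camera matrices (96 floats, one 16-float block per camera, each block ending in the homogeneous row 0, 0, 0, 1); the preprocessor later duplicates these blocks for the repeated cameras. For inspecting such a blob, a row-major indexing sketch (the helper and placeholder values are illustrative, not part of the library):

```cpp
#include <cstdio>
#include <vector>

// Read element (row, col) of the cam-th flattened 4x4 matrix.
inline float KAt(const std::vector<float> &k, int cam, int row, int col) {
  return k[cam * 16 + row * 4 + col];
}

int main() {
  std::vector<float> k(6 * 16, 0.0f);  // placeholder instead of the real values
  for (int cam = 0; cam < 6; ++cam) {
    k[cam * 16 + 15] = 1.0f;  // bottom-right element of each 4x4 block
  }
  std::printf("cam 0, (3,3) = %.1f\n", KAt(k, 0, 3, 3));
  return 0;
}
```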
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h"
+
+#include "ultrainfer/vision/utils/utils.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace perception {
+
+SmokePostprocessor::SmokePostprocessor() {}
+
+bool SmokePostprocessor::Run(const std::vector<FDTensor> &tensors,
+                             std::vector<PerceptionResult> *results) {
+  results->resize(1);
+  (*results)[0].Clear();
+  (*results)[0].Reserve(tensors[0].shape[0]);
+  if (tensors[0].dtype != FDDataType::FP32) {
+    FDERROR << "Only support post process with float32 data." << std::endl;
+    return false;
+  }
+  const float *data = reinterpret_cast<const float *>(tensors[0].Data());
+  auto result = &(*results)[0];
+  for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 14) {
+    // item 1 : class
+    // item 2 : observation angle α
+    // item 3 ~ 6 : box2d x1, y1, x2, y2
+    // item 7 ~ 9 : box3d h, w, l
+    // item 10 ~ 12 : box3d bottom center x, y, z
+    // item 13 : box3d yaw angle
+    // item 14 : score
+    std::vector<float> vec(data + i, data + i + 14);
+    result->scores.push_back(vec[13]);
+    result->label_ids.push_back(vec[0]);
+    result->boxes.emplace_back(std::array<float, 7>{
+        vec[2], vec[3], vec[4], vec[5], vec[6], vec[7], vec[8]});
+    result->center.emplace_back(std::array<float, 3>{vec[9], vec[10], vec[11]});
+    result->observation_angle.push_back(vec[1]);
+    result->yaw_angle.push_back(vec[12]);
+  }
+
+  result->valid.push_back(true);   // 0 scores
+  result->valid.push_back(true);   // 1 label_ids
+  result->valid.push_back(true);   // 2 boxes
+  result->valid.push_back(true);   // 3 center
+  result->valid.push_back(true);   // 4 observation_angle
+  result->valid.push_back(true);   // 5 yaw_angle
+  result->valid.push_back(false);  // 6 velocity
+
+  return true;
+}
+
+} // namespace perception
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h
new file mode 100755
index 0000000000..046513cd89
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace perception {
+/*! @brief Postprocessor object for Smoke serials model.
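Both 3D postprocessors fill PerceptionResult::valid to advertise which optional fields were produced: SMOKE emits no velocity, PETR emits no observation angle. A consumer should check the flag before reading the corresponding vector. A hedged sketch of that pattern with a stand-in struct (field order taken from the comments above; the type and helper names are illustrative, not UltraInfer APIs):

```cpp
#include <cstdio>
#include <vector>

// Stand-in mirroring the flag order used above:
// 0 scores, 1 label_ids, 2 boxes, 3 center, 4 observation_angle,
// 5 yaw_angle, 6 velocity.
struct ResultView {
  std::vector<bool> valid;
  std::vector<float> observation_angle;
};

void PrintObservationAngles(const ResultView &r) {
  if (r.valid.size() > 4 && r.valid[4]) {
    for (float a : r.observation_angle) std::printf("alpha=%.3f\n", a);
  } else {
    std::printf("observation_angle not produced by this model\n");
  }
}

int main() {
  ResultView smoke{{true, true, true, true, true, true, false}, {0.12f}};
  ResultView petr{{true, true, true, true, false, true, true}, {}};
  PrintObservationAngles(smoke);  // prints the angle
  PrintObservationAngles(petr);   // reports the field as absent
  return 0;
}
```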
+ */ +class ULTRAINFER_DECL SmokePostprocessor { +public: + /** \brief Create a postprocessor instance for Smoke serials model + */ + SmokePostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc new file mode 100755 index 0000000000..22d5a4de9f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc @@ -0,0 +1,161 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +SmokePreprocessor::SmokePreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool SmokePreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." 
<< std::endl; + return false; + } + + // read for preprocess + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + input_k_data_ = cfg["k_data"].as>(); + input_ratio_data_ = cfg["ratio_data"].as>(); + return true; +} + +bool SmokePreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + // There are 3 outputs, image, k_data, ratio_data + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + + // Allocate memory for k_data + (*outputs)[2].Resize({batch, 3, 3}, FDDataType::FP32); + + // Allocate memory for ratio_data + (*outputs)[0].Resize({batch, 2}, FDDataType::FP32); + + auto *k_data_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + auto *ratio_data_ptr = reinterpret_cast((*outputs)[0].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." 
<< std::endl; + return false; + } + } + + memcpy(k_data_ptr + i * 9, input_k_data_.data(), 9 * sizeof(float)); + memcpy(ratio_data_ptr + i * 2, input_ratio_data_.data(), 2 * sizeof(float)); + } + + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[1].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h new file mode 100755 index 0000000000..e4409a5ea1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Smoke serials model. + */ +class ULTRAINFER_DECL SmokePreprocessor : public ProcessorManager { +public: + SmokePreprocessor() = default; + /** \brief Create a preprocessor instance for Smoke model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * smoke/infer_cfg.yml + */ + explicit SmokePreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs); + +protected: + bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; + + std::vector input_k_data_; + + std::vector input_ratio_data_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc new file mode 100755 index 0000000000..47a93fc161 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
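SmokePreprocessor reads two extra arrays from infer_cfg.yml, k_data (9 floats, the flattened 3x3 camera intrinsic matrix) and ratio_data (2 floats), and copies them per image into the second and third runtime inputs above. A small sketch of flattening an intrinsic matrix into that 9-float layout; the row-major ordering and the numeric values are assumptions for illustration, not taken from a real config:

```cpp
#include <array>
#include <cstdio>
#include <vector>

// Flatten a 3x3 intrinsic matrix row by row into a 9-float blob.
std::vector<float> FlattenK(const std::array<std::array<float, 3>, 3> &K) {
  std::vector<float> flat;
  flat.reserve(9);
  for (const auto &row : K) {
    for (float v : row) flat.push_back(v);
  }
  return flat;
}

int main() {
  // Illustrative intrinsics: focal lengths and principal point.
  std::array<std::array<float, 3>, 3> K = {{{721.5f, 0.f, 609.6f},
                                            {0.f, 721.5f, 172.9f},
                                            {0.f, 0.f, 1.f}}};
  auto flat = FlattenK(K);
  std::printf("k_data has %zu floats, k_data[0]=%.1f\n", flat.size(), flat[0]);
  return 0;
}
```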
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/smoke/smoke.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Smoke::Smoke(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Smoke::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool Smoke::Predict(const cv::Mat &im, PerceptionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Smoke::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[1].name = InputInfoOfRuntime(1).name; + reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h new file mode 100755 index 0000000000..ee496888b0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief smoke model object used when to load a smoke model exported by smoke. + */ +class ULTRAINFER_DECL Smoke : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g smoke/model.pdiparams + * \param[in] params_file Path of parameter file, e.g smoke/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Smoke(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/smoke"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of Smoke + virtual SmokePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Smoke + virtual SmokePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + SmokePreprocessor preprocessor_; + SmokePostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc new file mode 100755 index 0000000000..f16b5b064e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
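For reference, a minimal C++ driver for the Smoke class declared above. The model paths are placeholders, and the aggregate header name plus the Initialized() accessor are assumed to mirror the usual UltraInfer model base class; treat this as a sketch, not a tested example:

```cpp
#include <iostream>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"  // assumed aggregate header

int main() {
  namespace perception = ultrainfer::vision::perception;
  // Placeholder paths; point these at your own exported SMOKE model.
  perception::Smoke model("smoke/model.pdmodel", "smoke/model.pdiparams",
                          "smoke/infer_cfg.yml");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the SMOKE model." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.png");
  ultrainfer::vision::PerceptionResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "detections: " << result.scores.size() << std::endl;
  return 0;
}
```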
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindSmoke(pybind11::module &m) { + pybind11::class_(m, "SmokePreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::SmokePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in SmokePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "SmokePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::SmokePostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "SmokePostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::SmokePostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "SmokePostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Smoke") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Smoke &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Smoke &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Smoke::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Smoke::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc new file mode 100755 index 0000000000..ba68dbb4d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindSmoke(pybind11::module &m); +void BindPetr(pybind11::module &m); +void BindCenterpoint(pybind11::module &m); +void BindCaddn(pybind11::module &m); + +void BindPerception(pybind11::module &m) { + auto perception_module = + m.def_submodule("perception", "3D object perception models."); + BindSmoke(perception_module); + BindPetr(perception_module); + BindCenterpoint(perception_module); + BindCaddn(perception_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc new file mode 100755 index 0000000000..fb9eea1299 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/segmentation/ppseg/model.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegModel::PaddleSegModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file), postprocessor_(config_file) { + if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + valid_rknpu_backends = {Backend::RKNPU2}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_directml_backends = {Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PaddleSegModel::Clone() const { + std::unique_ptr clone_model = + ultrainfer::utils::make_unique(PaddleSegModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PaddleSegModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool PaddleSegModel::Predict(cv::Mat *im, SegmentationResult *result) { + return Predict(*im, result); +} + +bool PaddleSegModel::Predict(const cv::Mat &im, SegmentationResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PaddleSegModel::BatchPredict(const std::vector &imgs, + std::vector *results) { + std::vector fd_images = WrapMat(imgs); + // Record the shape of input images + std::map>> imgs_info; + preprocessor_.SetImgsInfo(&imgs_info); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + if (!postprocessor_.Run(reused_output_tensors_, results, imgs_info)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h new file mode 100755 index 0000000000..41d02c1da7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/segmentation/ppseg/postprocessor.h" +#include "ultrainfer/vision/segmentation/ppseg/preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All segmentation model APIs are defined inside this namespace + * + */ +namespace segmentation { + +/*! 
@brief PaddleSeg serials model object used when to load a PaddleSeg model + * exported by PaddleSeg repository + */ +class ULTRAINFER_DECL PaddleSegModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g unet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g unet/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g unet/deploy.yml + * \param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` \param[in] + * model_format Model format of the loaded model, default is Paddle format + */ + PaddleSegModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleSegModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleDetModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "PaddleSeg"; } + + /** \brief DEPRECATED Predict the segmentation result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output segmentation + * result will be writen to this structure \return true if the segmentation + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, SegmentationResult *result); + + /** \brief Predict the segmentation result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output segmentation + * result will be writen to this structure \return true if the segmentation + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, SegmentationResult *result); + + /** \brief Predict the segmentation results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output segmentation result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of PaddleSegModel + virtual PaddleSegPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of PaddleSegModel + virtual PaddleSegPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + PaddleSegPreprocessor preprocessor_; + PaddleSegPostprocessor postprocessor_; +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc new file mode 100755 index 0000000000..62d6c83e10 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc @@ -0,0 +1,291 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
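A corresponding minimal driver for the PaddleSegModel API declared above, again with placeholder paths and the assumption that the aggregate ultrainfer/vision.h header and Initialized() accessor are available; the store_score_map toggle is the one exposed by the postprocessor in this patch:

```cpp
#include <iostream>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"  // assumed aggregate header

int main() {
  namespace seg = ultrainfer::vision::segmentation;
  // Placeholder paths to a PaddleSeg export (model, params, deploy.yaml).
  seg::PaddleSegModel model("pp_liteseg/model.pdmodel",
                            "pp_liteseg/model.pdiparams",
                            "pp_liteseg/deploy.yaml");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the segmentation model." << std::endl;
    return -1;
  }

  // Optional: keep per-pixel scores next to the label map.
  model.GetPostprocessor().SetStoreScoreMap(true);

  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::SegmentationResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "label_map size: " << result.label_map.size() << std::endl;
  return 0;
}
```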
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/segmentation/ppseg/postprocessor.h" +#include "ultrainfer/function/cast.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegPostprocessor::PaddleSegPostprocessor(const std::string &config_file) { + FDASSERT(ReadFromConfig(config_file), + "Failed to create PaddleSegPreprocessor."); + initialized_ = true; +} + +bool PaddleSegPostprocessor::ReadFromConfig(const std::string &config_file) { + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file + << ", maybe you should check this file." << std::endl; + return false; + } + + if (cfg["Deploy"]["output_op"]) { + std::string output_op = cfg["Deploy"]["output_op"].as(); + if (output_op == "softmax") { + is_with_softmax_ = true; + is_with_argmax_ = false; + } else if (output_op == "argmax") { + is_with_softmax_ = false; + is_with_argmax_ = true; + } else if (output_op == "none") { + is_with_softmax_ = false; + is_with_argmax_ = false; + } else { + FDERROR << "Unexcepted output_op operator in deploy.yml: " << output_op + << "." << std::endl; + return false; + } + } + return true; +} + +bool PaddleSegPostprocessor::SliceOneResultFromBatchInferResults( + const FDTensor &infer_results, FDTensor *infer_result, + const std::vector &infer_result_shape, const int64_t &start_idx) { + int64_t infer_batch = infer_results.shape[0]; + if (infer_batch == 1) { + *infer_result = infer_results; + // batch is 1, so ignore + infer_result->shape = infer_result_shape; + } else { + if (infer_results.dtype == FDDataType::FP32) { + const float_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::FP32, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::INT64) { + const int64_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::INT64, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::INT32) { + const int32_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::INT32, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::UINT8) { + const uint8_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::UINT8, + reinterpret_cast(const_cast(infer_results_ptr))); + } else { + FDASSERT( + false, + "Require the data type for slicing is int64, fp32 or int32, but now " + "it's %s.", + Str(infer_results.dtype).c_str()) + return false; + } + } + return true; +} + +bool PaddleSegPostprocessor::ProcessWithScoreResult( + const FDTensor &infer_result, const int64_t &out_num, + SegmentationResult *result) { + const uint8_t *argmax_infer_result_buffer = nullptr; + const float_t 
*score_infer_result_buffer = nullptr; + FDTensor argmax_infer_result; + FDTensor max_score_result; + std::vector reduce_dim{-1}; + function::ArgMax(infer_result, &argmax_infer_result, -1, FDDataType::UINT8); + function::Max(infer_result, &max_score_result, reduce_dim); + score_infer_result_buffer = + reinterpret_cast(max_score_result.CpuData()); + std::memcpy(result->score_map.data(), score_infer_result_buffer, + out_num * sizeof(float_t)); + + argmax_infer_result_buffer = + reinterpret_cast(argmax_infer_result.CpuData()); + + std::memcpy(result->label_map.data(), argmax_infer_result_buffer, + out_num * sizeof(uint8_t)); + + return true; +} + +bool PaddleSegPostprocessor::ProcessWithLabelResult( + const FDTensor &infer_result, const int64_t &out_num, + SegmentationResult *result) { + if (infer_result.dtype == FDDataType::INT64) { + const int64_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } else if (infer_result.dtype == FDDataType::INT32) { + const int32_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } else if (infer_result.dtype == FDDataType::UINT8) { + const uint8_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + memcpy(result->label_map.data(), infer_result_buffer, + out_num * sizeof(uint8_t)); + } else { + FDASSERT( + false, + "Require the data type to process is int64, int32 or uint8, but now " + "it's %s.", + Str(infer_result.dtype).c_str()); + return false; + } + return true; +} + +bool PaddleSegPostprocessor::Run( + const std::vector &infer_results, + std::vector *results, + const std::map>> &imgs_info) { + // PaddleSeg has three types of inference output: + // 1. output with argmax and without softmax. 3-D matrix N(C)HW, Channel + // is batch_size, the element in matrix is classified label_id INT64 type. + // 2. output without argmax and without softmax. 4-D matrix NCHW, N(batch) + // is batch_size, Channel is the num of classes. The element is the logits + // of classes FP32 type + // 3. output without argmax and with softmax. 4-D matrix NCHW, the result + // of 2 with softmax layer + // Xdeploy output: + // 1. label_map + // 2. score_map(optional) + // 3. shape: 2-D HW + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." 
<< std::endl; + return false; + } + + FDDataType infer_results_dtype = infer_results[0].dtype; + FDASSERT(infer_results_dtype == FDDataType::INT64 || + infer_results_dtype == FDDataType::FP32 || + infer_results_dtype == FDDataType::INT32, + "Require the data type of output is int64, fp32 or int32, but now " + "it's %s.", + Str(infer_results_dtype).c_str()); + + auto iter_input_imgs_shape_list = imgs_info.find("shape_info"); + FDASSERT(iter_input_imgs_shape_list != imgs_info.end(), + "Cannot find shape_info from imgs_info."); + + // For Argmax Softmax function to store transformed result below + FDTensor transform_infer_results; + + int64_t infer_batch = infer_results[0].shape[0]; + int64_t infer_channel = 0; + int64_t infer_height = 0; + int64_t infer_width = 0; + + if (is_with_argmax_) { + // infer_results with argmax + infer_channel = 1; + infer_height = infer_results[0].shape[1]; + infer_width = infer_results[0].shape[2]; + } else { + // infer_results without argmax + infer_channel = 1; + infer_height = infer_results[0].shape[2]; + infer_width = infer_results[0].shape[3]; + if (store_score_map_) { + infer_channel = infer_results[0].shape[1]; + std::vector dim{0, 2, 3, 1}; + function::Transpose(infer_results[0], &transform_infer_results, dim); + if (!is_with_softmax_ && apply_softmax_) { + function::Softmax(transform_infer_results, &transform_infer_results, 1); + } + } else { + function::ArgMax(infer_results[0], &transform_infer_results, 1, + FDDataType::UINT8); + infer_results_dtype = transform_infer_results.dtype; + } + } + + int64_t infer_chw = infer_channel * infer_height * infer_width; + + results->resize(infer_batch); + for (int i = 0; i < infer_batch; i++) { + SegmentationResult *result = &((*results)[i]); + result->Clear(); + int64_t start_idx = i * infer_chw; + + FDTensor infer_result; + std::vector infer_result_shape = {infer_height, infer_width, + infer_channel}; + + if (is_with_argmax_) { + SliceOneResultFromBatchInferResults(infer_results[0], &infer_result, + infer_result_shape, start_idx); + } else { + SliceOneResultFromBatchInferResults(transform_infer_results, + &infer_result, infer_result_shape, + start_idx); + } + bool is_resized = false; + int input_height = iter_input_imgs_shape_list->second[i][0]; + int input_width = iter_input_imgs_shape_list->second[i][1]; + if (input_height != infer_height || input_width != infer_width) { + is_resized = true; + } + + FDMat mat; + // Resize interpration + int interpolation = cv::INTER_LINEAR; + if (is_resized) { + if (infer_results_dtype == FDDataType::INT64 || + infer_results_dtype == FDDataType::INT32) { + function::Cast(infer_result, &infer_result, FDDataType::UINT8); + // label map resize with nearest interpolation + interpolation = cv::INTER_NEAREST; + } + mat = std::move(Mat::Create(infer_result, ProcLib::OPENCV)); + Resize::Run(&mat, input_width, input_height, -1.0f, -1.0f, interpolation, + false, ProcLib::OPENCV); + mat.ShareWithTensor(&infer_result); + } + result->shape = infer_result.shape; + // output shape is 2-D HW layout, so out_num = H * W + int out_num = + std::accumulate(result->shape.begin(), result->shape.begin() + 2, 1, + std::multiplies()); + + if (!is_with_argmax_ && store_score_map_) { + // output with label_map and score_map + result->contain_score_map = true; + result->Resize(out_num); + ProcessWithScoreResult(infer_result, out_num, result); + } else { + result->Resize(out_num); + ProcessWithLabelResult(infer_result, out_num, result); + } + // HWC remove C + result->shape.erase(result->shape.begin() + 2); + 
} + return true; +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h new file mode 100755 index 0000000000..97b1b93606 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { +/*! @brief Postprocessor object for PaddleSeg serials model. + */ +class ULTRAINFER_DECL PaddleSegPostprocessor { +public: + /** \brief Create a postprocessor instance for PaddleSeg serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppliteseg/deploy.yaml + */ + explicit PaddleSegPostprocessor(const std::string &config_file); + + /** \brief Process the result of runtime and fill to SegmentationResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] imgs_info The original input images shape info map, key is + * "shape_info", value is vector> a{{height, width}} \return + * true if the postprocess successed, otherwise false + */ + virtual bool + Run(const std::vector &infer_results, + std::vector *results, + const std::map>> &imgs_info); + + /** \brief Get apply_softmax property of PaddleSeg model, default is false + */ + bool GetApplySoftmax() const { return apply_softmax_; } + + /// Set apply_softmax value, bool type required + void SetApplySoftmax(bool value) { apply_softmax_ = value; } + + /// Get store_score_map property of PaddleSeg model, default is false + bool GetStoreScoreMap() const { return store_score_map_; } + + /// Set store_score_map value, bool type required + void SetStoreScoreMap(bool value) { store_score_map_ = value; } + +private: + virtual bool ReadFromConfig(const std::string &config_file); + + virtual bool SliceOneResultFromBatchInferResults( + const FDTensor &infer_results, FDTensor *infer_result, + const std::vector &infer_result_shape, const int64_t &start_idx); + + virtual bool ProcessWithScoreResult(const FDTensor &infer_result, + const int64_t &out_num, + SegmentationResult *result); + + virtual bool ProcessWithLabelResult(const FDTensor &infer_result, + const int64_t &out_num, + SegmentationResult *result); + + bool is_with_softmax_ = false; + + bool is_with_argmax_ = true; + + bool apply_softmax_ = false; + + bool store_score_map_ = false; + + bool initialized_ = false; +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc new file mode 100755 index 0000000000..67ecfa6bee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPSeg(pybind11::module &m) { + pybind11::class_(m, "PaddleSegPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::segmentation::PaddleSegPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + // Record the shape of input images + std::map>> imgs_info; + std::vector outputs; + self.SetImgsInfo(&imgs_info); + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleSegPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, imgs_info); + ; + }) + .def("disable_normalize", + [](vision::segmentation::PaddleSegPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::segmentation::PaddleSegPreprocessor &self) { + self.DisablePermute(); + }) + .def_property( + "is_vertical_screen", + &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen, + &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen); + + pybind11::class_( + m, "PaddleSegModel") + .def(pybind11::init()) + .def("clone", + [](vision::segmentation::PaddleSegModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::segmentation::PaddleSegModel &self, + pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::SegmentationResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](vision::segmentation::PaddleSegModel &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::segmentation::PaddleSegModel::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::segmentation::PaddleSegModel::GetPostprocessor); + + pybind11::class_( + m, "PaddleSegPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::segmentation::PaddleSegPostprocessor &self, + std::vector &inputs, + const std::map>> + &imgs_info) { + std::vector results; + if (!self.Run(inputs, &results, imgs_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleSegPostprocessor."); + } + return results; + }) + .def("run", + [](vision::segmentation::PaddleSegPostprocessor &self, + std::vector &input_array, + const std::map>> + 
&imgs_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, imgs_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleSegPostprocessor."); + } + return results; + }) + .def_property( + "apply_softmax", + &vision::segmentation::PaddleSegPostprocessor::GetApplySoftmax, + &vision::segmentation::PaddleSegPostprocessor::SetApplySoftmax) + .def_property( + "store_score_map", + &vision::segmentation::PaddleSegPostprocessor::GetStoreScoreMap, + &vision::segmentation::PaddleSegPostprocessor::SetStoreScoreMap); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc new file mode 100755 index 0000000000..4ed2c15b65 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/segmentation/ppseg/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegPreprocessor::PaddleSegPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleSegPreprocessor."); + initialized_ = true; +} + +bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + processors_.push_back(std::make_shared()); + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + if (cfg["Deploy"]["transforms"]) { + auto preprocess_cfg = cfg["Deploy"]["transforms"]; + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + if (op["type"].as() == "Normalize") { + if (!disable_normalize_) { + std::vector mean = {0.5, 0.5, 0.5}; + std::vector std = {0.5, 0.5, 0.5}; + if (op["mean"]) { + mean = op["mean"].as>(); + } + if (op["std"]) { + std = op["std"].as>(); + } + processors_.push_back(std::make_shared(mean, std)); + } + } else if (op["type"].as() == "Resize") { + is_contain_resize_op_ = true; + const auto &target_size = op["target_size"]; + int resize_width = target_size[0].as(); + int resize_height = target_size[1].as(); + processors_.push_back( + std::make_shared(resize_width, resize_height)); + } else { + std::string op_name = op["type"].as(); + FDERROR << "Unexcepted preprocess operator: " << op_name << "." 
+ << std::endl; + return false; + } + } + } + if (cfg["Deploy"]["input_shape"]) { + auto input_shape = cfg["Deploy"]["input_shape"]; + int input_height = input_shape[2].as(); + int input_width = input_shape[3].as(); + if (input_height != -1 && input_width != -1 && !is_contain_resize_op_) { + is_contain_resize_op_ = true; + processors_.insert(processors_.begin(), + std::make_shared(input_width, input_height)); + } + } + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +bool PaddleSegPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + std::vector *images = image_batch->mats; + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + std::vector> shape_info; + for (const auto &image : *images) { + shape_info.push_back( + {static_cast(image.Height()), static_cast(image.Width())}); + } + (*imgs_info_)["shape_info"] = shape_info; + for (size_t i = 0; i < processors_.size(); ++i) { + if (processors_[i]->Name() == "Resize") { + auto processor = dynamic_cast(processors_[i].get()); + int resize_width = -1; + int resize_height = -1; + std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); + if (is_vertical_screen_ && (resize_width > resize_height)) { + if (!(processor->SetWidthAndHeight(resize_height, resize_width))) { + FDERROR << "Failed to set width and height of " + << processors_[i]->Name() << " processor." << std::endl; + } + } + break; + } + } + size_t img_num = images->size(); + // Batch preprocess : resize all images to the largest image shape in batch + if (!is_contain_resize_op_ && img_num > 1) { + int max_width = 0; + int max_height = 0; + for (size_t i = 0; i < img_num; ++i) { + max_width = std::max(max_width, ((*images)[i]).Width()); + max_height = std::max(max_height, ((*images)[i]).Height()); + } + pre_resize_op_->SetWidthAndHeight(max_width, max_height); + for (size_t i = 0; i < img_num; ++i) { + if (!(*pre_resize_op_)(&(*images)[i])) { + FDERROR << "Failed to batch resize max_width and max_height" + << std::endl; + } + } + } + for (size_t i = 0; i < img_num; ++i) { + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(&((*images)[i]))) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + } + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +void PaddleSegPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +void PaddleSegPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + } +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h new file mode 100755 index 0000000000..3810476753 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { +/*! @brief Preprocessor object for PaddleSeg serials model. + */ +class ULTRAINFER_DECL PaddleSegPreprocessor : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PaddleSeg serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppliteseg/deploy.yaml + */ + explicit PaddleSegPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Get is_vertical_screen property of PP-HumanSeg model, default is false + bool GetIsVerticalScreen() const { return is_vertical_screen_; } + + /// Set is_vertical_screen value, bool type required + void SetIsVerticalScreen(bool value) { is_vertical_screen_ = value; } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + /// This function will set imgs_info_ in PaddleSegPreprocessor + void SetImgsInfo( + std::map>> *imgs_info) { + imgs_info_ = imgs_info; + } + /// This function will get imgs_info_ in PaddleSegPreprocessor + std::map>> *GetImgsInfo() { + return imgs_info_; + } + +private: + virtual bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + std::string config_file_; + + /** \brief For PP-HumanSeg model, set true if the input image is vertical + * image(height > width), default value is false + */ + bool is_vertical_screen_ = false; + + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + + bool is_contain_resize_op_ = false; + + bool initialized_ = false; + + std::map>> *imgs_info_; + std::shared_ptr pre_resize_op_ = std::make_shared(0, 0); +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc new file mode 100755 index 0000000000..2e5706020f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPSeg(pybind11::module &m); + +void BindSegmentation(pybind11::module &m) { + auto segmentation_module = + m.def_submodule("segmentation", "Image semantic segmentation models."); + BindPPSeg(segmentation_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc new file mode 100755 index 0000000000..ae1bc25554 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
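// A minimal sketch of how the segmentation classes bound in ppseg_pybind.cc
// above are driven from C++. The include path, the demo file names, the
// Initialized() check and the constructor argument order are assumptions
// inferred from the other Paddle wrappers in this patch, not values fixed by
// these files.
#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"

static void RunPaddleSegDemo() {
  namespace seg = ultrainfer::vision::segmentation;
  seg::PaddleSegModel model("ppliteseg/model.pdmodel",
                            "ppliteseg/model.pdiparams",
                            "ppliteseg/deploy.yaml");
  if (!model.Initialized()) {
    return;
  }
  // Optional switches that the pybind layer above also exposes.
  model.GetPreprocessor().SetIsVerticalScreen(false);
  model.GetPostprocessor().SetApplySoftmax(true);

  cv::Mat im = cv::imread("demo.jpg");
  ultrainfer::vision::SegmentationResult res;
  if (model.Predict(&im, &res)) {
    // res.label_map is expected to hold one class id per input pixel.
  }
}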
+ +#include "ultrainfer/vision/sr/ppsr/basicvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +BasicVSR::BasicVSR(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h new file mode 100755 index 0000000000..2c6a35390a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL BasicVSR : public PPMSVSR { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g BasicVSR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g BasicVSR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + BasicVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained BasicVSR + std::string ModelName() const override { return "BasicVSR"; } +}; + +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc new file mode 100755 index 0000000000..dc2905a3e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
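// A short sketch of constructing the BasicVSR wrapper declared above. The
// model directory, the RuntimeOption::UseGpu() call and the Initialized()
// check are illustrative assumptions rather than values required by this
// patch.
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/sr/ppsr/basicvsr.h"

static bool RunBasicVsr(const std::vector<cv::Mat> &frames,
                        std::vector<cv::Mat> *restored) {
  ultrainfer::RuntimeOption option;
  option.UseGpu();  // assumption: pick a GPU backend from valid_gpu_backends
  ultrainfer::vision::sr::BasicVSR model("BasicVSR/model.pdmodel",
                                         "BasicVSR/model.pdiparams", option);
  if (!model.Initialized()) {
    return false;
  }
  std::vector<cv::Mat> imgs = frames;  // Predict takes a non-const reference
  return model.Predict(imgs, *restored);
}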
+ +#include "ultrainfer/vision/sr/ppsr/edvr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +EDVR::EDVR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool EDVR::Postprocess(std::vector &infer_results, + std::vector &results) { + // group to image + // output_shape is [b, n, c, h, w] n = frame_nums b=1(default) + // b and n is dependence export model shape + // see + // https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/video_super_resolution.md + auto output_shape = infer_results[0].shape; + // EDVR + int h_ = output_shape[2]; + int w_ = output_shape[3]; + int c_ = output_shape[1]; + int frame_num = 1; + float *out_data = static_cast(infer_results[0].Data()); + cv::Mat temp = cv::Mat::zeros(h_, w_, CV_32FC3); // RGB image + int pix_num = h_ * w_; + int frame_pix_num = pix_num * c_; + for (int frame = 0; frame < frame_num; frame++) { + int index = 0; + for (int h = 0; h < h_; ++h) { + for (int w = 0; w < w_; ++w) { + temp.at(h, w) = { + out_data[2 * pix_num + index + frame_pix_num * frame], + out_data[pix_num + index + frame_pix_num * frame], + out_data[index + frame_pix_num * frame]}; + index += 1; + } + } + // tmp data type is float[0-1.0],convert to uint type + cv::Mat res = cv::Mat::zeros(temp.size(), CV_8UC3); + temp.convertTo(res, CV_8UC3, 255); + results.push_back(res); + } + return true; +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h new file mode 100755 index 0000000000..cca88716e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
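// EDVR::Postprocess above folds the planar CHW network output back into an
// interleaved cv::Mat that OpenCV can display or write. A standalone sketch
// of the same index arithmetic for a single three-channel frame follows; the
// helper name is illustrative.
#include <opencv2/opencv.hpp>

// `chw` points to 3*h*w floats in [0, 1], stored plane by plane. Reversing
// the plane order below mirrors Postprocess(): preprocessing ran BGR2RGB, so
// swapping the planes back yields a BGR image.
static cv::Mat ChwFloatToBgr8u(const float *chw, int h, int w) {
  cv::Mat temp(h, w, CV_32FC3);
  const int pix_num = h * w;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      const int idx = y * w + x;
      temp.at<cv::Vec3f>(y, x) = {chw[2 * pix_num + idx], chw[pix_num + idx],
                                  chw[idx]};
    }
  }
  cv::Mat res;
  temp.convertTo(res, CV_8UC3, 255);  // scale [0, 1] floats to 8-bit
  return res;
}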
+ +#pragma once +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL EDVR : public PPMSVSR { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g EDVR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g EDVR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + EDVR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained EDVR + std::string ModelName() const override { return "EDVR"; } + +private: + bool Postprocess(std::vector &infer_results, + std::vector &results) override; +}; +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h new file mode 100755 index 0000000000..91d3c19b19 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/sr/ppsr/basicvsr.h" +#include "ultrainfer/vision/sr/ppsr/edvr.h" +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc new file mode 100755 index 0000000000..d60125c939 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
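// model.h above is only an umbrella header: including it is enough to choose
// any of the PP-SR wrappers, which all share PPMSVSR::Predict(). A hedged
// sketch of selecting a model by name; the directory layout and the factory
// itself are illustrative assumptions.
#include <memory>
#include <string>

#include "ultrainfer/vision/sr/ppsr/model.h"

static std::unique_ptr<ultrainfer::vision::sr::PPMSVSR> MakeSrModel(
    const std::string &name, const std::string &model_dir,
    const ultrainfer::RuntimeOption &option) {
  using namespace ultrainfer::vision::sr;
  const std::string model = model_dir + "/model.pdmodel";
  const std::string params = model_dir + "/model.pdiparams";
  if (name == "EDVR") {
    return std::unique_ptr<PPMSVSR>(new EDVR(model, params, option));
  }
  if (name == "BasicVSR") {
    return std::unique_ptr<PPMSVSR>(new BasicVSR(model, params, option));
  }
  return std::unique_ptr<PPMSVSR>(new PPMSVSR(model, params, option));
}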
+ +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +PPMSVSR::PPMSVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool PPMSVSR::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + mean_ = {0., 0., 0.}; + scale_ = {1., 1., 1.}; + return true; +} + +bool PPMSVSR::Preprocess(Mat *mat, std::vector &output) { + BGR2RGB::Run(mat); + Normalize::Run(mat, mean_, scale_, true); + HWC2CHW::Run(mat); + // Csat float + float *ptr = static_cast(mat->Data()); + size_t size = mat->Width() * mat->Height() * mat->Channels(); + output = std::vector(ptr, ptr + size); + return true; +} + +bool PPMSVSR::Predict(std::vector &imgs, + std::vector &results) { + // Theoretically, the more frame nums there are, the better the result will + // be, but it will lead to a significant increase in memory + int frame_num = imgs.size(); + int rows = imgs[0].rows; + int cols = imgs[0].cols; + int channels = imgs[0].channels(); + std::vector input_tensors; + input_tensors.resize(1); + std::vector all_data_temp; + for (int i = 0; i < frame_num; i++) { + Mat mat(imgs[i]); + std::vector data_temp; + Preprocess(&mat, data_temp); + all_data_temp.insert(all_data_temp.end(), data_temp.begin(), + data_temp.end()); + } + // share memory in order to avoid memory copy, data type must be float32 + input_tensors[0].SetExternalData({1, frame_num, channels, rows, cols}, + FDDataType::FP32, all_data_temp.data()); + input_tensors[0].shape = {1, frame_num, channels, rows, cols}; + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(output_tensors, results)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +bool PPMSVSR::Postprocess(std::vector &infer_results, + std::vector &results) { + // group to image + // output_shape is [b, n, c, h, w] n = frame_nums b=1(default) + // b and n is dependence export model shape + // see + // https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/video_super_resolution.md + auto output_shape = infer_results[0].shape; + // PP-MSVSR + int h_ = output_shape[3]; + int w_ = output_shape[4]; + int c_ = output_shape[2]; + int frame_num = output_shape[1]; + + float *out_data = static_cast(infer_results[0].Data()); + cv::Mat temp = cv::Mat::zeros(h_, w_, CV_32FC3); // RGB image + int pix_num = h_ * w_; + int frame_pix_num = pix_num * c_; + for (int frame = 0; frame < frame_num; frame++) { + int index = 0; + for (int h = 0; h < h_; ++h) { + for (int w = 0; w < w_; ++w) { + temp.at(h, w) = { + out_data[2 * pix_num + index + frame_pix_num * frame], + out_data[pix_num + index + frame_pix_num * frame], + out_data[index + frame_pix_num * frame]}; + index += 1; + } + } + // tmp data type is float[0-1.0],convert to uint type + cv::Mat res = cv::Mat::zeros(temp.size(), CV_8UC3); + temp.convertTo(res, CV_8UC3, 255); + results.push_back(res); + } + return true; +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h new file mode 100755 index 0000000000..7a360cb908 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
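// PPMSVSR::Postprocess above walks a [1, n, c, h, w] float tensor frame by
// frame, while EDVR overrides it for a [1, c, h, w] tensor, which is why the
// two read different shape indices. Both rely on the usual row-major offset
// rule, sketched here for reference:
#include <cstddef>

static inline std::size_t PlanarOffset(int frame, int channel, int y, int x,
                                       int c, int h, int w) {
  // frame * (c*h*w) + channel * (h*w) + y * w + x, matching
  // frame_pix_num * frame + channel * pix_num + index in the code above.
  return ((static_cast<std::size_t>(frame) * c + channel) * h + y) * w + x;
}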
+#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL PPMSVSR : public UltraInferModel { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g PPMSVSR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g PPMSVSR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + PPMSVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained PP-MSVSR。 + std::string ModelName() const override { return "PPMSVSR"; } + /** + * get super resolution frame sequence + * @param[in] imgs origin frame sequences + * @param[in] results super resolution frame sequence + * @return true if the prediction successed, otherwise false + */ + virtual bool Predict(std::vector &imgs, + std::vector &results); + +protected: + PPMSVSR(){}; + + virtual bool Initialize(); + + virtual bool Preprocess(Mat *mat, std::vector &output); + + virtual bool Postprocess(std::vector &infer_results, + std::vector &results); + + std::vector mean_; + std::vector scale_; +}; +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc new file mode 100755 index 0000000000..23f095fc8f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
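// A rough sketch of driving PPMSVSR::Predict() declared above over a whole
// video. The cv::VideoCapture/VideoWriter plumbing, the mp4v codec and the
// chunk size of 10 frames are illustrative; the Predict() implementation
// notes that memory grows with the number of frames fed per call.
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h"

static bool SuperResolveVideo(ultrainfer::vision::sr::PPMSVSR *model,
                              const std::string &in_path,
                              const std::string &out_path) {
  cv::VideoCapture cap(in_path);
  if (!cap.isOpened()) {
    return false;
  }
  const double fps = cap.get(cv::CAP_PROP_FPS);
  cv::VideoWriter writer;
  std::vector<cv::Mat> chunk, results;
  cv::Mat frame;
  auto flush = [&]() {
    if (chunk.empty()) {
      return true;
    }
    results.clear();
    if (!model->Predict(chunk, results)) {
      return false;
    }
    for (auto &img : results) {
      if (!writer.isOpened()) {
        writer.open(out_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps,
                    img.size());
      }
      writer.write(img);
    }
    chunk.clear();
    return true;
  };
  while (cap.read(frame)) {
    chunk.push_back(frame.clone());
    // Small chunks keep the [1, n, c, h, w] input tensor bounded in memory.
    if (chunk.size() == 10 && !flush()) {
      return false;
    }
  }
  return flush();
}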
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPSR(pybind11::module &m) { + pybind11::class_(m, "PPMSVSR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::PPMSVSR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); + pybind11::class_(m, "EDVR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::EDVR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); + pybind11::class_(m, "BasicVSR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::BasicVSR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc new file mode 100755 index 0000000000..3ae1a47453 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPSR(pybind11::module &m); + +void BindSR(pybind11::module &m) { + auto sr_module = m.def_submodule("sr", "sr(super resolution) submodule"); + BindPPSR(sr_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc new file mode 100755 index 0000000000..db32a5701b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc @@ -0,0 +1,389 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#include +#include +#include + +#include "ultrainfer/vision/tracking/pptracking/lapjv.h" + +namespace ultrainfer { +namespace vision { +namespace tracking { + +/** Column-reduction and reduction transfer for a dense cost matrix. + */ +int _ccrrt_dense(const int n, float *cost[], int *free_rows, int *x, int *y, + float *v) { + int n_free_rows; + bool *unique; + + for (int i = 0; i < n; i++) { + x[i] = -1; + v[i] = LARGE; + y[i] = 0; + } + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + const float c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + } + } + NEW(unique, bool, n); + memset(unique, TRUE, n); + { + int j = n; + do { + j--; + const int i = y[j]; + if (x[i] < 0) { + x[i] = j; + } else { + unique[i] = FALSE; + y[j] = -1; + } + } while (j > 0); + } + n_free_rows = 0; + for (int i = 0; i < n; i++) { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } else if (unique[i]) { + const int j = x[i]; + float min = LARGE; + for (int j2 = 0; j2 < n; j2++) { + if (j2 == static_cast(j)) { + continue; + } + const float c = cost[i][j2] - v[j2]; + if (c < min) { + min = c; + } + } + v[j] -= min; + } + } + FREE(unique); + return n_free_rows; +} + +/** Augmenting row reduction for a dense cost matrix. + */ +int _carr_dense(const int n, float *cost[], const int n_free_rows, + int *free_rows, int *x, int *y, float *v) { + int current = 0; + int new_free_rows = 0; + int rr_cnt = 0; + while (current < n_free_rows) { + int i0; + int j1, j2; + float v1, v2, v1_new; + bool v1_lowers; + + rr_cnt++; + const int free_i = free_rows[current++]; + j1 = 0; + v1 = cost[free_i][0] - v[0]; + j2 = -1; + v2 = LARGE; + for (int j = 1; j < n; j++) { + const float c = cost[free_i][j] - v[j]; + if (c < v2) { + if (c >= v1) { + v2 = c; + j2 = j; + } else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + i0 = y[j1]; + v1_new = v[j1] - (v2 - v1); + v1_lowers = v1_new < v[j1]; + if (rr_cnt < current * n) { + if (v1_lowers) { + v[j1] = v1_new; + } else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) { + if (v1_lowers) { + free_rows[--current] = i0; + } else { + free_rows[new_free_rows++] = i0; + } + } + } else { + if (i0 >= 0) { + free_rows[new_free_rows++] = i0; + } + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; +} + +/** Find columns with minimum d[j] and put them on the SCAN list. + */ +int _find_dense(const int n, int lo, float *d, int *cols, int *y) { + int hi = lo + 1; + float mind = d[cols[lo]]; + for (int k = hi; k < n; k++) { + int j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; +} + +// Scan all columns in TODO starting from arbitrary column in SCAN +// and try to decrease d of the TODO columns using the SCAN column. 
+int _scan_dense(const int n, float *cost[], int *plo, int *phi, float *d, + int *cols, int *pred, int *y, float *v) { + int lo = *plo; + int hi = *phi; + float h, cred_ij; + + while (lo != hi) { + int j = cols[lo++]; + const int i = y[j]; + const float mind = d[j]; + h = cost[i][j] - v[j] - mind; + // For all columns in TODO + for (int k = hi; k < n; k++) { + j = cols[k]; + cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) { + if (y[j] < 0) { + return j; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; +} + +/** Single iteration of modified Dijkstra shortest path algorithm as explained + * in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ +int find_path_dense(const int n, float *cost[], const int start_i, int *y, + float *v, int *pred) { + int lo = 0, hi = 0; + int final_j = -1; + int n_ready = 0; + int *cols; + float *d; + + NEW(cols, int, n); + NEW(d, float, n); + + for (int i = 0; i < n; i++) { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + while (final_j == -1) { + // No columns left on the SCAN list. + if (lo == hi) { + n_ready = lo; + hi = _find_dense(n, lo, d, cols, y); + for (int k = lo; k < hi; k++) { + const int j = cols[k]; + if (y[j] < 0) { + final_j = j; + } + } + } + if (final_j == -1) { + final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v); + } + } + + { + const float mind = d[cols[lo]]; + for (int k = 0; k < n_ready; k++) { + const int j = cols[k]; + v[j] += d[j] - mind; + } + } + + FREE(cols); + FREE(d); + + return final_j; +} + +/** Augment for a dense cost matrix. + */ +int _ca_dense(const int n, float *cost[], const int n_free_rows, int *free_rows, + int *x, int *y, float *v) { + int *pred; + + NEW(pred, int, n); + + for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { + int i = -1, j; + int k = 0; + + j = find_path_dense(n, cost, *pfree_i, y, v, pred); + while (i != *pfree_i) { + i = pred[j]; + y[j] = i; + SWAP_INDICES(j, x[i]); + k++; + } + } + FREE(pred); + return 0; +} + +/** Solve dense sparse LAP. + */ +int lapjv_internal(const cv::Mat &cost, const bool extend_cost, + const float cost_limit, int *x, int *y) { + int n_rows = cost.rows; + int n_cols = cost.cols; + int n; + if (n_rows == n_cols) { + n = n_rows; + } else if (!extend_cost) { + throw std::invalid_argument( + "Square cost array expected. 
If cost is intentionally non-square, pass " + "extend_cost=True."); + } + + // Get extend cost + if (extend_cost || cost_limit < LARGE) { + n = n_rows + n_cols; + } + cv::Mat cost_expand(n, n, CV_32F); + float expand_value; + if (cost_limit < LARGE) { + expand_value = cost_limit / 2; + } else { + double max_v; + minMaxLoc(cost, nullptr, &max_v); + expand_value = static_cast(max_v) + 1.; + } + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_expand.at(i, j) = expand_value; + if (i >= n_rows && j >= n_cols) { + cost_expand.at(i, j) = 0; + } else if (i < n_rows && j < n_cols) { + cost_expand.at(i, j) = cost.at(i, j); + } + } + } + + // Convert Mat to pointer array + float **cost_ptr; + NEW(cost_ptr, float *, n); + for (int i = 0; i < n; ++i) { + NEW(cost_ptr[i], float, n); + } + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_ptr[i][j] = cost_expand.at(i, j); + } + } + + int ret; + int *free_rows; + float *v; + int *x_c; + int *y_c; + + NEW(free_rows, int, n); + NEW(v, float, n); + NEW(x_c, int, n); + NEW(y_c, int, n); + + ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v); + int i = 0; + while (ret > 0 && i < 2) { + ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + i++; + } + if (ret > 0) { + ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + } + FREE(v); + FREE(free_rows); + for (int i = 0; i < n; ++i) { + FREE(cost_ptr[i]); + } + FREE(cost_ptr); + if (ret != 0) { + if (ret == -1) { + throw "Out of memory."; + } + throw "Unknown error (lapjv_internal)"; + } + // Get output of x, y, opt + for (int i = 0; i < n; ++i) { + if (i < n_rows) { + x[i] = x_c[i]; + if (x[i] >= n_cols) { + x[i] = -1; + } + } + if (i < n_cols) { + y[i] = y_c[i]; + if (y[i] >= n_rows) { + y[i] = -1; + } + } + } + + FREE(x_c); + FREE(y_c); + return ret; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h new file mode 100755 index 0000000000..93dbbb531d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
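// A small sketch of how lapjv_internal() above is typically invoked by the
// tracker: build a float cost matrix (for example 1 - IoU), pass
// extend_cost=true so non-square inputs and the cost limit both work, and
// read the assignments back from x and y, where -1 means "left unassigned".
// The matrix values below are made up for illustration.
#include <vector>

#include <opencv2/core.hpp>

#include "ultrainfer/vision/tracking/pptracking/lapjv.h"

static void LapjvExample() {
  cv::Mat cost = (cv::Mat_<float>(2, 3) << 0.1f, 0.9f, 0.8f,
                                           0.7f, 0.2f, 0.9f);
  std::vector<int> x(cost.rows, -1);  // x[i]: column matched to row i
  std::vector<int> y(cost.cols, -1);  // y[j]: row matched to column j
  ultrainfer::vision::tracking::lapjv_internal(
      cost, /*extend_cost=*/true, /*cost_limit=*/0.7f, x.data(), y.data());
  // Expected here: x == {0, 1} and y == {0, 1, -1}; column 2 stays unmatched.
}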
+ +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.h +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#pragma once +#define LARGE 1000000 + +#if !defined TRUE +#define TRUE 1 +#endif +#if !defined FALSE +#define FALSE 0 +#endif + +#define NEW(x, t, n) \ + if ((x = reinterpret_cast(malloc(sizeof(t) * (n)))) == 0) { \ + return -1; \ + } +#define FREE(x) \ + if (x != 0) { \ + free(x); \ + x = 0; \ + } +#define SWAP_INDICES(a, b) \ + { \ + int_t _temp_index = a; \ + a = b; \ + b = _temp_index; \ + } +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef signed int int_t; +typedef unsigned int uint_t; +typedef double cost_t; +typedef char boolean; +typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; + +int lapjv_internal(const cv::Mat &cost, const bool extend_cost, + const float cost_limit, int *x, int *y); + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc new file mode 100755 index 0000000000..ffd6680499 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/tracking/pptracking/letter_box_resize.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +bool LetterBoxResize::ImplByOpenCV(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." 
+ << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false); + Pad::Run(mat, top, bottom, left, right, color_); + return true; +} + +#ifdef ENABLE_FLYCV +bool LetterBoxResize::ImplByFlyCV(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." + << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::FLYCV); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::FLYCV); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool LetterBoxResize::ImplByCvCuda(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." 
+ << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::CVCUDA); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::CVCUDA); + return true; +} +#endif + +#ifdef ENABLE_CUDA +bool LetterBoxResize::ImplByCuda(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." + << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::CUDA); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::CUDA); + return true; +} +#endif + +bool LetterBoxResize::Run(Mat *mat, const std::vector &target_size, + const std::vector &color, ProcLib lib) { + auto l = LetterBoxResize(target_size, color); + return l(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h new file mode 100755 index 0000000000..634265dc10 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
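// The ImplBy*() overloads above all share the same geometry: scale by the
// smaller of the two ratios, then pad out to the target. A standalone sketch
// of that arithmetic (plain math, no ultrainfer types); for example a
// 1280x720 (WxH) frame letter-boxed into a 576x320 (WxH) target becomes a
// 569x320 resize with 3 px of left padding and 4 px of right padding.
#include <algorithm>
#include <cmath>

struct LetterBoxPlan {
  int new_w, new_h, top, bottom, left, right;
};

static LetterBoxPlan PlanLetterBox(int origin_w, int origin_h, int target_h,
                                   int target_w) {
  const float scale = std::min(static_cast<float>(target_h) / origin_h,
                               static_cast<float>(target_w) / origin_w);
  LetterBoxPlan p;
  p.new_w = static_cast<int>(std::round(origin_w * scale));
  p.new_h = static_cast<int>(std::round(origin_h * scale));
  const float padw = (target_w - p.new_w) / 2.0f;
  const float padh = (target_h - p.new_h) / 2.0f;
  // The +/- 0.1 split mirrors the implementation: an odd pixel of padding
  // goes to the bottom/right side.
  p.top = static_cast<int>(std::round(padh - 0.1f));
  p.bottom = static_cast<int>(std::round(padh + 0.1f));
  p.left = static_cast<int>(std::round(padw - 0.1f));
  p.right = static_cast<int>(std::round(padw + 0.1f));
  return p;
}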
+ +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +class LetterBoxResize : public Processor { +public: + LetterBoxResize(const std::vector &target_size, + const std::vector &color) { + target_size_ = target_size; + color_ = color; + } + + std::string Name() override { return "LetterBoxResize"; } + bool ImplByOpenCV(Mat *mat) override; +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat) override; +#endif +#ifdef ENABLE_CVCUDA + virtual bool ImplByCvCuda(FDMat *mat) override; +#endif + +#ifdef ENABLE_CUDA + virtual bool ImplByCuda(FDMat *mat); +#endif + + static bool Run(Mat *mat, const std::vector &target_size, + const std::vector &color, + ProcLib lib = ProcLib::DEFAULT); + +private: + std::vector target_size_; + std::vector color_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc new file mode 100755 index 0000000000..b7d2611ead --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc @@ -0,0 +1,316 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/tracking/pptracking/model.h" + +#include "ultrainfer/vision/tracking/pptracking/letter_box_resize.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace tracking { + +PPTracking::PPTracking(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool PPTracking::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + // Get draw_threshold for visualization + if (cfg["draw_threshold"].IsDefined()) { + draw_threshold_ = cfg["draw_threshold"].as(); + } else { + FDERROR << "Please set draw_threshold." << std::endl; + return false; + } + // Get config for tracker + if (cfg["tracker"].IsDefined()) { + if (cfg["tracker"]["conf_thres"].IsDefined()) { + conf_thresh_ = cfg["tracker"]["conf_thres"].as(); + } else { + std::cerr << "Please set conf_thres in tracker." 
<< std::endl; + return false; + } + if (cfg["tracker"]["min_box_area"].IsDefined()) { + min_box_area_ = cfg["tracker"]["min_box_area"].as(); + } + if (cfg["tracker"]["tracked_thresh"].IsDefined()) { + tracked_thresh_ = cfg["tracker"]["tracked_thresh"].as(); + } + } + + processors_.push_back(std::make_shared()); + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + + } else if (op_name == "LetterBoxResize") { + auto target_size = op["target_size"].as>(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + std::vector color{127.0f, 127.0f, 127.0f}; + if (op["fill_value"].IsDefined()) { + color = op["fill_value"].as>(); + } + processors_.push_back( + std::make_shared(target_size, color)); + } else if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + continue; + // processors_.push_back(std::make_shared()); + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + processors_.push_back(std::make_shared()); + + FuseTransforms(&processors_); + return true; +} + +bool PPTracking::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // create JDETracker instance + jdeTracker_ = std::unique_ptr(new JDETracker); + return true; +} + +bool PPTracking::Predict(cv::Mat *img, MOTResult *result) { + Mat mat(*img); + std::vector input_tensors; + + if (!Preprocess(&mat, &input_tensors)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +bool PPTracking::Preprocess(Mat *mat, std::vector *outputs) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + for (size_t i = 0; i < processors_.size(); ++i) { + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + + // LetterBoxResize(mat); + // Normalize::Run(mat,mean_,scale_,is_scale_); + // HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + outputs->resize(3); + // image_shape + (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(0).name); + float *shape = static_cast((*outputs)[0].MutableData()); + shape[0] = mat->Height(); + shape[1] = mat->Width(); + // image + (*outputs)[1].name = InputInfoOfRuntime(1).name; + mat->ShareWithTensor(&((*outputs)[1])); + (*outputs)[1].ExpandDim(0); + // scale + (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(2).name); + float *scale = static_cast((*outputs)[2].MutableData()); + scale[0] = mat->Height() * 1.0 / origin_h; + scale[1] = mat->Width() * 1.0 / origin_w; + return true; +} + +void FilterDets(const float conf_thresh, const cv::Mat &dets, + std::vector *index) { + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 4); + if (score > conf_thresh) { + index->push_back(i); + } + } +} + +bool PPTracking::Postprocess(std::vector &infer_result, + MOTResult *result) { + auto bbox_shape = infer_result[0].shape; + auto bbox_data = static_cast(infer_result[0].Data()); + + auto emb_shape = infer_result[1].shape; + auto emb_data = static_cast(infer_result[1].Data()); + + cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data); + cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data); + + result->Clear(); + std::vector tracks; + std::vector valid; + FilterDets(conf_thresh_, dets, &valid); + cv::Mat new_dets, new_emb; + for (int i = 0; i < valid.size(); ++i) { + new_dets.push_back(dets.row(valid[i])); + new_emb.push_back(emb.row(valid[i])); + } + jdeTracker_->update(new_dets, new_emb, &tracks); + if (tracks.size() == 0) { + std::array box = { + int(*dets.ptr(0, 0)), int(*dets.ptr(0, 1)), + int(*dets.ptr(0, 2)), int(*dets.ptr(0, 3))}; + result->boxes.push_back(box); + result->ids.push_back(1); + result->scores.push_back(*dets.ptr(0, 4)); + } else { + std::vector::iterator titer; + for (titer = tracks.begin(); titer != tracks.end(); ++titer) { + if (titer->score < tracked_thresh_) { + continue; + } else { + float w = titer->ltrb[2] - titer->ltrb[0]; + float h = titer->ltrb[3] - titer->ltrb[1]; + bool vertical = w / h > 1.6; + float area = w * h; + if (area > min_box_area_ && !vertical) { + std::array box = {int(titer->ltrb[0]), int(titer->ltrb[1]), + int(titer->ltrb[2]), int(titer->ltrb[3])}; + result->boxes.push_back(box); + result->ids.push_back(titer->id); + result->scores.push_back(titer->score); + } + } + } + } + if (!is_record_trail_) + return true; + int nums = result->boxes.size(); + for (int i = 0; i < nums; i++) { + float center_x = (result->boxes[i][0] + result->boxes[i][2]) / 2; + float center_y = (result->boxes[i][1] + result->boxes[i][3]) / 2; + int id = result->ids[i]; + recorder_->Add(id, {int(center_x), int(center_y)}); + } + return true; +} + +void 
PPTracking::BindRecorder(TrailRecorder *recorder) { + recorder_ = recorder; + is_record_trail_ = true; +} + +void PPTracking::UnbindRecorder() { + is_record_trail_ = false; + std::map>>::iterator iter; + for (iter = recorder_->records.begin(); iter != recorder_->records.end(); + iter++) { + iter->second.clear(); + iter->second.shrink_to_fit(); + } + recorder_->records.clear(); + std::map>>().swap(recorder_->records); + recorder_ = nullptr; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h new file mode 100755 index 0000000000..10e08d970b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/tracking/pptracking/tracker.h" +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { +struct TrailRecorder { + std::map>> records; + void Add(int id, const std::array &record); +}; + +inline void TrailRecorder::Add(int id, const std::array &record) { + auto iter = records.find(id); + if (iter != records.end()) { + auto trail = records[id]; + trail.push_back(record); + records[id] = trail; + } else { + records[id] = {record}; + } +} + +class ULTRAINFER_DECL PPTracking : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g pptracking/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * pptracking/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g pptracking/infer_cfg.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPTracking(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /// Get model's name + std::string ModelName() const override { return "pptracking"; } + + /** \brief Predict the detection result for an input image(consecutive) + * + * \param[in] im The input image data which is consecutive frame, comes from + * imread() or videoCapture.read() \param[in] result The output tracking + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *img, MOTResult *result); + /** 
\brief bind tracking trail struct + * + * \param[in] recorder The MOT trail will record the trail of object + */ + void BindRecorder(TrailRecorder *recorder); + /** \brief cancel binding and clear trail information + */ + void UnbindRecorder(); + +private: + bool BuildPreprocessPipelineFromConfig(); + + bool Initialize(); + + bool Preprocess(Mat *img, std::vector *outputs); + + bool Postprocess(std::vector &infer_result, MOTResult *result); + + std::vector> processors_; + std::string config_file_; + float draw_threshold_; + float conf_thresh_; + float tracked_thresh_; + float min_box_area_; + bool is_record_trail_ = false; + std::unique_ptr jdeTracker_; + TrailRecorder *recorder_ = nullptr; +}; + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc new file mode 100755 index 0000000000..52c0b67fb5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPTracking(pybind11::module &m) { + + pybind11::class_(m, "TrailRecorder") + .def(pybind11::init<>()) + .def_readwrite("records", &vision::tracking::TrailRecorder::records) + .def("add", &vision::tracking::TrailRecorder::Add); + pybind11::class_(m, + "PPTracking") + .def(pybind11::init()) + .def("predict", + [](vision::tracking::PPTracking &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MOTResult res; + self.Predict(&mat, &res); + return res; + }) + .def("bind_recorder", &vision::tracking::PPTracking::BindRecorder) + .def("unbind_recorder", &vision::tracking::PPTracking::UnbindRecorder); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc new file mode 100755 index 0000000000..0944cafa74 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc @@ -0,0 +1,297 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
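+
+// Overview of the association cascade implemented in JDETracker::update()
+// below (a descriptive summary of this file, with an illustrative example):
+//   1. Activated tracks (tracked + lost) are matched to detections with a
+//      fused cost, cost = lambda * d_embedding + (1 - lambda) * d_mahalanobis,
+//      gated to FLT_MAX whenever d_mahalanobis > chi2inv95[4]; assignment
+//      threshold 0.7.
+//   2. Remaining tracked trajectories are matched to the leftover detections
+//      by IoU distance with threshold 0.5.
+//   3. Unconfirmed trajectories are matched to what is still left by IoU
+//      distance with threshold 0.7.
+//   Unmatched detections with score >= det_thresh start new trajectories, and
+//   lost trajectories older than max_lost_time frames are removed.
+// Example of the fused cost with lambda = 0.98: d_embedding = 0.30 and
+// d_mahalanobis = 5.0 give 0.98 * 0.30 + 0.02 * 5.0 = 0.394; if
+// d_mahalanobis exceeded chi2inv95[4] (about 9.49), the entry would be set to
+// FLT_MAX and the pair could not be matched.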
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include +#include +#include +#include + +#include "ultrainfer/vision/tracking/pptracking/lapjv.h" +#include "ultrainfer/vision/tracking/pptracking/tracker.h" + +#define mat2vec4f(m) \ + cv::Vec4f(*m.ptr(0, 0), *m.ptr(0, 1), *m.ptr(0, 2), \ + *m.ptr(0, 3)) + +namespace ultrainfer { +namespace vision { +namespace tracking { + +static std::map chi2inv95 = { + {1, 3.841459f}, {2, 5.991465f}, {3, 7.814728f}, + {4, 9.487729f}, {5, 11.070498f}, {6, 12.591587f}, + {7, 14.067140f}, {8, 15.507313f}, {9, 16.918978f}}; + +JDETracker::JDETracker() + : timestamp(0), max_lost_time(30), lambda(0.98f), det_thresh(0.3f) {} + +bool JDETracker::update(const cv::Mat &dets, const cv::Mat &emb, + std::vector *tracks) { + ++timestamp; + TrajectoryPool candidates(dets.rows); + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 1); + const cv::Mat <rb_ = dets(cv::Rect(2, i, 4, 1)); + cv::Vec4f ltrb = mat2vec4f(ltrb_); + const cv::Mat &embedding = emb(cv::Rect(0, i, emb.cols, 1)); + candidates[i] = Trajectory(ltrb, score, embedding); + } + + TrajectoryPtrPool tracked_trajectories; + TrajectoryPtrPool unconfirmed_trajectories; + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) + tracked_trajectories.push_back(&this->tracked_trajectories[i]); + else + unconfirmed_trajectories.push_back(&this->tracked_trajectories[i]); + } + + TrajectoryPtrPool trajectory_pool = + tracked_trajectories + &(this->lost_trajectories); + + for (size_t i = 0; i < trajectory_pool.size(); ++i) + trajectory_pool[i]->predict(); + + Match matches; + std::vector mismatch_row; + std::vector mismatch_col; + + cv::Mat cost = motion_distance(trajectory_pool, candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + MatchIterator miter; + TrajectoryPtrPool activated_trajectories; + TrajectoryPtrPool retrieved_trajectories; + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = trajectory_pool[miter->first]; + Trajectory &ct = candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(&ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(&ct, count, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool next_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + next_candidates[i] = &candidates[mismatch_col[i]]; + + TrajectoryPtrPool next_trajectory_pool; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + int j = mismatch_row[i]; + if (trajectory_pool[j]->state == Tracked) + next_trajectory_pool.push_back(trajectory_pool[j]); + } + + cost = iou_distance(next_trajectory_pool, next_candidates); + linear_assignment(cost, 0.5f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = next_trajectory_pool[miter->first]; + Trajectory *ct = next_candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(ct, count, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool lost_trajectories; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + Trajectory *pt = next_trajectory_pool[mismatch_row[i]]; + if (pt->state != Lost) { + pt->mark_lost(); + 
lost_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool nnext_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + nnext_candidates[i] = next_candidates[mismatch_col[i]]; + cost = iou_distance(unconfirmed_trajectories, nnext_candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + unconfirmed_trajectories[miter->first]->update( + nnext_candidates[miter->second], timestamp); + activated_trajectories.push_back(unconfirmed_trajectories[miter->first]); + } + + TrajectoryPtrPool removed_trajectories; + + for (size_t i = 0; i < mismatch_row.size(); ++i) { + unconfirmed_trajectories[mismatch_row[i]]->mark_removed(); + removed_trajectories.push_back(unconfirmed_trajectories[mismatch_row[i]]); + } + + for (size_t i = 0; i < mismatch_col.size(); ++i) { + if (nnext_candidates[mismatch_col[i]]->score < det_thresh) + continue; + nnext_candidates[mismatch_col[i]]->activate(count, timestamp); + activated_trajectories.push_back(nnext_candidates[mismatch_col[i]]); + } + + for (size_t i = 0; i < this->lost_trajectories.size(); ++i) { + Trajectory < = this->lost_trajectories[i]; + if (timestamp - lt.timestamp > max_lost_time) { + lt.mark_removed(); + removed_trajectories.push_back(<); + } + } + + TrajectoryPoolIterator piter; + for (piter = this->tracked_trajectories.begin(); + piter != this->tracked_trajectories.end();) { + if (piter->state != Tracked) + piter = this->tracked_trajectories.erase(piter); + else + ++piter; + } + + this->tracked_trajectories += activated_trajectories; + this->tracked_trajectories += retrieved_trajectories; + + this->lost_trajectories -= this->tracked_trajectories; + this->lost_trajectories += lost_trajectories; + this->lost_trajectories -= this->removed_trajectories; + this->removed_trajectories += removed_trajectories; + remove_duplicate_trajectory(&this->tracked_trajectories, + &this->lost_trajectories); + + tracks->clear(); + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) { + Track track = {this->tracked_trajectories[i].id, + this->tracked_trajectories[i].score, + this->tracked_trajectories[i].ltrb}; + tracks->push_back(track); + } + } + return 0; +} + +cv::Mat JDETracker::motion_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + if (0 == a.size() || 0 == b.size()) + return cv::Mat(a.size(), b.size(), CV_32F); + + cv::Mat edists = embedding_distance(a, b); + cv::Mat mdists = mahalanobis_distance(a, b); + cv::Mat fdists = lambda * edists + (1 - lambda) * mdists; + + const float gate_thresh = chi2inv95[4]; + for (int i = 0; i < fdists.rows; ++i) { + for (int j = 0; j < fdists.cols; ++j) { + if (*mdists.ptr(i, j) > gate_thresh) + *fdists.ptr(i, j) = FLT_MAX; + } + } + + return fdists; +} + +void JDETracker::linear_assignment(const cv::Mat &cost, float cost_limit, + Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col) { + matches->clear(); + mismatch_row->clear(); + mismatch_col->clear(); + if (cost.empty()) { + for (int i = 0; i < cost.rows; ++i) + mismatch_row->push_back(i); + for (int i = 0; i < cost.cols; ++i) + mismatch_col->push_back(i); + return; + } + + float opt = 0; + cv::Mat x(cost.rows, 1, CV_32S); + cv::Mat y(cost.cols, 1, CV_32S); + + lapjv_internal(cost, true, cost_limit, reinterpret_cast(x.data), + reinterpret_cast(y.data)); + + for (int i = 0; i < x.rows; ++i) { + int j = *x.ptr(i); + if (j >= 0) + matches->insert({i, 
j}); + else + mismatch_row->push_back(i); + } + + for (int i = 0; i < y.rows; ++i) { + int j = *y.ptr(i); + if (j < 0) + mismatch_col->push_back(i); + } + + return; +} + +void JDETracker::remove_duplicate_trajectory(TrajectoryPool *a, + TrajectoryPool *b, + float iou_thresh) { + if (a->size() == 0 || b->size() == 0) + return; + + cv::Mat dist = iou_distance(*a, *b); + cv::Mat mask = dist < iou_thresh; + std::vector idx; + cv::findNonZero(mask, idx); + + std::vector da; + std::vector db; + for (size_t i = 0; i < idx.size(); ++i) { + int ta = (*a)[idx[i].y].timestamp - (*a)[idx[i].y].starttime; + int tb = (*b)[idx[i].x].timestamp - (*b)[idx[i].x].starttime; + if (ta > tb) + db.push_back(idx[i].x); + else + da.push_back(idx[i].y); + } + + int id = 0; + TrajectoryPoolIterator piter; + for (piter = a->begin(); piter != a->end();) { + std::vector::iterator iter = find(da.begin(), da.end(), id++); + if (iter != da.end()) + piter = a->erase(piter); + else + ++piter; + } + + id = 0; + for (piter = b->begin(); piter != b->end();) { + std::vector::iterator iter = find(db.begin(), db.end(), id++); + if (iter != db.end()) + piter = b->erase(piter); + else + ++piter; + } +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h new file mode 100755 index 0000000000..12337e465f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
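+
+// Rough usage sketch (variable names are illustrative only):
+//
+//   JDETracker tracker;
+//   cv::Mat dets;                 // N x 6 CV_32FC1, one detection per row
+//   cv::Mat emb;                  // N x D CV_32FC1, one embedding per row
+//   std::vector<Track> tracks;
+//   tracker.update(dets, emb, &tracks);
+//   for (const auto &t : tracks) { /* use t.id, t.score, t.ltrb */ }
+//
+// update() reads the confidence from column 1 and the (left, top, right,
+// bottom) box from columns 2..5 of each detection row; column 0 is presumably
+// the class id, following the PaddleDetection output layout.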
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/tracking/pptracking/trajectory.h" +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef std::map Match; +typedef std::map::iterator MatchIterator; + +struct Track { + int id; + float score; + cv::Vec4f ltrb; +}; + +class ULTRAINFER_DECL JDETracker { +public: + JDETracker(); + + virtual bool update(const cv::Mat &dets, const cv::Mat &emb, + std::vector *tracks); + virtual ~JDETracker() {} + +private: + cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b); + void linear_assignment(const cv::Mat &cost, float cost_limit, Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col); + void remove_duplicate_trajectory(TrajectoryPool *a, TrajectoryPool *b, + float iou_thresh = 0.15f); + +private: + int timestamp; + TrajectoryPool tracked_trajectories; + TrajectoryPool lost_trajectories; + TrajectoryPool removed_trajectories; + int max_lost_time; + float lambda; + float det_thresh; + int count = 0; +}; + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc new file mode 100755 index 0000000000..63dcf69ce6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc @@ -0,0 +1,529 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
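+
+// Note on TKalmanFilter (defined in trajectory.h): it is an 8-state constant
+// velocity filter whose measurement is the xyah box, i.e. (center_x,
+// center_y, aspect_ratio, height); the remaining four states are the
+// corresponding velocities. The noise terms below are scaled by the box
+// height in the DeepSORT-style fashion: e.g. with std_weight_position =
+// 1 / 20 and a box height of 160 px, the positional standard deviation is
+// 160 / 20 = 8 px, hence a variance of 64.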
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include "ultrainfer/vision/tracking/pptracking/trajectory.h" +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +void TKalmanFilter::init(const cv::Mat &measurement) { + measurement.copyTo(statePost(cv::Rect(0, 0, 1, 4))); + statePost(cv::Rect(0, 4, 1, 4)).setTo(0); + statePost.copyTo(statePre); + + float varpos = 2 * std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + float varvel = 10 * std_weight_velocity * (*measurement.ptr(3)); + varvel *= varvel; + + errorCovPost.setTo(0); + *errorCovPost.ptr(0, 0) = varpos; + *errorCovPost.ptr(1, 1) = varpos; + *errorCovPost.ptr(2, 2) = 1e-4f; + *errorCovPost.ptr(3, 3) = varpos; + *errorCovPost.ptr(4, 4) = varvel; + *errorCovPost.ptr(5, 5) = varvel; + *errorCovPost.ptr(6, 6) = 1e-10f; + *errorCovPost.ptr(7, 7) = varvel; + errorCovPost.copyTo(errorCovPre); +} + +const cv::Mat &TKalmanFilter::predict() { + float varpos = std_weight_position * (*statePre.ptr(3)); + varpos *= varpos; + float varvel = std_weight_velocity * (*statePre.ptr(3)); + varvel *= varvel; + + processNoiseCov.setTo(0); + *processNoiseCov.ptr(0, 0) = varpos; + *processNoiseCov.ptr(1, 1) = varpos; + *processNoiseCov.ptr(2, 2) = 1e-4f; + *processNoiseCov.ptr(3, 3) = varpos; + *processNoiseCov.ptr(4, 4) = varvel; + *processNoiseCov.ptr(5, 5) = varvel; + *processNoiseCov.ptr(6, 6) = 1e-10f; + *processNoiseCov.ptr(7, 7) = varvel; + + return cv::KalmanFilter::predict(); +} + +const cv::Mat &TKalmanFilter::correct(const cv::Mat &measurement) { + float varpos = std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + + measurementNoiseCov.setTo(0); + *measurementNoiseCov.ptr(0, 0) = varpos; + *measurementNoiseCov.ptr(1, 1) = varpos; + *measurementNoiseCov.ptr(2, 2) = 1e-2f; + *measurementNoiseCov.ptr(3, 3) = varpos; + + return cv::KalmanFilter::correct(measurement); +} + +void TKalmanFilter::project(cv::Mat *mean, cv::Mat *covariance) const { + float varpos = std_weight_position * (*statePost.ptr(3)); + varpos *= varpos; + + cv::Mat measurementNoiseCov_ = cv::Mat::eye(4, 4, CV_32F); + *measurementNoiseCov_.ptr(0, 0) = varpos; + *measurementNoiseCov_.ptr(1, 1) = varpos; + *measurementNoiseCov_.ptr(2, 2) = 1e-2f; + *measurementNoiseCov_.ptr(3, 3) = varpos; + + *mean = measurementMatrix * statePost; + cv::Mat temp = measurementMatrix * errorCovPost; + gemm(temp, measurementMatrix, 1, measurementNoiseCov_, 1, *covariance, + cv::GEMM_2_T); +} + +const cv::Mat &Trajectory::predict(void) { + if (state != Tracked) + *cv::KalmanFilter::statePost.ptr(7) = 0; + return TKalmanFilter::predict(); +} + +void Trajectory::update(Trajectory *traj, int timestamp_, + bool update_embedding_) { + timestamp = timestamp_; + ++length; + ltrb = traj->ltrb; + xyah = traj->xyah; + TKalmanFilter::correct(cv::Mat(traj->xyah)); + state = Tracked; + is_activated = true; + score = traj->score; + if (update_embedding_) + update_embedding(traj->current_embedding); +} + +void Trajectory::activate(int &cnt, int timestamp_) { + id = next_id(cnt); + TKalmanFilter::init(cv::Mat(xyah)); + length = 0; + state = Tracked; + if (timestamp_ == 1) { + is_activated = true; + } + timestamp = timestamp_; + starttime = timestamp_; +} + +void Trajectory::reactivate(Trajectory *traj, int &cnt, int timestamp_, + bool newid) { + TKalmanFilter::correct(cv::Mat(traj->xyah)); + 
update_embedding(traj->current_embedding); + length = 0; + state = Tracked; + is_activated = true; + timestamp = timestamp_; + if (newid) + id = next_id(cnt); +} + +void Trajectory::update_embedding(const cv::Mat &embedding) { + current_embedding = embedding / cv::norm(embedding); + if (smooth_embedding.empty()) { + smooth_embedding = current_embedding; + } else { + smooth_embedding = eta * smooth_embedding + (1 - eta) * current_embedding; + } + smooth_embedding = smooth_embedding / cv::norm(smooth_embedding); +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i].id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i].id); + } + } + + return sum; +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b) { + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + if (b[i]->smooth_embedding.empty()) + continue; + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + a.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return a; +} + +TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i].id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i].id); + if (iter == ids.end()) + dif.push_back(a[i]); + } + + return dif; +} + +TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b) { + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i].id; + + TrajectoryPoolIterator piter; + for (piter = a.begin(); piter != a.end();) { + std::vector::iterator iter = find(ids.begin(), ids.end(), piter->id); + if (iter == ids.end()) + ++piter; + else + piter = a.erase(piter); + } + + return a; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i]->id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool *b) { + TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i]->id; + + for (size_t i = 0; i < b->size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), (*b)[i].id); + if (iter == ids.end()) { + 
sum.push_back(&(*b)[i]); + ids.push_back((*b)[i].id); + } + } + + return sum; +} + +TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i]->id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i]->id); + if (iter == ids.end()) + dif.push_back(a[i]); + } + + return dif; +} + +cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + cv::Mat u = a[i].smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + // double dist = cv::norm(a[i].smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j]->smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j]->smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. 
- uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i].project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j]->xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +static inline float calc_inter_area(const cv::Vec4f &a, const cv::Vec4f &b) { + if (a[2] < b[0] || a[0] > b[2] || a[3] < b[1] || a[1] > b[3]) + return 0.f; + + float w = std::min(a[2], b[2]) - std::max(a[0], b[0]); + float h = std::min(a[3], b[3]) - std::max(a[1], b[1]); + return w * h; +} + +cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i].ltrb[2] - a[i].ltrb[0]; + float h = a[i].ltrb[3] - a[i].ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i].ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j]->ltrb[2] - b[j]->ltrb[0]; + float h = b[j]->ltrb[3] - b[j]->ltrb[1]; 
+ areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j]->ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h new file mode 100755 index 0000000000..d5df581c5d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h @@ -0,0 +1,213 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
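+
+// Two conventions used throughout this header and trajectory.cc:
+//   * Boxes are kept both as ltrb = (left, top, right, bottom) and as the
+//     Kalman measurement xyah = (center_x, center_y, width / height, height).
+//     For example, ltrb = (0, 0, 50, 100) maps to xyah = (25, 50, 0.5, 100)
+//     via ltrb2xyah() below.
+//   * Appearance embeddings are L2-normalized and smoothed with an
+//     exponential moving average, smooth = eta * smooth + (1 - eta) * current
+//     with eta = 0.9, then re-normalized (see Trajectory::update_embedding).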
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include "opencv2/video/tracking.hpp" +#include "ultrainfer/ultrainfer_model.h" +#include +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef enum { New = 0, Tracked = 1, Lost = 2, Removed = 3 } TrajectoryState; + +class Trajectory; +typedef std::vector TrajectoryPool; +typedef std::vector::iterator TrajectoryPoolIterator; +typedef std::vector TrajectoryPtrPool; +typedef std::vector::iterator TrajectoryPtrPoolIterator; + +class ULTRAINFER_DECL TKalmanFilter : public cv::KalmanFilter { +public: + TKalmanFilter(void); + virtual ~TKalmanFilter(void) {} + virtual void init(const cv::Mat &measurement); + virtual const cv::Mat &predict(); + virtual const cv::Mat &correct(const cv::Mat &measurement); + virtual void project(cv::Mat *mean, cv::Mat *covariance) const; + +private: + float std_weight_position; + float std_weight_velocity; +}; + +inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4) { + cv::KalmanFilter::transitionMatrix = cv::Mat::eye(8, 8, CV_32F); + for (int i = 0; i < 4; ++i) + cv::KalmanFilter::transitionMatrix.at(i, i + 4) = 1; + cv::KalmanFilter::measurementMatrix = cv::Mat::eye(4, 8, CV_32F); + std_weight_position = 1 / 20.f; + std_weight_velocity = 1 / 160.f; +} + +class ULTRAINFER_DECL Trajectory : public TKalmanFilter { +public: + Trajectory(); + Trajectory(const cv::Vec4f <rb, float score, const cv::Mat &embedding); + Trajectory(const Trajectory &other); + Trajectory &operator=(const Trajectory &rhs); + virtual ~Trajectory(void) {} + + int next_id(int &nt); + virtual const cv::Mat &predict(void); + virtual void update(Trajectory *traj, int timestamp, + bool update_embedding = true); + virtual void activate(int &cnt, int timestamp); + virtual void reactivate(Trajectory *traj, int &cnt, int timestamp, + bool newid = false); + virtual void mark_lost(void); + virtual void mark_removed(void); + + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b); + friend TrajectoryPool operator-(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + TrajectoryPool *b); + friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + + friend cv::Mat embedding_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat mahalanobis_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const 
TrajectoryPtrPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + +private: + void update_embedding(const cv::Mat &embedding); + +public: + TrajectoryState state; + cv::Vec4f ltrb; + cv::Mat smooth_embedding; + int id; + bool is_activated; + int timestamp; + int starttime; + float score; + +private: + // int count=0; + cv::Vec4f xyah; + cv::Mat current_embedding; + float eta; + int length; +}; + +inline cv::Vec4f ltrb2xyah(const cv::Vec4f <rb) { + cv::Vec4f xyah; + xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f; + xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f; + xyah[3] = ltrb[3] - ltrb[1]; + xyah[2] = (ltrb[2] - ltrb[0]) / xyah[3]; + return xyah; +} + +inline Trajectory::Trajectory() + : state(New), ltrb(cv::Vec4f()), smooth_embedding(cv::Mat()), id(0), + is_activated(false), timestamp(0), starttime(0), score(0), eta(0.9), + length(0) {} + +inline Trajectory::Trajectory(const cv::Vec4f <rb_, float score_, + const cv::Mat &embedding) + : state(New), ltrb(ltrb_), smooth_embedding(cv::Mat()), id(0), + is_activated(false), timestamp(0), starttime(0), score(score_), eta(0.9), + length(0) { + xyah = ltrb2xyah(ltrb); + update_embedding(embedding); +} + +inline Trajectory::Trajectory(const Trajectory &other) + : state(other.state), ltrb(other.ltrb), id(other.id), + is_activated(other.is_activated), timestamp(other.timestamp), + starttime(other.starttime), xyah(other.xyah), score(other.score), + eta(other.eta), length(other.length) { + other.smooth_embedding.copyTo(smooth_embedding); + other.current_embedding.copyTo(current_embedding); + // copy state in KalmanFilter + + other.statePre.copyTo(cv::KalmanFilter::statePre); + other.statePost.copyTo(cv::KalmanFilter::statePost); + other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); +} + +inline Trajectory &Trajectory::operator=(const Trajectory &rhs) { + this->state = rhs.state; + this->ltrb = rhs.ltrb; + rhs.smooth_embedding.copyTo(this->smooth_embedding); + this->id = rhs.id; + this->is_activated = rhs.is_activated; + this->timestamp = rhs.timestamp; + this->starttime = rhs.starttime; + this->xyah = rhs.xyah; + this->score = rhs.score; + rhs.current_embedding.copyTo(this->current_embedding); + this->eta = rhs.eta; + this->length = rhs.length; + + // copy state in KalmanFilter + + rhs.statePre.copyTo(cv::KalmanFilter::statePre); + rhs.statePost.copyTo(cv::KalmanFilter::statePost); + rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); + + return *this; +} + +inline int Trajectory::next_id(int &cnt) { + ++cnt; + return cnt; +} + +inline void Trajectory::mark_lost(void) { state = Lost; } + +inline void Trajectory::mark_removed(void) { state = Removed; } + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc b/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc new file mode 100755 index 0000000000..6d3565c8c8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTracking(pybind11::module &m); + +void BindTracking(pybind11::module &m) { + auto tracking_module = m.def_submodule("tracking", "object tracking models."); + BindPPTracking(tracking_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc b/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc new file mode 100755 index 0000000000..c0a3f34e56 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +float CosineSimilarity(const std::vector &a, const std::vector &b, + bool normalized) { + FDASSERT((a.size() == b.size()) && (a.size() != 0), + "The size of a and b must be equal and >= 1."); + size_t num_val = a.size(); + if (normalized) { + float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; + for (size_t i = 0; i < num_val; ++i) { + mul_a += (a[i] * a[i]); + mul_b += (b[i] * b[i]); + mul_ab += (a[i] * b[i]); + } + return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); + } + auto norm_a = L2Normalize(a); + auto norm_b = L2Normalize(b); + float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; + for (size_t i = 0; i < num_val; ++i) { + mul_a += (norm_a[i] * norm_a[i]); + mul_b += (norm_b[i] * norm_b[i]); + mul_ab += (norm_a[i] * norm_b[i]); + } + return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc b/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc new file mode 100755 index 0000000000..8da51f87f4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
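+
+// CropImageByBox expands the detection box by `expandratio` on each side,
+// widens it when necessary so the crop keeps roughly a 3:4 width:height
+// ratio (half_w is raised to 0.75 * half_h whenever half_h * 3 > half_w * 4),
+// clamps the crop to the image, and returns the crop plus its center and
+// scale, which downstream keypoint models can use to map predictions back to
+// the original image. Worked example with box = (10, 10, 110, 210) and
+// expandratio = 0.3: half_w = 100 * 1.3 / 2 = 65, half_h = 200 * 1.3 / 2 =
+// 130; since 130 * 3 > 65 * 4, half_w becomes 130 * 0.75 = 97.5.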
+ +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +bool CropImageByBox(Mat &src_im, Mat *dst_im, const std::vector &box, + std::vector *center, std::vector *scale, + const float expandratio) { + const cv::Mat *img = src_im.GetOpenCVMat(); + cv::Mat *crop_img = dst_im->GetOpenCVMat(); + int xmin = static_cast(box[0]); + int ymin = static_cast(box[1]); + int xmax = static_cast(box[2]); + int ymax = static_cast(box[3]); + float centerx = (xmin + xmax) / 2.0f; + float centery = (ymin + ymax) / 2.0f; + float half_h = (ymax - ymin) * (1 + expandratio) / 2.0f; + float half_w = (xmax - xmin) * (1 + expandratio) / 2.0f; + // adjust h or w to keep image ratio, expand the shorter edge + if (half_h * 3 > half_w * 4) { + half_w = half_h * 0.75; + } + int crop_xmin = std::max(0, static_cast(centerx - half_w)); + int crop_ymin = std::max(0, static_cast(centery - half_h)); + int crop_xmax = std::min(img->cols - 1, static_cast(centerx + half_w)); + int crop_ymax = std::min(img->rows - 1, static_cast(centery + half_h)); + + crop_img->create(crop_ymax - crop_ymin, crop_xmax - crop_xmin, img->type()); + *crop_img = + (*img)(cv::Range(crop_ymin, crop_ymax), cv::Range(crop_xmin, crop_xmax)); + center->clear(); + center->emplace_back((crop_xmin + crop_xmax) / 2.0f); + center->emplace_back((crop_ymin + crop_ymax) / 2.0f); + + scale->clear(); + scale->emplace_back((crop_xmax - crop_xmin)); + scale->emplace_back((crop_ymax - crop_ymin)); + + dst_im->SetWidth(crop_img->cols); + dst_im->SetHeight(crop_img->rows); + return true; +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h b/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h new file mode 100755 index 0000000000..17c9bbe04d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#ifndef CUDA_CHECK +#define CUDA_CHECK(callstr) \ + { \ + cudaError_t error_code = callstr; \ + if (error_code != cudaSuccess) { \ + std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":"; \ + std::cerr << __LINE__; \ + assert(0); \ + } \ + } +#endif // CUDA_CHECK + +namespace ultrainfer { +namespace vision { +namespace utils { +void CudaYoloPreprocess(uint8_t *src, int src_width, int src_height, float *dst, + int dst_width, int dst_height, + const std::vector padding_value, + cudaStream_t stream); +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc b/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc new file mode 100755 index 0000000000..02e853146d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void DarkParse(const std::vector &heatmap, const std::vector &dim, + std::vector *coords, const int px, const int py, + const int index, const int ch) { + /*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate + Representation for Human Pose Estimation (CVPR 2020). + 1) offset = - hassian.inv() * derivative + 2) dx = (heatmap[x+1] - heatmap[x-1])/2. + 3) dxx = (dx[x+1] - dx[x-1])/2. + 4) derivative = Mat([dx, dy]) + 5) hassian = Mat([[dxx, dxy], [dxy, dyy]]) + */ + std::vector::const_iterator first1 = heatmap.begin() + index; + std::vector::const_iterator last1 = + heatmap.begin() + index + dim[2] * dim[3]; + std::vector heatmap_ch(first1, last1); + cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]); + heatmap_mat.convertTo(heatmap_mat, CV_32FC1); + cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0); + heatmap_mat = heatmap_mat.reshape(1, 1); + heatmap_ch = std::vector(heatmap_mat.reshape(1, 1)); + + float epsilon = 1e-10; + // sample heatmap to get values in around target location + float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon)); + float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon)); + float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon)); + + float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon)); + float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon)); + float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon)); + float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon)); + float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon)); + float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon)); + float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon)); + float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon)); + float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon)); + float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon)); + + // compute dx/dy and dxx/dyy with sampled values + float dx = 0.5 * (xr - xl); + float dy = 0.5 * (yu - yd); + float dxx = 0.25 * (xr2 - 2 * xy + xl2); + float dxy = 0.25 * (xryu - xryd - xlyu + xlyd); + float dyy = 0.25 * (yu2 - 2 * xy + yd2); + + // finally get offset by derivative and hassian, which combined by dx/dy and + // dxx/dyy + if (dxx * dyy - dxy * dxy != 0) { + float M[2][2] = {dxx, dxy, dxy, dyy}; + float D[2] = {dx, dy}; + cv::Mat hassian(2, 2, CV_32F, M); + cv::Mat derivative(2, 1, CV_32F, D); + cv::Mat offset = -hassian.inv() * derivative; + (*coords)[ch * 2] += offset.at(0, 0); + (*coords)[ch * 2 + 1] += offset.at(1, 0); + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc b/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc new file mode 100755 index 0000000000..90114aeaf6 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// reference: +// https://github.com/deepinsight/insightface/blob/master/recognition/_tools_/cpp_align/face_align.h +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +cv::Mat MeanAxis0(const cv::Mat &src) { + int num = src.rows; + int dim = src.cols; + cv::Mat output(1, dim, CV_32F); + for (int i = 0; i < dim; i++) { + float sum = 0; + for (int j = 0; j < num; j++) { + sum += src.at(j, i); + } + output.at(0, i) = sum / num; + } + return output; +} + +cv::Mat ElementwiseMinus(const cv::Mat &A, const cv::Mat &B) { + cv::Mat output(A.rows, A.cols, A.type()); + assert(B.cols == A.cols); + if (B.cols == A.cols) { + for (int i = 0; i < A.rows; i++) { + for (int j = 0; j < B.cols; j++) { + output.at(i, j) = A.at(i, j) - B.at(0, j); + } + } + } + return output; +} + +cv::Mat VarAxis0(const cv::Mat &src) { + cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src)); + cv::multiply(temp_, temp_, temp_); + return MeanAxis0(temp_); +} + +int MatrixRank(cv::Mat M) { + cv::Mat w, u, vt; + cv::SVD::compute(M, w, u, vt); + cv::Mat1b non_zero_singular_values = w > 0.0001; + int rank = countNonZero(non_zero_singular_values); + return rank; +} + +cv::Mat SimilarTransform(cv::Mat &dst, cv::Mat &src) { + int num = dst.rows; + int dim = dst.cols; + cv::Mat src_mean = MeanAxis0(dst); + cv::Mat dst_mean = MeanAxis0(src); + cv::Mat src_demean = ElementwiseMinus(dst, src_mean); + cv::Mat dst_demean = ElementwiseMinus(src, dst_mean); + cv::Mat A = (dst_demean.t() * src_demean) / static_cast(num); + cv::Mat d(dim, 1, CV_32F); + d.setTo(1.0f); + if (cv::determinant(A) < 0) { + d.at(dim - 1, 0) = -1; + } + cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F); + cv::Mat U, S, V; + cv::SVD::compute(A, S, U, V); + int rank = MatrixRank(A); + if (rank == 0) { + assert(rank == 0); + } else if (rank == dim - 1) { + if (cv::determinant(U) * cv::determinant(V) > 0) { + T.rowRange(0, dim).colRange(0, dim) = U * V; + } else { + int s = d.at(dim - 1, 0) = -1; + d.at(dim - 1, 0) = -1; + + T.rowRange(0, dim).colRange(0, dim) = U * V; + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T) + cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1); + cv::Mat C = B.diag(0); + T.rowRange(0, dim).colRange(0, dim) = U * twp; + d.at(dim - 1, 0) = s; + } + } else { + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T) + cv::Mat res = U * twp; // U + T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp; + } + cv::Mat var_ = VarAxis0(src_demean); + float val = cv::sum(var_).val[0]; + cv::Mat res; + cv::multiply(d, S, res); + float scale = 1.0 / val * cv::sum(res).val[0]; + T.rowRange(0, dim).colRange(0, dim) = + -T.rowRange(0, dim).colRange(0, dim).t(); + cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim] + cv::Mat 
temp2 = src_mean.t(); // src_mean.T + cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T) + cv::Mat temp4 = scale * temp3; + T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t()); + T.rowRange(0, dim).colRange(0, dim) *= scale; + return T; +} + +std::vector +AlignFaceWithFivePoints(cv::Mat &image, FaceDetectionResult &result, + std::vector> std_landmarks, + std::array output_size) { + FDASSERT(std_landmarks.size() == 5, "The landmarks.size() must be 5.") + FDASSERT(!image.empty(), "The input_image can't be empty.") + std::vector output_images; + output_images.reserve(result.scores.size()); + if (result.boxes.empty()) { + FDWARNING << "The result is empty." << std::endl; + return output_images; + } + + cv::Mat src(5, 2, CV_32FC1, std_landmarks.data()); + for (int i = 0; i < result.landmarks.size(); i += 5) { + cv::Mat dst(5, 2, CV_32FC1, result.landmarks.data() + i); + cv::Mat m = SimilarTransform(dst, src); + cv::Mat map_matrix; + cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2); + cv::Mat(m, map_matrix_r).copyTo(map_matrix); + cv::Mat cropped_image_aligned; + cv::warpAffine(image, cropped_image_aligned, map_matrix, + {output_size[0], output_size[1]}); + if (cropped_image_aligned.empty()) { + FDWARNING << "croppedImageAligned is empty." << std::endl; + } + output_images.emplace_back(cropped_image_aligned); + } + return output_images; +} +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc b/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc new file mode 100755 index 0000000000..031e32711d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +std::vector L2Normalize(const std::vector &values) { + size_t num_val = values.size(); + if (num_val == 0) { + return {}; + } + std::vector norm; + float l2_sum_val = 0.f; + for (size_t i = 0; i < num_val; ++i) { + l2_sum_val += (values[i] * values[i]); + } + float l2_sum_sqrt = std::sqrt(l2_sum_val); + norm.resize(num_val); + for (size_t i = 0; i < num_val; ++i) { + norm[i] = values[i] / l2_sum_sqrt; + } + return norm; +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/nms.cc b/libs/ultrainfer/ultrainfer/vision/utils/nms.cc new file mode 100755 index 0000000000..73859b1636 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/nms.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +// The implementation refers to +// https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/cpp/src/utils.cc +void NMS(DetectionResult *result, float iou_threshold, + std::vector *index) { + // get sorted score indices + std::vector sorted_indices; + if (index != nullptr) { + std::map> score_map; + for (size_t i = 0; i < result->scores.size(); ++i) { + score_map.insert(std::pair(result->scores[i], i)); + } + for (auto iter : score_map) { + sorted_indices.push_back(iter.second); + } + } + utils::SortDetectionResult(result); + + std::vector area_of_boxes(result->boxes.size()); + std::vector suppressed(result->boxes.size(), 0); + for (size_t i = 0; i < result->boxes.size(); ++i) { + area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * + (result->boxes[i][3] - result->boxes[i][1]); + } + + for (size_t i = 0; i < result->boxes.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + for (size_t j = i + 1; j < result->boxes.size(); ++j) { + if (suppressed[j] == 1) { + continue; + } + float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); + float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); + float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); + float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); + float overlap_w = std::max(0.0f, xmax - xmin); + float overlap_h = std::max(0.0f, ymax - ymin); + float overlap_area = overlap_w * overlap_h; + float overlap_ratio = + overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); + if (overlap_ratio > iou_threshold) { + suppressed[j] = 1; + } + } + } + DetectionResult backup(*result); + result->Clear(); + result->Reserve(suppressed.size()); + for (size_t i = 0; i < suppressed.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + result->boxes.emplace_back(backup.boxes[i]); + result->scores.push_back(backup.scores[i]); + result->label_ids.push_back(backup.label_ids[i]); + if (index != nullptr) { + index->push_back(sorted_indices[i]); + } + } +} + +void NMS(FaceDetectionResult *result, float iou_threshold) { + utils::SortDetectionResult(result); + + std::vector area_of_boxes(result->boxes.size()); + std::vector suppressed(result->boxes.size(), 0); + for (size_t i = 0; i < result->boxes.size(); ++i) { + area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * + (result->boxes[i][3] - result->boxes[i][1]); + } + + for (size_t i = 0; i < result->boxes.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + for (size_t j = i + 1; j < result->boxes.size(); ++j) { + if (suppressed[j] == 1) { + continue; + } + float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); + float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); + float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); + float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); + float overlap_w = std::max(0.0f, xmax - xmin); + float overlap_h = std::max(0.0f, ymax - ymin); + float overlap_area = 
overlap_w * overlap_h; + float overlap_ratio = + overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); + if (overlap_ratio > iou_threshold) { + suppressed[j] = 1; + } + } + } + FaceDetectionResult backup(*result); + int landmarks_per_face = result->landmarks_per_face; + + result->Clear(); + // don't forget to reset the landmarks_per_face + // before apply Reserve method. + result->landmarks_per_face = landmarks_per_face; + result->Reserve(suppressed.size()); + for (size_t i = 0; i < suppressed.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + result->boxes.emplace_back(backup.boxes[i]); + result->scores.push_back(backup.scores[i]); + // landmarks (if have) + if (result->landmarks_per_face > 0) { + for (size_t j = 0; j < result->landmarks_per_face; ++j) { + result->landmarks.emplace_back( + backup.landmarks[i * result->landmarks_per_face + j]); + } + } + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc b/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc new file mode 100755 index 0000000000..d55e98e7ad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void Merge(DetectionResult *result, size_t low, size_t mid, size_t high) { + std::vector> &boxes = result->boxes; + std::vector &scores = result->scores; + std::vector &label_ids = result->label_ids; + std::vector> temp_boxes(boxes); + std::vector temp_scores(scores); + std::vector temp_label_ids(label_ids); + size_t i = low; + size_t j = mid + 1; + size_t k = i; + // TODO(qiuyanjun): add masks process + for (; i <= mid && j <= high; k++) { + if (temp_scores[i] >= temp_scores[j]) { + scores[k] = temp_scores[i]; + label_ids[k] = temp_label_ids[i]; + boxes[k] = temp_boxes[i]; + i++; + } else { + scores[k] = temp_scores[j]; + label_ids[k] = temp_label_ids[j]; + boxes[k] = temp_boxes[j]; + j++; + } + } + while (i <= mid) { + scores[k] = temp_scores[i]; + label_ids[k] = temp_label_ids[i]; + boxes[k] = temp_boxes[i]; + k++; + i++; + } + while (j <= high) { + scores[k] = temp_scores[j]; + label_ids[k] = temp_label_ids[j]; + boxes[k] = temp_boxes[j]; + k++; + j++; + } +} + +void MergeSort(DetectionResult *result, size_t low, size_t high) { + if (low < high) { + size_t mid = (high - low) / 2 + low; + MergeSort(result, low, mid); + MergeSort(result, mid + 1, high); + Merge(result, low, mid, high); + } +} + +void SortDetectionResult(DetectionResult *result) { + size_t low = 0; + size_t high = result->scores.size(); + if (high == 0) { + return; + } + high = high - 1; + MergeSort(result, low, high); +} + +template +bool LexSortByXYCompare(const std::array &box_a, + const std::array &box_b) { + // WARN: The status shoule be false if (a==b). 
+ // https://blog.csdn.net/xxxwrq/article/details/83080640 + auto is_equal = [](const T &a, const T &b) -> bool { + return std::abs(a - b) < 1e-6f; + }; + const T &x0_a = box_a[0]; + const T &y0_a = box_a[1]; + const T &x0_b = box_b[0]; + const T &y0_b = box_b[1]; + if (is_equal(x0_a, x0_b)) { + return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + +// Only for int dtype +template <> +bool LexSortByXYCompare(const std::array &box_a, + const std::array &box_b) { + const int &x0_a = box_a[0]; + const int &y0_a = box_a[1]; + const int &x0_b = box_b[0]; + const int &y0_b = box_b[1]; + if (x0_a == x0_b) { + return y0_a == y0_b ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + +void ReorderDetectionResultByIndices(DetectionResult *result, + const std::vector &indices) { + // reorder boxes, scores, label_ids, masks + DetectionResult backup = (*result); + const bool contain_masks = backup.contain_masks; + const int boxes_num = backup.boxes.size(); + result->Clear(); + result->Resize(boxes_num); + // boxes, scores, labels_ids + for (int i = 0; i < boxes_num; ++i) { + result->boxes[i] = backup.boxes[indices[i]]; + result->scores[i] = backup.scores[indices[i]]; + result->label_ids[i] = backup.label_ids[indices[i]]; + } + if (contain_masks) { + result->contain_masks = true; + for (int i = 0; i < boxes_num; ++i) { + const auto &shape = backup.masks[indices[i]].shape; + const int mask_numel = shape[0] * shape[1]; + result->masks[i].shape = shape; + result->masks[i].Resize(mask_numel); + std::memcpy(result->masks[i].Data(), backup.masks[indices[i]].Data(), + mask_numel * sizeof(uint8_t)); + } + } +} + +void LexSortDetectionResultByXY(DetectionResult *result) { + if (result->boxes.empty()) { + return; + } + std::vector indices; + indices.resize(result->boxes.size()); + for (size_t i = 0; i < result->boxes.size(); ++i) { + indices[i] = i; + } + // lex sort by x(w) then y(h) + auto &boxes = result->boxes; + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + ReorderDetectionResultByIndices(result, indices); +} + +void LexSortOCRDetResultByXY(std::vector> *result) { + if (result->empty()) { + return; + } + std::vector indices; + indices.resize(result->size()); + std::vector> boxes; + boxes.resize(result->size()); + for (size_t i = 0; i < result->size(); ++i) { + indices[i] = i; + // 4 points to 2 points for LexSort + boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6], + (*result)[i][7]}; + } + // lex sort by x(w) then y(h) + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + // reorder boxes + std::vector> backup = (*result); + const int boxes_num = backup.size(); + result->clear(); + result->resize(boxes_num); + // boxes + for (int i = 0; i < boxes_num; ++i) { + (*result)[i] = backup[indices[i]]; + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc b/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc new file mode 100755 index 0000000000..31798a102c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void SortDetectionResult(FaceDetectionResult *result) { + // sort face detection results with landmarks or not. + if (result->boxes.size() == 0) { + return; + } + int landmarks_per_face = result->landmarks_per_face; + if (landmarks_per_face > 0) { + FDASSERT( + (result->landmarks.size() == result->boxes.size() * landmarks_per_face), + "The size of landmarks != boxes.size * landmarks_per_face."); + } + + // argsort for scores. + std::vector indices; + indices.resize(result->boxes.size()); + for (size_t i = 0; i < result->boxes.size(); ++i) { + indices[i] = i; + } + std::vector &scores = result->scores; + std::sort(indices.begin(), indices.end(), + [&scores](size_t a, size_t b) { return scores[a] > scores[b]; }); + + // reorder boxes, scores, landmarks (if have). + FaceDetectionResult backup(*result); + result->Clear(); + // don't forget to reset the landmarks_per_face + // before apply Reserve method. + result->landmarks_per_face = landmarks_per_face; + result->Reserve(indices.size()); + if (landmarks_per_face > 0) { + for (size_t i = 0; i < indices.size(); ++i) { + result->boxes.emplace_back(backup.boxes[indices[i]]); + result->scores.push_back(backup.scores[indices[i]]); + for (size_t j = 0; j < landmarks_per_face; ++j) { + result->landmarks.emplace_back( + backup.landmarks[indices[i] * landmarks_per_face + j]); + } + } + } else { + for (size_t i = 0; i < indices.size(); ++i) { + result->boxes.emplace_back(backup.boxes[indices[i]]); + result->scores.push_back(backup.scores[indices[i]]); + } + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/utils.h b/libs/ultrainfer/ultrainfer/vision/utils/utils.h new file mode 100755 index 0000000000..0c9342751d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/utils.h @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
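+// Shared post-processing helpers for the vision models. The NMS declared in
+// this header uses the standard IoU criterion on [xmin, ymin, xmax, ymax]
+// boxes:
+//   IoU = intersection_area / (area_a + area_b - intersection_area)
+// and suppresses any box whose IoU with a higher-scoring kept box exceeds
+// iou_threshold.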
+
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <vector>
+
+#include "ultrainfer/core/fd_tensor.h"
+#include "ultrainfer/utils/utils.h"
+#include "ultrainfer/vision/common/result.h"
+
+// #include "unsupported/Eigen/CXX11/Tensor"
+#include "ultrainfer/function/reduce.h"
+#include "ultrainfer/function/softmax.h"
+#include "ultrainfer/function/transpose.h"
+#include "ultrainfer/vision/common/processors/mat.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace utils {
+// topk is usually a very small value, so this simple O(topk * array_size)
+// implementation should not cost too much time.
+// Note that it assumes the minimum possible value is -99999999; do not use it
+// on arrays whose top-k values are smaller than -99999999.
+template <typename T>
+std::vector<int32_t> TopKIndices(const T *array, int array_size, int topk) {
+  topk = std::min(array_size, topk);
+  std::vector<int32_t> res(topk);
+  std::set<int32_t> searched;
+  for (int32_t i = 0; i < topk; ++i) {
+    T min = static_cast<T>(-99999999);
+    for (int32_t j = 0; j < array_size; ++j) {
+      if (searched.find(j) != searched.end()) {
+        continue;
+      }
+      if (*(array + j) > min) {
+        res[i] = j;
+        min = *(array + j);
+      }
+    }
+    searched.insert(res[i]);
+  }
+  return res;
+}
+
+void NMS(DetectionResult *output, float iou_threshold = 0.5,
+         std::vector<int> *index = nullptr);
+
+void NMS(FaceDetectionResult *result, float iou_threshold = 0.5);
+
+/// Sort DetectionResult/FaceDetectionResult by score
+ULTRAINFER_DECL void SortDetectionResult(DetectionResult *result);
+ULTRAINFER_DECL void SortDetectionResult(FaceDetectionResult *result);
+/// Lex Sort DetectionResult by x(w) & y(h) axis
+ULTRAINFER_DECL void LexSortDetectionResultByXY(DetectionResult *result);
+/// Lex Sort OCRDet Result by x(w) & y(h) axis
+ULTRAINFER_DECL void
+LexSortOCRDetResultByXY(std::vector<std::array<int, 8>> *result);
+
+/// L2 Norm / cosine similarity (for face recognition, ...)
+ULTRAINFER_DECL std::vector<float>
+L2Normalize(const std::vector<float> &values);
+
+ULTRAINFER_DECL float CosineSimilarity(const std::vector<float> &a,
+                                       const std::vector<float> &b,
+                                       bool normalized = true);
+
+/** \brief Do face alignment for models with five landmark points.
+ *
+ * \param[in] image The original image
+ * \param[in] result FaceDetectionResult
+ * \param[in] std_landmarks Standard face template
+ * \param[in] output_size The size of output mat
+ */
+ULTRAINFER_DECL std::vector<cv::Mat> AlignFaceWithFivePoints(
+    cv::Mat &image, FaceDetectionResult &result,
+    std::vector<std::array<float, 2>> std_landmarks = {{38.2946f, 51.6963f},
+                                                       {73.5318f, 51.5014f},
+                                                       {56.0252f, 71.7366f},
+                                                       {41.5493f, 92.3655f},
+                                                       {70.7299f, 92.2041f}},
+    std::array<int, 2> output_size = {112, 112});
+
+bool CropImageByBox(Mat &src_im, Mat *dst_im, const std::vector<float> &box,
+                    std::vector<float> *center, std::vector<float> *scale,
+                    const float expandratio = 0.3);
+
+/**
+ * Function: for keypoint detection models, fine positioning of keypoints in
+ * postprocess
+ * Parameters:
+ *   heatmap: model inference results for keypoint detection models
+ *   dim: shape information of the inference result
+ *   coords: coordinates after refined positioning
+ *   px: px = int(coords[ch * 2] + 0.5), refer to API
+ *       detection::GetFinalPredictions
+ *   py: py = int(coords[ch * 2 + 1] + 0.5), refer to API
+ *       detection::GetFinalPredictions
+ *   index: index information of heatmap pixels
+ *   ch: channel
+ * Paper reference: DARK post-processing, Zhang et al., Distribution-Aware
+ * Coordinate Representation for Human Pose Estimation (CVPR 2020).
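+ * In outline (per the DARK paper), the integer argmax location of each heatmap
+ * channel is refined with a second-order Taylor expansion of the heatmap
+ * around (px, py):
+ *   coord' = coord - H^{-1} * D
+ * where D is the first derivative and H the Hessian, both estimated from the
+ * neighboring heatmap values at the given index/channel.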
+ */ +void DarkParse(const std::vector &heatmap, const std::vector &dim, + std::vector *coords, const int px, const int py, + const int index, const int ch); + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu b/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu new file mode 100755 index 0000000000..0ae81a63bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu @@ -0,0 +1,153 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/wang-xinyu/tensorrtx/blob/yolov5-v6.0/yolov5/preprocess.cu +// +// Copyright (c) 2022 tensorrtx +// Licensed under The MIT License +// \file preprocess.cu +// \brief +// \author Qi Liu, Xinyu Wang + +#ifdef WITH_GPU +#include + +#include "ultrainfer/vision/utils/cuda_utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +struct AffineMatrix { + float value[6]; +}; + +__global__ void +YoloPreprocessCudaKernel(uint8_t *src, int src_line_size, int src_width, + int src_height, float *dst, int dst_width, + int dst_height, uint8_t padding_color_b, + uint8_t padding_color_g, uint8_t padding_color_r, + AffineMatrix d2s, int edge) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) + return; + + float m_x1 = d2s.value[0]; + float m_y1 = d2s.value[1]; + float m_z1 = d2s.value[2]; + float m_x2 = d2s.value[3]; + float m_y2 = d2s.value[4]; + float m_z2 = d2s.value[5]; + + int dx = position % dst_width; + int dy = position / dst_width; + float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f; + float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f; + float c0, c1, c2; + + if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) { + // out of range + c0 = padding_color_b; + c1 = padding_color_g; + c2 = padding_color_r; + } else { + int y_low = floorf(src_y); + int x_low = floorf(src_x); + int y_high = y_low + 1; + int x_high = x_low + 1; + + uint8_t const_value[] = {padding_color_b, padding_color_g, padding_color_r}; + float ly = src_y - y_low; + float lx = src_x - x_low; + float hy = 1 - ly; + float hx = 1 - lx; + float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + uint8_t *v1 = const_value; + uint8_t *v2 = const_value; + uint8_t *v3 = const_value; + uint8_t *v4 = const_value; + + if (y_low >= 0) { + if (x_low >= 0) + v1 = src + y_low * src_line_size + x_low * 3; + if (x_high < src_width) + v2 = src + y_low * src_line_size + x_high * 3; + } + + if (y_high < src_height) { + if (x_low >= 0) + v3 = src + y_high * src_line_size + x_low * 3; + if (x_high < src_width) + v4 = src + y_high * src_line_size + x_high * 3; + } + + c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0]; + c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1]; + c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2]; + } + + // bgr to rgb + float t = c2; + c2 
= c0; + c0 = t; + + // normalization + c0 = c0 / 255.0f; + c1 = c1 / 255.0f; + c2 = c2 / 255.0f; + + // rgbrgbrgb to rrrgggbbb + int area = dst_width * dst_height; + float *pdst_c0 = dst + dy * dst_width + dx; + float *pdst_c1 = pdst_c0 + area; + float *pdst_c2 = pdst_c1 + area; + *pdst_c0 = c0; + *pdst_c1 = c1; + *pdst_c2 = c2; +} + +void CudaYoloPreprocess(uint8_t *src, int src_width, int src_height, float *dst, + int dst_width, int dst_height, + const std::vector padding_value, + cudaStream_t stream) { + AffineMatrix s2d, d2s; + float scale = + std::min(dst_height / (float)src_height, dst_width / (float)src_width); + + s2d.value[0] = scale; + s2d.value[1] = 0; + s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5; + s2d.value[3] = 0; + s2d.value[4] = scale; + s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5; + + cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); + cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); + cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); + + memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value)); + + int jobs = dst_height * dst_width; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + YoloPreprocessCudaKernel<<>>( + src, src_width * 3, src_width, src_height, dst, dst_width, dst_height, + padding_value[0], padding_value[1], padding_value[2], d2s, jobs); +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc b/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc new file mode 100755 index 0000000000..ad1a23a729 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc @@ -0,0 +1,292 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
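+// Python bindings for the vision result types. Each result class below that
+// registers a pybind11::pickle pair (a tuple-producing getter and a
+// tuple-consuming setter) can be copied and serialized from Python with the
+// standard pickle/copy modules.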
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindFDMat(pybind11::module &m); +void BindFDMatBatch(pybind11::module &m); +void BindProcessors(pybind11::module &m); +void BindDetection(pybind11::module &m); +void BindClassification(pybind11::module &m); +void BindSegmentation(pybind11::module &m); +void BindMatting(pybind11::module &m); +void BindFaceDet(pybind11::module &m); +void BindFaceAlign(pybind11::module &m); +void BindFaceId(pybind11::module &m); +void BindOcr(pybind11::module &m); +void BindTracking(pybind11::module &m); +void BindKeyPointDetection(pybind11::module &m); +void BindHeadPose(pybind11::module &m); +void BindSR(pybind11::module &m); +void BindGeneration(pybind11::module &m); +void BindVisualize(pybind11::module &m); +void BindPerception(pybind11::module &m); + +void BindVision(pybind11::module &m) { + pybind11::class_(m, "Mask") + .def(pybind11::init()) + .def_readwrite("data", &vision::Mask::data) + .def_readwrite("shape", &vision::Mask::shape) + .def(pybind11::pickle( + [](const vision::Mask &m) { + return pybind11::make_tuple(m.data, m.shape); + }, + [](pybind11::tuple t) { + if (t.size() != 2) + throw std::runtime_error( + "vision::Mask pickle with invalid state!"); + + vision::Mask m; + m.data = t[0].cast>(); + m.shape = t[1].cast>(); + + return m; + })) + .def("__repr__", &vision::Mask::Str) + .def("__str__", &vision::Mask::Str); + + pybind11::class_(m, "ClassifyResult") + .def(pybind11::init()) + .def_readwrite("label_ids", &vision::ClassifyResult::label_ids) + .def_readwrite("scores", &vision::ClassifyResult::scores) + .def_readwrite("feature", &vision::ClassifyResult::feature) + .def(pybind11::pickle( + [](const vision::ClassifyResult &c) { + if (c.feature.empty()) { + return pybind11::make_tuple(c.label_ids, c.scores); + } + return pybind11::make_tuple(c.label_ids, c.scores, c.feature); + }, + [](pybind11::tuple t) { + if ((t.size() != 2) && (t.size() != 3)) { + throw std::runtime_error( + "vision::ClassifyResult pickle with invalid state!"); + } + + vision::ClassifyResult c; + c.label_ids = t[0].cast>(); + c.scores = t[1].cast>(); + if (t.size() == 3) { + c.feature = t[2].cast>(); + } + + return c; + })) + .def("__repr__", &vision::ClassifyResult::Str) + .def("__str__", &vision::ClassifyResult::Str); + + pybind11::class_(m, "DetectionResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::DetectionResult::boxes) + .def_readwrite("scores", &vision::DetectionResult::scores) + .def_readwrite("rotated_boxes", &vision::DetectionResult::rotated_boxes) + .def_readwrite("label_ids", &vision::DetectionResult::label_ids) + .def_readwrite("masks", &vision::DetectionResult::masks) + .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks) + .def(pybind11::pickle( + [](const vision::DetectionResult &d) { + return pybind11::make_tuple(d.boxes, d.scores, d.rotated_boxes, + d.label_ids, d.masks, d.contain_masks); + }, + [](pybind11::tuple t) { + if (t.size() != 5) + throw std::runtime_error( + "vision::DetectionResult pickle with Invalid state!"); + + vision::DetectionResult d; + d.boxes = t[0].cast>>(); + d.rotated_boxes = t[0].cast>>(); + d.scores = t[1].cast>(); + d.label_ids = t[2].cast>(); + d.masks = t[3].cast>(); + d.contain_masks = t[4].cast(); + + return d; + })) + .def("__repr__", &vision::DetectionResult::Str) + .def("__str__", &vision::DetectionResult::Str); + + pybind11::class_(m, "PerceptionResult") + .def(pybind11::init()) + .def_readwrite("valid", &vision::PerceptionResult::valid) + 
.def_readwrite("scores", &vision::PerceptionResult::scores) + .def_readwrite("label_ids", &vision::PerceptionResult::label_ids) + .def_readwrite("boxes", &vision::PerceptionResult::boxes) + .def_readwrite("center", &vision::PerceptionResult::center) + .def_readwrite("observation_angle", + &vision::PerceptionResult::observation_angle) + .def_readwrite("yaw_angle", &vision::PerceptionResult::yaw_angle) + .def_readwrite("velocity", &vision::PerceptionResult::velocity) + .def(pybind11::pickle( + [](const vision::PerceptionResult &d) { + return pybind11::make_tuple(d.scores, d.label_ids, d.boxes, + d.center, d.observation_angle, + d.yaw_angle, d.velocity); + }, + [](pybind11::tuple t) { + if (t.size() != 7) + throw std::runtime_error( + "vision::PerceptionResult pickle with Invalid state!"); + + vision::PerceptionResult d; + d.scores = t[0].cast>(); + d.label_ids = t[1].cast>(); + d.boxes = t[2].cast>>(); + d.center = t[3].cast>>(); + d.observation_angle = t[4].cast>(); + d.yaw_angle = t[5].cast>(); + d.velocity = t[6].cast>>(); + return d; + })) + .def("__repr__", &vision::PerceptionResult::Str) + .def("__str__", &vision::PerceptionResult::Str); + + pybind11::class_(m, "OCRResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::OCRResult::boxes) + .def_readwrite("text", &vision::OCRResult::text) + .def_readwrite("rec_scores", &vision::OCRResult::rec_scores) + .def_readwrite("cls_scores", &vision::OCRResult::cls_scores) + .def_readwrite("cls_labels", &vision::OCRResult::cls_labels) + .def_readwrite("table_boxes", &vision::OCRResult::table_boxes) + .def_readwrite("table_structure", &vision::OCRResult::table_structure) + .def_readwrite("table_html", &vision::OCRResult::table_html) + .def("__repr__", &vision::OCRResult::Str) + .def("__str__", &vision::OCRResult::Str); + + pybind11::class_(m, "OCRCURVEResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::OCRCURVEResult::boxes) + .def_readwrite("text", &vision::OCRCURVEResult::text) + .def_readwrite("rec_scores", &vision::OCRCURVEResult::rec_scores) + .def_readwrite("cls_scores", &vision::OCRCURVEResult::cls_scores) + .def_readwrite("cls_labels", &vision::OCRCURVEResult::cls_labels) + .def_readwrite("table_boxes", &vision::OCRCURVEResult::table_boxes) + .def_readwrite("table_structure", + &vision::OCRCURVEResult::table_structure) + .def_readwrite("table_html", &vision::OCRCURVEResult::table_html) + .def("__repr__", &vision::OCRCURVEResult::Str) + .def("__str__", &vision::OCRCURVEResult::Str); + + pybind11::class_(m, "MOTResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::MOTResult::boxes) + .def_readwrite("ids", &vision::MOTResult::ids) + .def_readwrite("scores", &vision::MOTResult::scores) + .def_readwrite("class_ids", &vision::MOTResult::class_ids) + .def("__repr__", &vision::MOTResult::Str) + .def("__str__", &vision::MOTResult::Str); + + pybind11::class_(m, "FaceDetectionResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::FaceDetectionResult::boxes) + .def_readwrite("scores", &vision::FaceDetectionResult::scores) + .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks) + .def_readwrite("landmarks_per_face", + &vision::FaceDetectionResult::landmarks_per_face) + .def("__repr__", &vision::FaceDetectionResult::Str) + .def("__str__", &vision::FaceDetectionResult::Str); + + pybind11::class_(m, "FaceAlignmentResult") + .def(pybind11::init()) + .def_readwrite("landmarks", &vision::FaceAlignmentResult::landmarks) + .def("__repr__", &vision::FaceAlignmentResult::Str) + 
.def("__str__", &vision::FaceAlignmentResult::Str); + + pybind11::class_(m, "FaceRecognitionResult") + .def(pybind11::init()) + .def_readwrite("embedding", &vision::FaceRecognitionResult::embedding) + .def("__repr__", &vision::FaceRecognitionResult::Str) + .def("__str__", &vision::FaceRecognitionResult::Str); + + pybind11::class_(m, "SegmentationResult") + .def(pybind11::init()) + .def_readwrite("label_map", &vision::SegmentationResult::label_map) + .def_readwrite("score_map", &vision::SegmentationResult::score_map) + .def_readwrite("shape", &vision::SegmentationResult::shape) + .def_readwrite("contain_score_map", + &vision::SegmentationResult::contain_score_map) + .def(pybind11::pickle( + [](const vision::SegmentationResult &s) { + return pybind11::make_tuple(s.label_map, s.score_map, s.shape, + s.contain_score_map); + }, + [](pybind11::tuple t) { + if (t.size() != 4) + throw std::runtime_error( + "vision::SegmentationResult pickle with Invalid state!"); + + vision::SegmentationResult s; + s.label_map = t[0].cast>(); + s.score_map = t[1].cast>(); + s.shape = t[2].cast>(); + s.contain_score_map = t[3].cast(); + + return s; + })) + .def("__repr__", &vision::SegmentationResult::Str) + .def("__str__", &vision::SegmentationResult::Str); + + pybind11::class_(m, "MattingResult") + .def(pybind11::init()) + .def_readwrite("alpha", &vision::MattingResult::alpha) + .def_readwrite("foreground", &vision::MattingResult::foreground) + .def_readwrite("shape", &vision::MattingResult::shape) + .def_readwrite("contain_foreground", + &vision::MattingResult::contain_foreground) + .def("__repr__", &vision::MattingResult::Str) + .def("__str__", &vision::MattingResult::Str); + + pybind11::class_(m, + "KeyPointDetectionResult") + .def(pybind11::init()) + .def_readwrite("keypoints", &vision::KeyPointDetectionResult::keypoints) + .def_readwrite("scores", &vision::KeyPointDetectionResult::scores) + .def_readwrite("num_joints", &vision::KeyPointDetectionResult::num_joints) + .def("__repr__", &vision::KeyPointDetectionResult::Str) + .def("__str__", &vision::KeyPointDetectionResult::Str); + + pybind11::class_(m, "HeadPoseResult") + .def(pybind11::init()) + .def_readwrite("euler_angles", &vision::HeadPoseResult::euler_angles) + .def("__repr__", &vision::HeadPoseResult::Str) + .def("__str__", &vision::HeadPoseResult::Str); + + m.def("enable_flycv", &vision::EnableFlyCV, + "Enable image preprocessing by FlyCV."); + m.def("disable_flycv", &vision::DisableFlyCV, + "Disable image preprocessing by FlyCV, change to use OpenCV."); + + BindFDMat(m); + BindFDMatBatch(m); + BindProcessors(m); + BindDetection(m); + BindClassification(m); + BindSegmentation(m); + BindFaceDet(m); + BindFaceAlign(m); + BindFaceId(m); + BindMatting(m); + BindOcr(m); + BindTracking(m); + BindKeyPointDetection(m); + BindHeadPose(m); + BindSR(m); + BindGeneration(m); + BindVisualize(m); + BindPerception(m); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc b/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc new file mode 100755 index 0000000000..10502ce345 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisClassification(const cv::Mat &im, const ClassifyResult &result, + int top_k, float score_threshold, float font_size) { + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + int h_sep = h / 30; + int w_sep = w / 10; + if (top_k > result.scores.size()) { + top_k = result.scores.size(); + } + for (int i = 0; i < top_k; ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Point origin; + origin.x = w_sep; + origin.y = h_sep * (i + 1); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +// Visualize ClassifyResult with custom labels. +cv::Mat VisClassification(const cv::Mat &im, const ClassifyResult &result, + const std::vector &labels, int top_k, + float score_threshold, float font_size) { + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + int h_sep = h / 30; + int w_sep = w / 10; + if (top_k > result.scores.size()) { + top_k = result.scores.size(); + } + for (int i = 0; i < top_k; ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + if (labels.size() > result.label_ids[i]) { + text = labels[result.label_ids[i]] + "," + text; + } else { + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" + << labels.size() << "." << std::endl; + } + if (text.size() > 16) { + text = text.substr(0, 16); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Point origin; + origin.x = w_sep; + origin.y = h_sep * (i + 1); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc b/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc new file mode 100755 index 0000000000..2cb71a46e9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc @@ -0,0 +1,374 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + float score_threshold, int line_size, float font_size) { + if (result.boxes.empty() && result.rotated_boxes.empty()) { + return im; + } + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal to the size of boxes, but now " + "%zu != %zu.", + result.boxes.size(), result.masks.size()); + } + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + for (size_t i = 0; i < result.rotated_boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + + for (int j = 0; j < 4; j++) { + auto start = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + + cv::Point end; + if (j != 3) { + end = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * (j + 1)])), + static_cast(round(result.rotated_boxes[i][2 * (j + 1) + 1]))); + } else { + end = cv::Point(static_cast(round(result.rotated_boxes[i][0])), + static_cast(round(result.rotated_boxes[i][1]))); + cv::putText(vis_im, text, end, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + cv::line(vis_im, start, end, cv::Scalar(255, 255, 255), 3, cv::LINE_AA, + 0); + } + } + + for (size_t box_i = 0; box_i < result.boxes.size(); ++box_i) { + if (result.scores[box_i] < score_threshold) { + continue; + } + int x1 = static_cast(round(result.boxes[box_i][0])); + int y1 = static_cast(round(result.boxes[box_i][1])); + int x2 = static_cast(round(result.boxes[box_i][2])); + int y2 = static_cast(round(result.boxes[box_i][3])); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[box_i] + 0]; + int c1 = color_map[3 * result.label_ids[box_i] + 1]; + int c2 = color_map[3 * result.label_ids[box_i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[box_i]); + std::string score = std::to_string(result.scores[box_i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[box_i].shape[0]); + int mask_w = static_cast(result.masks[box_i].shape[1]); + // non-const 
pointer for cv:Mat constructor + uint32_t *mask_raw_data = const_cast( + static_cast(result.masks[box_i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + uint32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +// Visualize DetectionResult with custom labels. +cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + const std::vector &labels, + float score_threshold, int line_size, float font_size, + std::vector font_color, int font_thickness) { + if (result.boxes.empty()) { + return im; + } + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal to the size of boxes, but now " + "%zu != %zu.", + result.boxes.size(), result.masks.size()); + } + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + auto font_color_ = cv::Scalar(font_color[0], font_color[1], font_color[2]); + for (size_t i = 0; i < result.rotated_boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + + for (int j = 0; j < 4; j++) { + auto start = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + + cv::Point end; + if (j == 3) { + end = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + } else { + end = cv::Point(static_cast(round(result.rotated_boxes[i][0])), + static_cast(round(result.rotated_boxes[i][1]))); + cv::putText(vis_im, text, end, font, font_size, font_color_, + font_thickness); + } + cv::line(vis_im, start, end, cv::Scalar(255, 255, 255), 3, cv::LINE_AA, + 0); + } + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + int x1 = static_cast(result.boxes[i][0]); + int y1 = static_cast(result.boxes[i][1]); + 
int x2 = static_cast(result.boxes[i][2]); + int y2 = static_cast(result.boxes[i][3]); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + if (labels.size() > result.label_ids[i]) { + text = labels[result.label_ids[i]] + "," + text; + } else { + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" + << labels.size() << "." << std::endl; + } + if (text.size() > 16) { + text = text.substr(0, 16); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, font_color_, + font_thickness); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[i].shape[0]); + int mask_w = static_cast(result.masks[i].shape[1]); + // non-const pointer for cv:Mat constructor + int32_t *mask_raw_data = const_cast( + static_cast(result.masks[i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + int32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +// Default only support visualize num_classes <= 1000 +// If need to visualize num_classes > 1000 +// Please call Visualize::GetColorMap(num_classes) first +cv::Mat Visualize::VisDetection(const cv::Mat &im, + const DetectionResult &result, + float score_threshold, int line_size, + float font_size) { + if (result.boxes.empty()) { + return im; + } + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisDetection is " + "deprecated, please use ultrainfer::vision:VisDetection " + "function instead." 
+ << std::endl; + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal the size of boxes!"); + } + auto color_map = GetColorMap(); + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + for (size_t i = 0; i < result.boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + int x1 = static_cast(result.boxes[i][0]); + int y1 = static_cast(result.boxes[i][1]); + int x2 = static_cast(result.boxes[i][2]); + int y2 = static_cast(result.boxes[i][3]); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[i].shape[0]); + int mask_w = static_cast(result.masks[i].shape[1]); + // non-const pointer for cv:Mat constructor + int32_t *mask_raw_data = const_cast( + static_cast(result.masks[i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + int32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc b/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc new file mode 100755 index 0000000000..8f676d7f24 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisFaceAlignment(const cv::Mat &im, const FaceAlignmentResult &result, + int line_size) { + auto vis_im = im.clone(); + // vis landmarks + cv::Scalar landmark_color = cv::Scalar(0, 255, 0); + for (size_t i = 0; i < result.landmarks.size(); ++i) { + cv::Point landmark; + landmark.x = static_cast(result.landmarks[i][0]); + landmark.y = static_cast(result.landmarks[i][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc b/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc new file mode 100755 index 0000000000..a2ed40e6df --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
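+// Note: FaceDetectionResult stores landmarks as a flat list, so the j-th
+// landmark of the i-th face lives at index i * landmarks_per_face + j; the
+// drawing loops below rely on this layout.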
+ +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisFaceDetection(const cv::Mat &im, const FaceDetectionResult &result, + int line_size, float font_size) { + auto color_map = GenerateColorMap(); + int h = im.rows; + int w = im.cols; + + auto vis_im = im.clone(); + bool vis_landmarks = false; + if ((result.landmarks_per_face > 0) && + (result.boxes.size() * result.landmarks_per_face == + result.landmarks.size())) { + vis_landmarks = true; + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + cv::Rect rect(result.boxes[i][0], result.boxes[i][1], + result.boxes[i][2] - result.boxes[i][0], + result.boxes[i][3] - result.boxes[i][1]); + int color_id = i % 333; + int c0 = color_map[3 * color_id + 0]; + int c1 = color_map[3 * color_id + 1]; + int c2 = color_map[3 * color_id + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string text = std::to_string(result.scores[i]); + if (text.size() > 4) { + text = text.substr(0, 4); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = rect.x; + origin.y = rect.y; + cv::Rect text_background = + cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, + text_size.width, text_size.height); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + // vis landmarks (if have) + if (vis_landmarks) { + cv::Scalar landmark_color = rect_color; + for (size_t j = 0; j < result.landmarks_per_face; ++j) { + cv::Point landmark; + landmark.x = static_cast( + result.landmarks[i * result.landmarks_per_face + j][0]); + landmark.y = static_cast( + result.landmarks[i * result.landmarks_per_face + j][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + } + } + return vis_im; +} + +// Default only support visualize num_classes <= 1000 +// If need to visualize num_classes > 1000 +// Please call Visualize::GetColorMap(num_classes) first +cv::Mat Visualize::VisFaceDetection(const cv::Mat &im, + const FaceDetectionResult &result, + int line_size, float font_size) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisFaceDetection is " + "deprecated, please use ultrainfer::vision:VisFaceDetection " + "function instead." 
+ << std::endl; + auto color_map = GetColorMap(); + int h = im.rows; + int w = im.cols; + + auto vis_im = im.clone(); + bool vis_landmarks = false; + if ((result.landmarks_per_face > 0) && + (result.boxes.size() * result.landmarks_per_face == + result.landmarks.size())) { + vis_landmarks = true; + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + cv::Rect rect(result.boxes[i][0], result.boxes[i][1], + result.boxes[i][2] - result.boxes[i][0], + result.boxes[i][3] - result.boxes[i][1]); + int color_id = i % 333; + int c0 = color_map[3 * color_id + 0]; + int c1 = color_map[3 * color_id + 1]; + int c2 = color_map[3 * color_id + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string text = std::to_string(result.scores[i]); + if (text.size() > 4) { + text = text.substr(0, 4); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = rect.x; + origin.y = rect.y; + cv::Rect text_background = + cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, + text_size.width, text_size.height); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + // vis landmarks (if have) + if (vis_landmarks) { + cv::Scalar landmark_color = rect_color; + for (size_t j = 0; j < result.landmarks_per_face; ++j) { + cv::Point landmark; + landmark.x = static_cast( + result.landmarks[i * result.landmarks_per_face + j][0]); + landmark.y = static_cast( + result.landmarks[i * result.landmarks_per_face + j][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + } + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc b/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc new file mode 100755 index 0000000000..4ba2420992 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
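+// VisHeadPose draws the three rotated unit axes of the head (from the
+// pitch/yaw/roll Euler angles, converted from degrees to radians), scaled by
+// `size` and anchored at the image center (tdx, tdy): X in red, Y in green,
+// Z in blue.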
+ +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisHeadPose(const cv::Mat &im, const HeadPoseResult &result, int size, + int line_size) { + const float PI = 3.1415926535; + auto vis_im = im.clone(); + int h = im.rows; + int w = im.cols; + // vis headpose + float pitch = result.euler_angles[0] * PI / 180.f; + float yaw = -result.euler_angles[1] * PI / 180.f; + float roll = result.euler_angles[2] * PI / 180.f; + + int tdx = w / 2; + int tdy = h / 2; + + // X-Axis | drawn in red + int x1 = static_cast(size * std::cos(yaw) * std::cos(roll)) + tdx; + int y1 = static_cast( + size * (std::cos(pitch) * std::sin(roll) + + std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + + tdy; + // Y-Axis | drawn in green + int x2 = static_cast(-size * std::cos(yaw) * std::sin(roll)) + tdx; + int y2 = static_cast( + size * (std::cos(pitch) * std::cos(roll) - + std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + + tdy; + // Z-Axis | drawn in blue + int x3 = static_cast(size * std::sin(yaw)) + tdx; + int y3 = static_cast(-size * std::cos(yaw) * std::sin(pitch)) + tdy; + + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), + cv::Scalar(0, 0, 255), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2), + cv::Scalar(0, 255, 0), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), + cv::Scalar(255, 0, 0), line_size); + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc b/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc new file mode 100755 index 0000000000..20c124ba5b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
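+// VisKeypointDetection assumes the 17-keypoint COCO layout: keypoints are
+// read 17 at a time per detection, and `edge` lists the joint index pairs
+// that form the skeleton lines.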
+#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisKeypointDetection(const cv::Mat &im, + const KeyPointDetectionResult &results, + float conf_threshold) { + const int edge[][2] = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3, 5}, + {4, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, + {5, 11}, {6, 12}, {11, 13}, {12, 14}, {13, 15}, + {14, 16}, {11, 12}}; + auto colormap = GenerateColorMap(); + cv::Mat vis_img = im.clone(); + int detection_nums = results.keypoints.size() / 17; + for (int i = 0; i < detection_nums; i++) { + int index = i * 17; + bool is_over_threshold = true; + for (int j = 0; j < results.num_joints; j++) { + if (results.scores[index + j] < conf_threshold) { + is_over_threshold = false; + break; + } + } + if (is_over_threshold) { + for (int k = 0; k < results.num_joints; k++) { + int x_coord = int(results.keypoints[index + k][0]); + int y_coord = int(results.keypoints[index + k][1]); + cv::circle(vis_img, cv::Point2d(x_coord, y_coord), 1, + cv::Scalar(0, 0, 255), 2); + int x_start = int(results.keypoints[index + edge[k][0]][0]); + int y_start = int(results.keypoints[index + edge[k][0]][1]); + int x_end = int(results.keypoints[index + edge[k][1]][0]); + int y_end = int(results.keypoints[index + edge[k][1]][1]); + cv::line(vis_img, cv::Point2d(x_start, y_start), + cv::Point2d(x_end, y_end), colormap[k], 1); + } + } + } + return vis_img; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc b/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc new file mode 100755 index 0000000000..03ef71d802 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
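+// VisMatting blends every pixel with the predicted alpha:
+//   vis = alpha * image_pixel + (1 - alpha) * background_color
+// using BGR(153, 255, 120) as the default background; when
+// transparent_background is set, pixels with alpha below transparent_threshold
+// get a zero alpha channel instead.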
+#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisMatting(const cv::Mat &im, const MattingResult &result, + bool transparent_background, float transparent_threshold, + bool remove_small_connected_area) { + FDASSERT((!im.empty()), "im can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); + auto vis_img = im.clone(); + cv::Mat transparent_vis_mat; + int channel = im.channels(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + + if (transparent_background) { + if (vis_img.channels() != 4) { + cv::cvtColor(vis_img, transparent_vis_mat, cv::COLOR_BGR2BGRA); + vis_img = transparent_vis_mat; + channel = 4; + } + } + + uchar *vis_data = static_cast(vis_img.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + if (transparent_background) { + if (alpha_val < transparent_threshold) { + vis_data[i * width * channel + j * channel + 3] = + cv::saturate_cast(0.f); + } else { + vis_data[i * width * channel + j * channel + 0] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0])); + vis_data[i * width * channel + j * channel + 1] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1])); + vis_data[i * width * channel + j * channel + 2] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2])); + } + } else { + vis_data[i * width * channel + j * channel + 0] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0]) * + alpha_val + + (1.f - alpha_val) * 153.f); + vis_data[i * width * channel + j * channel + 1] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1]) * + alpha_val + + (1.f - alpha_val) * 255.f); + vis_data[i * width * channel + j * channel + 2] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2]) * + alpha_val + + (1.f - alpha_val) * 120.f); + } + } + } + return vis_img; +} + +cv::Mat Visualize::VisMattingAlpha(const cv::Mat &im, + const MattingResult &result, + bool remove_small_connected_area) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisMattingAlpha is " + "deprecated, please use ultrainfer::vision:VisMatting function " + "instead." 
+ << std::endl; + FDASSERT((!im.empty()), "im can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); + + auto vis_img = im.clone(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + + uchar *vis_data = static_cast(vis_img.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + vis_data[i * width * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0]) * alpha_val + + (1.f - alpha_val) * 153.f); + vis_data[i * width * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1]) * alpha_val + + (1.f - alpha_val) * 255.f); + vis_data[i * width * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2]) * alpha_val + + (1.f - alpha_val) * 120.f); + } + } + return vis_img; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc b/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc new file mode 100755 index 0000000000..215248f901 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Scalar GetMOTBoxColor(int idx) { + idx = idx * 3; + cv::Scalar color = + cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); + return color; +} + +cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results, + float score_threshold, tracking::TrailRecorder *recorder) { + cv::Mat vis_img = img.clone(); + int im_h = img.rows; + int im_w = img.cols; + float text_scale = std::max(1, static_cast(im_w / 1600.)); + float text_thickness = 2.; + float line_thickness = std::max(1, static_cast(im_w / 500.)); + for (int i = 0; i < results.boxes.size(); ++i) { + if (results.scores[i] < score_threshold) { + continue; + } + const int obj_id = results.ids[i]; + const float score = results.scores[i]; + cv::Scalar color = GetMOTBoxColor(obj_id); + if (recorder != nullptr) { + int id = results.ids[i]; + auto iter = recorder->records.find(id); + if (iter != recorder->records.end()) { + for (int j = 0; j < iter->second.size(); j++) { + cv::Point center(iter->second[j][0], iter->second[j][1]); + cv::circle(vis_img, center, text_thickness, color); + } + } + } + cv::Point pt1 = cv::Point(results.boxes[i][0], results.boxes[i][1]); + cv::Point pt2 = cv::Point(results.boxes[i][2], results.boxes[i][3]); + cv::Point id_pt = cv::Point(results.boxes[i][0], results.boxes[i][1] + 10); + cv::Point score_pt = + cv::Point(results.boxes[i][0], results.boxes[i][1] - 10); + cv::rectangle(vis_img, pt1, pt2, color, line_thickness); + std::ostringstream idoss; + idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4); + idoss << obj_id; + std::string id_text = idoss.str(); + + cv::putText(vis_img, id_text, id_pt, cv::FONT_HERSHEY_PLAIN, text_scale, + color, text_thickness); + + std::ostringstream soss; + soss << std::setiosflags(std::ios::fixed) << std::setprecision(2); + soss << score; + std::string score_text = soss.str(); + + cv::putText(vis_img, score_text, score_pt, cv::FONT_HERSHEY_PLAIN, + text_scale, color, text_thickness); + } + return vis_img; +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc b/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc new file mode 100755 index 0000000000..bafd017528 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
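For the tracking visualization above: each track id is mapped to a deterministic color by GetMOTBoxColor, and the box, id, and score are drawn per detection, with an optional trail recorder for motion history. A minimal hypothetical usage sketch follows; it assumes each box is a 4-element (x1, y1, x2, y2) entry, matching how VisMOT indexes it, and the concrete values are placeholders. No trail recorder is passed.

// Illustrative sketch, not part of this patch: visualize one tracked box.
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  cv::Mat frame = cv::imread("frame_000001.jpg");   // placeholder input frame
  ultrainfer::vision::MOTResult result;
  result.boxes.push_back({50, 60, 200, 300});       // x1, y1, x2, y2 (assumed layout)
  result.ids.push_back(7);
  result.scores.push_back(0.92f);
  cv::Mat vis = ultrainfer::vision::VisMOT(frame, result,
                                           /*score_threshold=*/0.5f,
                                           /*recorder=*/nullptr);
  cv::imwrite("mot_vis.jpg", vis);
  return 0;
}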
+ +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result, + const float score_threshold) { + auto vis_im = im.clone(); + bool have_score = (ocr_result.boxes.size() == ocr_result.rec_scores.size()); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (have_score) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } + } + cv::Point rook_points[4]; + + for (int m = 0; m < 4; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1])); + } + + const cv::Point *ppt[1] = {rook_points}; + int npt[] = {4}; + cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + + return vis_im; +} + +cv::Mat VisCURVEOcr(const cv::Mat &im, const OCRCURVEResult &ocr_result, + const float score_threshold) { + auto vis_im = im.clone(); + bool have_score = (ocr_result.boxes.size() == ocr_result.rec_scores.size()); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (have_score) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } + } + std::vector rook_points; + + for (int m = 0; m < ocr_result.boxes[n].size() / 2; m++) { + rook_points.push_back(cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1]))); + } + + if (!rook_points.empty()) { + cv::Point *ppt = &rook_points[0]; + int npt = static_cast(rook_points.size()); + cv::polylines(vis_im, &ppt, &npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + } + + return vis_im; +} + +cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { + FDWARNING + << "DEPRECATED: ultrainfer::vision::Visualize::VisOcr is deprecated, " + "please use ultrainfer::vision:VisOcr function instead." + << std::endl; + auto vis_im = im.clone(); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + cv::Point rook_points[4]; + + for (int m = 0; m < 4; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1])); + } + + const cv::Point *ppt[1] = {rook_points}; + int npt[] = {4}; + cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + + return vis_im; +} + +cv::Mat Visualize::VisCURVEOcr(const cv::Mat &im, + const OCRCURVEResult &ocr_result) { + FDWARNING + << "DEPRECATED: ultrainfer::vision::Visualize::VisOcr is deprecated, " + "please use ultrainfer::vision:VisOcr function instead." + << std::endl; + auto vis_im = im.clone(); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + std::vector rook_points; + for (int m = 0; m < ocr_result.boxes[n].size() / 2; m++) { + rook_points.push_back(cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1]))); + } + if (!rook_points.empty()) { + cv::Point *ppt = &rook_points[0]; + int npt = static_cast(rook_points.size()); + cv::polylines(vis_im, &ppt, &npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + } + + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc b/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc new file mode 100755 index 0000000000..8f41786d13 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc @@ -0,0 +1,195 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { + +using matrix = std::vector>; + +matrix Multiple(const matrix a, const matrix b) { + const int m = a.size(); // a rows + if (m == 0) { + matrix c; + return c; + } + if (a[0].size() != b.size()) { + FDERROR << "A[m,n] * B[p,q], n must equal to p." << std::endl; + matrix c; + return c; + } + const int n = a[0].size(); // a cols + const int p = b[0].size(); // b cols + matrix c(m, std::vector(p, 0)); + for (auto i = 0; i < m; i++) { + for (auto j = 0; j < p; j++) { + for (auto k = 0; k < n; k++) + c[i][j] += a[i][k] * b[k][j]; + } + } + return c; +} + +cv::Mat VisPerception(const cv::Mat &im, const PerceptionResult &result, + const std::string &config_file, float score_threshold, + int line_size, float font_size) { + if (result.scores.empty()) { + return im; + } + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file + << ", maybe you should check this file." << std::endl; + return im; + } + + std::vector target_size; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "Resize") { + target_size = op["target_size"].as>(); + } + } + + std::vector vec_k_data = cfg["k_data"].as>(); + if (vec_k_data.size() != 9) { + FDERROR + << "The K data load from the yaml file: " << config_file + << " is unexpected, the expected size is 9, but the loaded size is: " + << vec_k_data.size() << " ,maybe you should check this file." 
+ << std::endl; + return im; + } + matrix k_data(3, std::vector()); + for (auto j = 0; j < 3; j++) { + k_data[j].insert(k_data[j].begin(), vec_k_data.begin() + j * 3, + vec_k_data.begin() + j * 3 + 3); + } + + std::vector rvec = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + std::vector tvec = {0, 0, 0}; + + matrix connect_line_id = {{1, 0}, {2, 7}, {3, 6}, {4, 5}, {1, 2}, {2, 3}, + {3, 4}, {4, 1}, {0, 7}, {7, 6}, {6, 5}, {5, 0}}; + + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + int h = im.rows; + int w = im.cols; + cv::Mat vis_im = im.clone(); + cv::resize(im, vis_im, cv::Size(target_size[1], target_size[0]), 0, 0, 0); + for (size_t i = 0; i < result.scores.size(); ++i) { + if (result.scores[i] < 0.5) { + continue; + } + float h = result.boxes[i][4]; + float w = result.boxes[i][5]; + float l = result.boxes[i][6]; + + float x = result.center[i][0]; + float y = result.center[i][1]; + float z = result.center[i][2]; + std::vector x_corners = {0, l, l, l, l, 0, 0, 0}; + std::vector y_corners = {0, 0, h, h, 0, 0, h, h}; + std::vector z_corners = {0, 0, 0, w, w, w, w, 0}; + + for (auto j = 0; j < x_corners.size(); j++) { + x_corners[j] = x_corners[j] - l / 2; + y_corners[j] = y_corners[j] - h; + z_corners[j] = z_corners[j] - w / 2; + } + + matrix corners_3d = {x_corners, y_corners, z_corners}; + + float ry = result.yaw_angle[i]; + matrix rot_mat = { + {cosf(ry), 0, sinf(ry)}, {0, 1, 0}, {sinf(ry), 0, cosf(ry)}}; + + matrix rot_corners_3d = Multiple(rot_mat, corners_3d); + + for (auto j = 0; j < rot_corners_3d[0].size(); j++) { + rot_corners_3d[0][j] += x; + rot_corners_3d[1][j] += y; + rot_corners_3d[2][j] += z; + } + + auto corners_2d = Multiple(k_data, rot_corners_3d); + + for (auto j = 0; j < corners_2d[0].size(); j++) { + corners_2d[0][j] /= corners_2d[2][j]; + corners_2d[1][j] /= corners_2d[2][j]; + } + + std::vector box2d = { + *std::min_element(corners_2d[0].begin(), corners_2d[0].end()), + *std::min_element(corners_2d[1].begin(), corners_2d[1].end()), + *std::max_element(corners_2d[0].begin(), corners_2d[0].end()), + *std::max_element(corners_2d[1].begin(), corners_2d[1].end())}; + + if (box2d[0] == 0 && box2d[1] == 0 && box2d[2] == 0 && box2d[3] == 0) { + continue; + } + + std::vector points3d; + for (auto j = 0; j < rot_corners_3d[0].size(); j++) { + points3d.push_back(cv::Point3f(rot_corners_3d[0][j], rot_corners_3d[1][j], + rot_corners_3d[2][j])); + } + cv::Mat rVec(3, 3, cv::DataType::type, rvec.data()); + cv::Mat tVec(3, 1, cv::DataType::type, tvec.data()); + std::vector vec_k; + for (auto &&v : k_data) { + vec_k.insert(vec_k.end(), v.begin(), v.end()); + } + cv::Mat intrinsicMat(3, 3, cv::DataType::type, vec_k.data()); + cv::Mat distCoeffs(5, 1, cv::DataType::type); + std::vector projectedPoints; + cv::projectPoints(points3d, rVec, tVec, intrinsicMat, distCoeffs, + projectedPoints); + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar color = cv::Scalar(c0, c1, c2); + for (auto id = 0; id < connect_line_id.size(); id++) { + int p1 = connect_line_id[id][0]; + int p2 = connect_line_id[id][1]; + cv::line(vis_im, projectedPoints[p1], projectedPoints[p2], color, 1); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = 
std::to_string(result.label_ids[i]) + ", " + score; + cv::Point2f original; + original.x = box2d[0]; + original.y = box2d[1]; + cv::putText(vis_im, text, original, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc b/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc new file mode 100755 index 0000000000..5b5e55c38b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, float threshold) { + cv::Mat gray, binary; + alpha_pred.convertTo(gray, CV_8UC1, 255.f); + cv::Mat alpha_pred_clone = alpha_pred.clone(); + // 255 * 0.05 ~ 13 + unsigned int binary_threshold = static_cast(255.f * threshold); + cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY); + // morphologyEx with OPEN operation to remove noise first. + auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3), + cv::Point(-1, -1)); + cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel); + // Computationally connected domain + cv::Mat labels = cv::Mat::zeros(alpha_pred_clone.size(), CV_32S); + cv::Mat stats, centroids; + int num_labels = + cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4); + if (num_labels <= 1) { + // no noise, skip. + return alpha_pred; + } + // find max connected area, 0 is background + int max_connected_id = 1; // 1,2,... + int max_connected_area = stats.at(max_connected_id, cv::CC_STAT_AREA); + for (int i = 1; i < num_labels; ++i) { + int tmp_connected_area = stats.at(i, cv::CC_STAT_AREA); + if (tmp_connected_area > max_connected_area) { + max_connected_area = tmp_connected_area; + max_connected_id = i; + } + } + const int h = alpha_pred_clone.rows; + const int w = alpha_pred_clone.cols; + // remove small connected area. + for (int i = 0; i < h; ++i) { + int *label_row_ptr = labels.ptr(i); + float *alpha_row_ptr = alpha_pred_clone.ptr(i); + for (int j = 0; j < w; ++j) { + if (label_row_ptr[j] != max_connected_id) + alpha_row_ptr[j] = 0.f; + } + } + return alpha_pred_clone; +} + +cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat &alpha_pred, + float threshold) { + cv::Mat gray, binary; + alpha_pred.convertTo(gray, CV_8UC1, 255.f); + cv::Mat alpha_pred_clone = alpha_pred.clone(); + // 255 * 0.05 ~ 13 + unsigned int binary_threshold = static_cast(255.f * threshold); + cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY); + // morphologyEx with OPEN operation to remove noise first. 
+ auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3), + cv::Point(-1, -1)); + cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel); + // Computationally connected domain + cv::Mat labels = cv::Mat::zeros(alpha_pred_clone.size(), CV_32S); + cv::Mat stats, centroids; + int num_labels = + cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4); + if (num_labels <= 1) { + // no noise, skip. + return alpha_pred; + } + // find max connected area, 0 is background + int max_connected_id = 1; // 1,2,... + int max_connected_area = stats.at(max_connected_id, cv::CC_STAT_AREA); + for (int i = 1; i < num_labels; ++i) { + int tmp_connected_area = stats.at(i, cv::CC_STAT_AREA); + if (tmp_connected_area > max_connected_area) { + max_connected_area = tmp_connected_area; + max_connected_id = i; + } + } + const int h = alpha_pred_clone.rows; + const int w = alpha_pred_clone.cols; + // remove small connected area. + for (int i = 0; i < h; ++i) { + int *label_row_ptr = labels.ptr(i); + float *alpha_row_ptr = alpha_pred_clone.ptr(i); + for (int j = 0; j < w; ++j) { + if (label_row_ptr[j] != max_connected_id) + alpha_row_ptr[j] = 0.f; + } + } + return alpha_pred_clone; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc new file mode 100755 index 0000000000..fc4425816f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/segmentation_arm.h" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static cv::Mat VisSegmentationCommonCpu(const cv::Mat &im, + const SegmentationResult &result, + float weight) { + // Use the native c++ version without any optimization. 
+ auto color_map = GenerateColorMap(1000); + int64_t height = result.shape[0]; + int64_t width = result.shape[1]; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + int64_t index = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int category_id = result.label_map[index++]; + if (category_id == 0) { + vis_img.at(i, j)[0] = im.at(i, j)[0]; + vis_img.at(i, j)[1] = im.at(i, j)[1]; + vis_img.at(i, j)[2] = im.at(i, j)[2]; + } else { + vis_img.at(i, j)[0] = color_map[3 * category_id + 0]; + vis_img.at(i, j)[1] = color_map[3 * category_id + 1]; + vis_img.at(i, j)[2] = color_map[3 * category_id + 2]; + } + } + } + cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img); + return vis_img; +} + +cv::Mat VisSegmentation(const cv::Mat &im, const SegmentationResult &result, + float weight) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return VisSegmentationNEON(im, result, weight, true); +#else + return VisSegmentationCommonCpu(im, result, weight); +#endif +} + +cv::Mat Visualize::VisSegmentation(const cv::Mat &im, + const SegmentationResult &result) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisSegmentation is " + "deprecated, please use ultrainfer::vision:VisSegmentation " + "function instead." + << std::endl; +#ifdef __ARM_NEON + return VisSegmentationNEON(im, result, 0.5f, true); +#else + return VisSegmentationCommonCpu(im, result, 0.5f); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc new file mode 100755 index 0000000000..9d2defa74b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/segmentation_arm.h" +#ifdef __ARM_NEON +#include +#endif + +namespace ultrainfer { +namespace vision { + +static constexpr int _OMP_THREADS = 2; + +static inline void QuantizeBlendingWeight8(float weight, + uint8_t *old_multi_factor, + uint8_t *new_multi_factor) { + // Quantize the weight to boost blending performance. + // if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7 + // if 1/8 < w <= 2/8, w ~ 2/8=1/(2^3) shift right 3 mul 2, 6 + // if 2/8 < w <= 3/8, w ~ 3/8=1/(2^3) shift right 3 mul 3, 5 + // if 3/8 < w <= 4/8, w ~ 4/8=1/(2^3) shift right 3 mul 4, 4 + // Shift factor is always 3, but the mul factor is different. + // Moving 7 bits to the right tends to result in a zero value, + // So, We choose to shift 3 bits to get an approximation. 
+ uint8_t weight_quantize = static_cast(weight * 8.0f); + *new_multi_factor = weight_quantize; + *old_multi_factor = (8 - weight_quantize); +} + +cv::Mat VisSegmentationNEON(const cv::Mat &im, const SegmentationResult &result, + float weight, bool quantize_weight) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!") +#else + int64_t height = result.shape[0]; + int64_t width = result.shape[1]; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + int32_t size = static_cast(height * width); + uint8_t *vis_ptr = static_cast(vis_img.data); + const uint8_t *label_ptr = + static_cast(result.label_map.data()); + const uint8_t *im_ptr = static_cast(im.data); + + if (!quantize_weight) { + uint8x16_t zerox16 = vdupq_n_u8(0); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t ibx16 = bgrx16x3.val[0]; + uint8x16_t igx16 = bgrx16x3.val[1]; + uint8x16_t irx16 = bgrx16x3.val[2]; + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16x3_t vbgrx16x3; + // Keep the pixels of input im if mask = 0 + uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16); + vbgrx16x3.val[0] = vorrq_u8(vandq_u8(cezx16, ibx16), mbx16); + vbgrx16x3.val[1] = vorrq_u8(vandq_u8(cezx16, igx16), mgx16); + vbgrx16x3.val[2] = vorrq_u8(vandq_u8(cezx16, irx16), mrx16); + vst3q_u8(vis_ptr + i * 3, vbgrx16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); + } + // Blend the colors use OpenCV + cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img); + return vis_img; + } + + // Quantize the weight to boost blending performance. + // After that, we can directly use shift instructions + // to blend the colors from input im and mask. Please + // check QuantizeBlendingWeight8 for more details. + uint8_t old_multi_factor, new_multi_factor; + QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor); + if (new_multi_factor == 0) { + return im; // Only keep origin image. + } + + if (new_multi_factor == 8) { +// Only keep mask, no need to blending with origin image. +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16x3_t vbgr16x3; + vbgr16x3.val[0] = mbx16; + vbgr16x3.val[1] = mgx16; + vbgr16x3.val[2] = mrx16; + vst3q_u8(vis_ptr + i * 3, vbgr16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); + } + return vis_img; + } + + uint8x16_t zerox16 = vdupq_n_u8(0); + uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor); + uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor); +// Blend the two colors together with quantize 'weight'. 
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t ibx16 = bgrx16x3.val[0]; + uint8x16_t igx16 = bgrx16x3.val[1]; + uint8x16_t irx16 = bgrx16x3.val[2]; + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + // Moving 7 bits to the right tends to result in zero, + // So, We choose to shift 3 bits to get an approximation + uint8x16_t ibx16_mshr = vmulq_u8(vshrq_n_u8(ibx16, 3), old_fx16); + uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16); + uint8x16_t irx16_mshr = vmulq_u8(vshrq_n_u8(irx16, 3), old_fx16); + uint8x16_t mbx16_mshr = vmulq_u8(vshrq_n_u8(mbx16, 3), new_fx16); + uint8x16_t mgx16_mshr = vmulq_u8(vshrq_n_u8(mgx16, 3), new_fx16); + uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16); + uint8x16_t qbx16 = vqaddq_u8(ibx16_mshr, mbx16_mshr); + uint8x16_t qgx16 = vqaddq_u8(igx16_mshr, mgx16_mshr); + uint8x16_t qrx16 = vqaddq_u8(irx16_mshr, mrx16_mshr); + // Keep the pixels of input im if label = 0 (means mask = 0) + uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16); + uint8x16_t abx16 = vandq_u8(cezx16, ibx16); + uint8x16_t agx16 = vandq_u8(cezx16, igx16); + uint8x16_t arx16 = vandq_u8(cezx16, irx16); + uint8x16x3_t vbgr16x3; + // Reset qx values to 0 if label is 0, then, keep mask values + // if label is not 0 + uint8x16_t ncezx16 = vmvnq_u8(cezx16); + vbgr16x3.val[0] = vorrq_u8(abx16, vandq_u8(ncezx16, qbx16)); + vbgr16x3.val[1] = vorrq_u8(agx16, vandq_u8(ncezx16, qgx16)); + vbgr16x3.val[2] = vorrq_u8(arx16, vandq_u8(ncezx16, qrx16)); + // Store the blended pixels to vis img + vst3q_u8(vis_ptr + i * 3, vbgr16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor + + ((label << 7) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor + + ((label << 4) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor + + ((label << 3) >> 3) * new_multi_factor; + } + return vis_img; +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h new file mode 100755 index 0000000000..5e82c7859e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
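The NEON segmentation path above quantizes the blending weight to a multiple of 1/8 so the per-channel blend becomes shifts and small integer multiplies instead of float math, exactly as the scalar tail of the loop does. A standalone scalar sketch of that approximation (function name and example values are illustrative):

// Illustrative scalar equivalent of the quantized blend used above:
// weight is truncated to n/8, then each channel becomes
// (pixel >> 3) * (8 - n) + (mask_color >> 3) * n, roughly pixel*(1-w) + mask*w.
#include <cstdint>
#include <cstdio>

static uint8_t BlendQuantized(uint8_t im_val, uint8_t mask_val, float weight) {
  uint8_t n = static_cast<uint8_t>(weight * 8.0f);  // new_multi_factor
  uint8_t o = static_cast<uint8_t>(8 - n);          // old_multi_factor
  return static_cast<uint8_t>((im_val >> 3) * o + (mask_val >> 3) * n);
}

int main() {
  // With weight = 0.5 both factors are 4; here the result equals the exact
  // average (132). In general, error comes from the weight truncation and
  // the >> 3 truncation of each operand.
  std::printf("%d\n", BlendQuantized(200, 64, 0.5f));
  return 0;
}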
+ +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisSegmentationNEON(const cv::Mat &im, const SegmentationResult &result, + float weight, bool quantize_weight = true); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc new file mode 100755 index 0000000000..2f05c302a0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/visualize/swap_background_arm.h" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat &im, + const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + auto vis_img = im.clone(); + auto background_copy = background.clone(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + if ((background_copy).type() != CV_8UC3) { + (background_copy).convertTo((background_copy), CV_8UC3); + } + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_copy, cv::Size(width, height)); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + uchar *vis_data = static_cast(vis_img.data); + uchar *background_data = static_cast(background_copy.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + for (size_t c = 0; c < 3; ++c) { + vis_data[i * width * 3 + j * 3 + c] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + c]) * alpha_val + + (1.f - alpha_val) * background_data[i * width * 3 + j * 3 + c]); + } + } + } + + return 
vis_img; +} + +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat &im, + const cv::Mat &background, + const SegmentationResult &result, + int background_label) { + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + auto vis_img = im.clone(); + auto background_copy = background.clone(); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + if ((background_copy).type() != CV_8UC3) { + (background_copy).convertTo((background_copy), CV_8UC3); + } + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_copy, cv::Size(width, height)); + } + uchar *vis_data = static_cast(vis_img.data); + uchar *background_data = static_cast(background_copy.data); + uchar *im_data = static_cast(im.data); + float keep_value = 0.f; + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + int category_id = result.label_map[i * width + j]; + if (background_label != category_id) { + keep_value = 1.0f; + } else { + keep_value = 0.f; + } + for (size_t c = 0; c < 3; ++c) { + vis_data[i * width * 3 + j * 3 + c] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + c]) * + keep_value + + (1.f - keep_value) * background_data[i * width * 3 + j * 3 + c]); + } + } + } + + return vis_img; +} + +// Public interfaces for SwapBackground. +cv::Mat SwapBackground(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, + remove_small_connected_area); +#else + return SwapBackgroundCommonCpu(im, background, result, + remove_small_connected_area); +#endif +} + +cv::Mat SwapBackground(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, int background_label) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + // return SwapBackgroundNEON(im, background, result, background_label); + return SwapBackgroundNEON(im, background, result, background_label); +#else + return SwapBackgroundCommonCpu(im, background, result, background_label); +#endif +} + +// DEPRECATED +cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat &im, + const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { +// TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, + remove_small_connected_area); +#else + return SwapBackgroundCommonCpu(im, background, result, + remove_small_connected_area); +#endif +} + +cv::Mat Visualize::SwapBackgroundSegmentation( + const cv::Mat &im, const cv::Mat &background, int background_label, + const SegmentationResult &result) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, background_label); +#else + return SwapBackgroundCommonCpu(im, background, result, background_label); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc new file mode 100755 
index 0000000000..1382ef6fcb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc @@ -0,0 +1,238 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/swap_background_arm.h" + +#include "ultrainfer/vision/visualize/visualize.h" +#ifdef __ARM_NEON +#include +#endif +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace vision { + +static constexpr int _OMP_THREADS = 2; + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!"); +#else + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + + // WARN: may change the original alpha + float *alpha_ptr = const_cast(result.alpha.data()); + + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f); + } + auto vis_img = cv::Mat(height, width, CV_8UC3); + + cv::Mat background_ref; + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_ref, cv::Size(width, height)); + } else { + background_ref = background; // ref only + } + if ((background_ref).type() != CV_8UC3) { + (background_ref).convertTo((background_ref), CV_8UC3); + } + + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + uint8_t *vis_data = static_cast(vis_img.data); + const uint8_t *background_data = + static_cast(background_ref.data); + const uint8_t *im_data = static_cast(im.data); + const float *alpha_data = reinterpret_cast(alpha.data); + + const int32_t size = static_cast(height * width); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 7; i += 8) { + uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3); // 24 bytes + // u8 -> u16 -> u32 -> f32 + uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]); + uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]); + uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]); + uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3); // 24 bytes + uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]); + uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]); + uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]); + + uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8)); + uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8)); + uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8)); + uint32x4_t libx4 = 
vmovl_u16(vget_low_u16(ibx8)); + uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8)); + uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8)); + + uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8)); + uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8)); + uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8)); + uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8)); + uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8)); + uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8)); + + float32x4_t fhibx4 = vcvtq_f32_u32(hibx4); + float32x4_t fhigx4 = vcvtq_f32_u32(higx4); + float32x4_t fhirx4 = vcvtq_f32_u32(hirx4); + float32x4_t flibx4 = vcvtq_f32_u32(libx4); + float32x4_t fligx4 = vcvtq_f32_u32(ligx4); + float32x4_t flirx4 = vcvtq_f32_u32(lirx4); + + float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4); + float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4); + float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4); + float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4); + float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4); + float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4); + + // alpha load from little end + float32x4_t lalpx4 = vld1q_f32(alpha_data + i); // low bits + float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4); // high bits + float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4); + float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4); + + // blending + float32x4_t fhvbx4 = + vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4)); + float32x4_t fhvgx4 = + vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4)); + float32x4_t fhvrx4 = + vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4)); + float32x4_t flvbx4 = + vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4)); + float32x4_t flvgx4 = + vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4)); + float32x4_t flvrx4 = + vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4)); + + // f32 -> u32 -> u16 -> u8 + uint8x8x3_t vbgrx8x3; + // combine low 64 bits and high 64 bits into one 128 neon register + vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)), + vmovn_u32(vcvtq_u32_f32(fhvbx4)))); + vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)), + vmovn_u32(vcvtq_u32_f32(fhvgx4)))); + vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)), + vmovn_u32(vcvtq_u32_f32(fhvrx4)))); + vst3_u8(vis_data + i * 3, vbgrx8x3); + } + + for (int i = size - 7; i < size; i++) { + float alp = alpha_data[i]; + for (int c = 0; c < 3; ++c) { + vis_data[i * 3 + 0] = cv::saturate_cast( + static_cast(im_data[i * 3 + c]) * alp + + (1.0f - alp) * static_cast(background_data[i * 3 + c])); + } + } + + return vis_img; +#endif +} + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, + int background_label) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!") +#else + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + cv::Mat background_ref; + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_ref, 
cv::Size(width, height)); + } else { + background_ref = background; // ref only + } + if ((background_ref).type() != CV_8UC3) { + (background_ref).convertTo((background_ref), CV_8UC3); + } + + uint8_t *vis_data = static_cast(vis_img.data); + const uint8_t *background_data = + static_cast(background_ref.data); + const uint8_t *im_data = static_cast(im.data); + const uint8_t *label_data = + static_cast(result.label_map.data()); + + const uint8_t background_label_ = static_cast(background_label); + const int32_t size = static_cast(height * width); + + uint8x16_t backgroundx16 = vdupq_n_u8(background_label_); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3); // 48 bytes + uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3); + uint8x16_t labelx16 = vld1q_u8(label_data + i); // 16 bytes + // Set mask bit = 1 if label != background_label + uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16); + uint8x16_t keepx16 = vmvnq_u8(nkeepx16); // keep_value = 1 + uint8x16x3_t vbgr16x3; + vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16), + vandq_u8(bbgr16x3.val[0], nkeepx16)); + vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16), + vandq_u8(bbgr16x3.val[1], nkeepx16)); + vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16), + vandq_u8(bbgr16x3.val[2], nkeepx16)); + // Store the blended pixels to vis img + vst3q_u8(vis_data + i * 3, vbgr16x3); + } + + for (int i = size - 15; i < size; i++) { + uint8_t label = label_data[i]; + if (label != background_label_) { + vis_data[i * 3 + 0] = im_data[i * 3 + 0]; + vis_data[i * 3 + 1] = im_data[i * 3 + 1]; + vis_data[i * 3 + 2] = im_data[i * 3 + 2]; + } else { + vis_data[i * 3 + 0] = background_data[i * 3 + 0]; + vis_data[i * 3 + 1] = background_data[i * 3 + 1]; + vis_data[i * 3 + 2] = background_data[i * 3 + 2]; + } + } + + return vis_img; +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h new file mode 100755 index 0000000000..5f614281c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
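Both NEON kernels above are reached through the public SwapBackground overloads declared earlier in this patch. A minimal hypothetical usage of the segmentation overload follows; the SegmentationResult fields (shape, label_map) are filled with dummy all-background values purely so the call is well-formed, whereas in practice they come from a segmentation model, and the file names and label value are placeholders.

// Illustrative sketch, not part of this patch: replace background pixels
// (label 0) of a segmentation result with pixels from another image.
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  cv::Mat im = cv::imread("portrait.jpg");        // placeholder input image
  cv::Mat background = cv::imread("beach.jpg");   // placeholder background image
  ultrainfer::vision::SegmentationResult result;  // normally produced by a model
  result.shape = {im.rows, im.cols};              // assumed field layout
  result.label_map.assign(static_cast<size_t>(im.rows) * im.cols, 0);  // all background
  cv::Mat vis = ultrainfer::vision::SwapBackground(im, background, result,
                                                   /*background_label=*/0);
  cv::imwrite("swapped.jpg", vis);                // here: the resized background
  return 0;
}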
+ +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area = false); + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, + int background_label); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc new file mode 100755 index 0000000000..5e1da407c9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static std::vector global_fd_vis_color_map = std::vector(); + +std::vector GenerateColorMap(int num_classes) { + if (num_classes < 10) { + num_classes = 10; + } + std::vector color_map(num_classes * 3, 0); + for (int i = 0; i < num_classes; ++i) { + int j = 0; + int lab = i; + while (lab) { + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return color_map; +} + +// This class will deprecated, please not use it +int Visualize::num_classes_ = 0; +std::vector Visualize::color_map_ = std::vector(); + +const std::vector &Visualize::GetColorMap(int num_classes) { + if (num_classes < num_classes_) { + return color_map_; + } + num_classes_ = num_classes; + std::vector().swap(color_map_); + color_map_.resize(3 * num_classes_, 0); + for (int i = 0; i < num_classes_; ++i) { + int j = 0; + int lab = i; + while (lab) { + color_map_[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + color_map_[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + color_map_[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return color_map_; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h new file mode 100755 index 0000000000..921924279b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h @@ -0,0 +1,251 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/tracking/pptracking/model.h" + +namespace ultrainfer { +/** \brief All C++ UltraInfer Vision Models APIs are defined inside this + * namespace + * + */ +namespace vision { + +class ULTRAINFER_DECL Visualize { +public: + static int num_classes_; + static std::vector color_map_; + static const std::vector &GetColorMap(int num_classes = 1000); + static cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + float score_threshold = 0.0, int line_size = 1, + float font_size = 0.5f); + static cv::Mat VisPerception(const cv::Mat &im, + const PerceptionResult &result, + const std::string &config_file, + float score_threshold = 0.0, int line_size = 1, + float font_size = 0.5f); + static cv::Mat VisFaceDetection(const cv::Mat &im, + const FaceDetectionResult &result, + int line_size = 1, float font_size = 0.5f); + static cv::Mat VisSegmentation(const cv::Mat &im, + const SegmentationResult &result); + static cv::Mat VisMattingAlpha(const cv::Mat &im, const MattingResult &result, + bool remove_small_connected_area = false); + static cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, + float threshold); + static cv::Mat + SwapBackgroundMatting(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area = false); + static cv::Mat SwapBackgroundSegmentation(const cv::Mat &im, + const cv::Mat &background, + int background_label, + const SegmentationResult &result); + static cv::Mat VisOcr(const cv::Mat &srcimg, const OCRResult &ocr_result); + static cv::Mat VisCURVEOcr(const cv::Mat &srcimg, + const OCRCURVEResult &ocr_result); +}; + +std::vector GenerateColorMap(int num_classes = 1000); +cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, float threshold); +/** \brief Show the visualized results for detection models + * + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result the result produced by model + * \param[in] score_threshold threshold for result scores, the bounding box will + * not be shown if the score is less than score_threshold \param[in] line_size + * line size for bounding boxes \param[in] font_size font size for text \return + * cv::Mat type stores the visualized results + */ +ULTRAINFER_DECL cv::Mat VisDetection(const cv::Mat &im, + const DetectionResult &result, + float score_threshold = 0.0, + int line_size = 1, float font_size = 0.5f); +/** \brief Show the visualized results with custom labels for detection models + * + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result the result produced by model + * \param[in] labels the visualized result will show the bounding box contain + * class label \param[in] score_threshold threshold for result scores, the + * bounding box will not be shown if the score is less than score_threshold + * \param[in] line_size line size for bounding boxes + * \param[in] font_size font size for text + * \param[in] font_color font color for bounding text + * \param[in] font_thickness font thickness for text + * \return cv::Mat type stores the visualized results + */ +ULTRAINFER_DECL cv::Mat VisDetection( + const cv::Mat &im, const DetectionResult &result, + const std::vector &labels, float 
+    int line_size = 1, float font_size = 0.5f,
+    std::vector<int> font_color = {255, 255, 255}, int font_thickness = 1);
+
+/** \brief Show the visualized results for perception models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] config_file the path of the configuration file used by the model
+ * \param[in] score_threshold threshold for result scores, the bounding box will
+ * not be shown if the score is less than score_threshold
+ * \param[in] line_size line size for bounding boxes
+ * \param[in] font_size font size for text
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+VisPerception(const cv::Mat &im, const PerceptionResult &result,
+              const std::string &config_file, float score_threshold = 0.0,
+              int line_size = 1, float font_size = 0.5f);
+/** \brief Show the visualized results for classification models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] top_k the length of return values, e.g., if top_k==2, the result
+ * will include the 2 most likely class labels for the input image. \param[in]
+ * score_threshold threshold for top_k scores, the class will not be shown if
+ * the score is less than score_threshold \param[in] font_size font size \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisClassification(const cv::Mat &im,
+                                          const ClassifyResult &result,
+                                          int top_k = 5,
+                                          float score_threshold = 0.0f,
+                                          float font_size = 0.5f);
+/** \brief Show the visualized results with custom labels for classification
+ * models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] labels custom labels for the user, the visualized result will show
+ * the corresponding custom labels \param[in] top_k the length of return values,
+ * e.g., if top_k==2, the result will include the 2 most likely class labels for
+ * the input image. \param[in] score_threshold threshold for top_k scores,
+ * the class will not be shown if the score is less than score_threshold
+ * \param[in] font_size font size \return cv::Mat type stores the visualized
+ * results
+ */
+ULTRAINFER_DECL cv::Mat
+VisClassification(const cv::Mat &im, const ClassifyResult &result,
+                  const std::vector<std::string> &labels, int top_k = 5,
+                  float score_threshold = 0.0f, float font_size = 0.5f);
+/** \brief Show the visualized results for face detection models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] line_size line size for bounding boxes
+ * \param[in] font_size font size for text
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisFaceDetection(const cv::Mat &im,
+                                         const FaceDetectionResult &result,
+                                         int line_size = 1,
+                                         float font_size = 0.5f);
+/** \brief Show the visualized results for face alignment models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] line_size line size for circle point
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisFaceAlignment(const cv::Mat &im,
+                                         const FaceAlignmentResult &result,
+                                         int line_size = 1);
+/** \brief Show the visualized results for segmentation models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] weight transparency weight of the visualized result image
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisSegmentation(const cv::Mat &im,
+                                        const SegmentationResult &result,
+                                        float weight = 0.5);
+/** \brief Show the visualized results for matting models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] transparent_background if transparent_background==true, the
+ * background will be filled with a transparent color \param[in]
+ * transparent_threshold since the alpha value in MattingResult is a float
+ * between [0, 1], transparent_threshold is used to filter background pixels
+ * \param[in] remove_small_connected_area if remove_small_connected_area==true,
+ * the visualized result will not include the small connected areas \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisMatting(const cv::Mat &im,
+                                   const MattingResult &result,
+                                   bool transparent_background = false,
+                                   float transparent_threshold = 0.999,
+                                   bool remove_small_connected_area = false);
+/** \brief Show the visualized results for OCR models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result,
+                               const float score_threshold = 0);
+ULTRAINFER_DECL cv::Mat VisCURVEOcr(const cv::Mat &im,
+                                    const OCRCURVEResult &ocr_result,
+                                    const float score_threshold = 0);
+
+ULTRAINFER_DECL cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
+                               float score_threshold = 0.0f,
+                               tracking::TrailRecorder *recorder = nullptr);
+/** \brief Swap the image background with MattingResult
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] background the background image data,
+ * comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+ * \param[in] result the MattingResult produced by model
+ * \param[in] remove_small_connected_area if remove_small_connected_area==true,
+ * the visualized result will not include the small connected areas \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+SwapBackground(const cv::Mat &im, const cv::Mat &background,
+               const MattingResult &result,
+               bool remove_small_connected_area = false);
+/** \brief Swap the image background with SegmentationResult
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] background the background image data,
+ * comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+ * \param[in] result the SegmentationResult produced by model
+ * \param[in] background_label the background label number in SegmentationResult
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat SwapBackground(const cv::Mat &im,
+                                       const cv::Mat &background,
+                                       const SegmentationResult &result,
+                                       int background_label);
+
+/** \brief Show the visualized results for key point detection models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] results the result produced by model
+ * \param[in] conf_threshold threshold for result scores, the result will not be
+ * shown if the score is less than conf_threshold \return cv::Mat type stores
+ * the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+VisKeypointDetection(const cv::Mat &im, const KeyPointDetectionResult &results,
+                     float conf_threshold = 0.5f);
+ULTRAINFER_DECL cv::Mat VisHeadPose(const cv::Mat &im,
+                                    const HeadPoseResult &result, int size = 50,
+                                    int line_size = 1);
+
+} // namespace vision
+} // namespace ultrainfer
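The GenerateColorMap declared above produces a deterministic per-class palette by spreading the bits of the class id across the three channels (the familiar PASCAL-VOC-style scheme), so a given label always maps to the same color. A minimal usage sketch, assuming an UltraInfer build with these headers on the include path; the snippet is illustrative only:

#include <cstdio>
#include <vector>

#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  // Request a palette for 20 classes; the implementation always allocates at
  // least 10 entries and stores three channel values per class id.
  std::vector<int> cmap = ultrainfer::vision::GenerateColorMap(20);
  for (int id = 0; id < 5; ++id) {
    // The three consecutive entries are the channel values for this class.
    std::printf("class %d -> (%d, %d, %d)\n", id, cmap[id * 3],
                cmap[id * 3 + 1], cmap[id * 3 + 2]);
  }
  return 0;
}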
diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc
new file mode 100755
index 0000000000..b6ce5131b5
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc
@@ -0,0 +1,256 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindVisualize(pybind11::module &m) {
+  m.def("vis_detection",
+        [](pybind11::array &im_data, vision::DetectionResult &result,
+           std::vector<std::string> &labels, float score_threshold,
+           int line_size, float font_size, std::vector<int> font_color,
+           int font_thickness) {
+          auto im = PyArrayToCvMat(im_data);
+          cv::Mat vis_im;
+          if (labels.empty()) {
+            vis_im = vision::VisDetection(im, result, score_threshold,
+                                          line_size, font_size);
+          } else {
+            vis_im = vision::VisDetection(im, result, labels, score_threshold,
+                                          line_size, font_size, font_color,
+                                          font_thickness);
+          }
+          FDTensor out;
+          vision::Mat(vis_im).ShareWithTensor(&out);
+          return TensorToPyArray(out);
+        })
+      .def("vis_perception",
+           [](pybind11::array &im_data, vision::PerceptionResult &result,
+              const std::string &config_file, float score_threshold,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisPerception(im, result, config_file, score_threshold,
+                                       line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_face_detection",
+           [](pybind11::array &im_data, vision::FaceDetectionResult &result,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisFaceDetection(im, result, line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_face_alignment",
+           [](pybind11::array &im_data, vision::FaceAlignmentResult &result,
+              int line_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisFaceAlignment(im, result, line_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_segmentation",
+           [](pybind11::array &im_data, vision::SegmentationResult &result,
+              float weight) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisSegmentation(im, result, weight);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array &im_data, pybind11::array &background_data,
+              vision::MattingResult &result, bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array &im_data, pybind11::array &background_data,
+              vision::SegmentationResult &result, int background_label) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  background_label);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_ppocr",
+           [](pybind11::array &im_data, vision::OCRResult &result) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisOcr(im, result);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_ppocr_curve",
+           [](pybind11::array &im_data, vision::OCRCURVEResult &result) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisCURVEOcr(im, result);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_mot",
+           [](pybind11::array &im_data, vision::MOTResult &result,
+              float score_threshold, vision::tracking::TrailRecorder record) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisMOT(im, result, score_threshold, &record);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_matting",
+           [](pybind11::array &im_data, vision::MattingResult &result,
+              bool transparent_background, float transparent_threshold,
+              bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisMatting(
+                 im, result, transparent_background, transparent_threshold,
+                 remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_headpose",
+           [](pybind11::array &im_data, vision::HeadPoseResult &result,
+              int size, int line_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisHeadPose(im, result, size, line_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           });
+
+  pybind11::class_<vision::Visualize>(m, "Visualize")
+      .def(pybind11::init<>())
+      .def_static("vis_detection",
+                  [](pybind11::array &im_data, vision::DetectionResult &result,
+                     float score_threshold, int line_size, float font_size) {
+                    auto im = PyArrayToCvMat(im_data);
+                    auto vis_im = vision::Visualize::VisDetection(
+                        im, result, score_threshold, line_size, font_size);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static(
+          "vis_keypoint_detection",
+          [](pybind11::array &im_data, vision::KeyPointDetectionResult &result,
+             float conf_threshold) {
+            auto im = PyArrayToCvMat(im_data);
+            auto vis_im =
+                vision::VisKeypointDetection(im, result, conf_threshold);
+            FDTensor out;
+            vision::Mat(vis_im).ShareWithTensor(&out);
+            return TensorToPyArray(out);
+          })
+      .def_static("vis_face_detection",
+                  [](pybind11::array &im_data,
+                     vision::FaceDetectionResult &result, int line_size,
+                     float font_size) {
+                    auto im = PyArrayToCvMat(im_data);
+                    auto vis_im = vision::Visualize::VisFaceDetection(
+                        im, result, line_size, font_size);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static(
+          "vis_segmentation",
+          [](pybind11::array &im_data, vision::SegmentationResult &result) {
+            cv::Mat im = PyArrayToCvMat(im_data);
+            auto vis_im = vision::Visualize::VisSegmentation(im, result);
+            FDTensor out;
+            vision::Mat(vis_im).ShareWithTensor(&out);
+            return TensorToPyArray(out);
+          })
+      .def_static("swap_background_matting",
+                  [](pybind11::array &im_data, pybind11::array &background_data,
+                     vision::MattingResult &result,
+                     bool remove_small_connected_area) {
+                    cv::Mat im = PyArrayToCvMat(im_data);
+                    cv::Mat background = PyArrayToCvMat(background_data);
+                    auto vis_im = vision::Visualize::SwapBackgroundMatting(
+                        im, background, result, remove_small_connected_area);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static("swap_background_segmentation",
+                  [](pybind11::array &im_data, pybind11::array &background_data,
+                     int background_label, vision::SegmentationResult &result) {
+                    cv::Mat im = PyArrayToCvMat(im_data);
+                    cv::Mat background = PyArrayToCvMat(background_data);
+                    auto vis_im = vision::Visualize::SwapBackgroundSegmentation(
+                        im, background, background_label, result);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
.def_static("remove_small_connected_area", + [](pybind11::array &alpha_pred_data, float threshold) { + cv::Mat alpha_pred = PyArrayToCvMat(alpha_pred_data); + auto vis_im = vision::Visualize::RemoveSmallConnectedArea( + alpha_pred, threshold); + }) + .def_static("vis_ppocr", + [](pybind11::array &im_data, vision::OCRResult &result) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisOcr(im, result); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static("vis_ppocr_curve", + [](pybind11::array &im_data, vision::OCRCURVEResult &result) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisCURVEOcr(im, result); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static( + "vis_mot", + [](pybind11::array &im_data, vision::MOTResult &result, + float score_threshold, vision::tracking::TrailRecorder *record) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::VisMOT(im, result, score_threshold, record); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static("vis_matting_alpha", + [](pybind11::array &im_data, vision::MattingResult &result, + bool remove_small_connected_area) { + cv::Mat im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisMattingAlpha( + im, result, remove_small_connected_area); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }); +} +} // namespace ultrainfer